Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Utils/LoopUtils.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file defines common loop utility functions.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/Transforms/Utils/LoopUtils.h"
15
#include "llvm/ADT/ScopeExit.h"
16
#include "llvm/Analysis/AliasAnalysis.h"
17
#include "llvm/Analysis/BasicAliasAnalysis.h"
18
#include "llvm/Analysis/GlobalsModRef.h"
19
#include "llvm/Analysis/LoopInfo.h"
20
#include "llvm/Analysis/LoopPass.h"
21
#include "llvm/Analysis/ScalarEvolution.h"
22
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
23
#include "llvm/Analysis/ScalarEvolutionExpander.h"
24
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
25
#include "llvm/Analysis/TargetTransformInfo.h"
26
#include "llvm/IR/Dominators.h"
27
#include "llvm/IR/Instructions.h"
28
#include "llvm/IR/Module.h"
29
#include "llvm/IR/PatternMatch.h"
30
#include "llvm/IR/ValueHandle.h"
31
#include "llvm/Pass.h"
32
#include "llvm/Support/Debug.h"
33
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
34
35
using namespace llvm;
36
using namespace llvm::PatternMatch;
37
38
#define DEBUG_TYPE "loop-utils"
39
40
bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
41
121
                                        SmallPtrSetImpl<Instruction *> &Set) {
42
315
  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); 
Use != E315
;
++Use194
)
43
223
    
if (223
!Set.count(dyn_cast<Instruction>(*Use))223
)
44
29
      return false;
45
92
  return true;
46
121
}
47
48
1.52M
bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
49
1.52M
  switch (Kind) {
50
501k
  default:
51
501k
    break;
52
1.02M
  case RK_IntegerAdd:
53
1.02M
  case RK_IntegerMult:
54
1.02M
  case RK_IntegerOr:
55
1.02M
  case RK_IntegerAnd:
56
1.02M
  case RK_IntegerXor:
57
1.02M
  case RK_IntegerMinMax:
58
1.02M
    return true;
59
501k
  }
60
501k
  return false;
61
501k
}
62
63
77.0k
bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
64
77.0k
  return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
65
77.0k
}
66
67
967k
bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
68
967k
  switch (Kind) {
69
637k
  default:
70
637k
    break;
71
329k
  case RK_IntegerAdd:
72
329k
  case RK_IntegerMult:
73
329k
  case RK_FloatAdd:
74
329k
  case RK_FloatMult:
75
329k
    return true;
76
637k
  }
77
637k
  return false;
78
637k
}
79
80
Instruction *
81
RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
82
                                     SmallPtrSetImpl<Instruction *> &Visited,
83
329k
                                     SmallPtrSetImpl<Instruction *> &CI) {
84
329k
  if (!Phi->hasOneUse())
85
239k
    return Phi;
86
89.9k
87
89.9k
  const APInt *M = nullptr;
88
89.9k
  Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
89
89.9k
90
89.9k
  // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
91
89.9k
  // with a new integer type of the corresponding bit width.
92
89.9k
  if (
match(J, m_c_And(m_Instruction(I), m_APInt(M)))89.9k
) {
93
147
    int32_t Bits = (*M + 1).exactLogBase2();
94
147
    if (
Bits > 0147
) {
95
117
      RT = IntegerType::get(Phi->getContext(), Bits);
96
117
      Visited.insert(Phi);
97
117
      CI.insert(J);
98
117
      return J;
99
117
    }
100
89.8k
  }
101
89.8k
  return Phi;
102
89.8k
}
103
104
bool RecurrenceDescriptor::getSourceExtensionKind(
105
    Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned,
106
    SmallPtrSetImpl<Instruction *> &Visited,
107
5
    SmallPtrSetImpl<Instruction *> &CI) {
108
5
109
5
  SmallVector<Instruction *, 8> Worklist;
110
5
  bool FoundOneOperand = false;
111
5
  unsigned DstSize = RT->getPrimitiveSizeInBits();
112
5
  Worklist.push_back(Exit);
113
5
114
5
  // Traverse the instructions in the reduction expression, beginning with the
115
5
  // exit value.
116
11
  while (
!Worklist.empty()11
) {
117
8
    Instruction *I = Worklist.pop_back_val();
118
14
    for (Use &U : I->operands()) {
119
14
120
14
      // Terminate the traversal if the operand is not an instruction, or we
121
14
      // reach the starting value.
122
14
      Instruction *J = dyn_cast<Instruction>(U.get());
123
14
      if (
!J || 14
J == Start14
)
124
3
        continue;
125
11
126
11
      // Otherwise, investigate the operation if it is also in the expression.
127
11
      
if (11
Visited.count(J)11
) {
128
3
        Worklist.push_back(J);
129
3
        continue;
130
3
      }
131
8
132
8
      // If the operand is not in Visited, it is not a reduction operation, but
133
8
      // it does feed into one. Make sure it is either a single-use sign- or
134
8
      // zero-extend instruction.
135
8
      CastInst *Cast = dyn_cast<CastInst>(J);
136
8
      bool IsSExtInst = isa<SExtInst>(J);
137
8
      if (
!Cast || 8
!Cast->hasOneUse()6
||
!(isa<ZExtInst>(J) || 6
IsSExtInst0
))
138
2
        return false;
139
6
140
6
      // Ensure the source type of the extend is no larger than the reduction
141
6
      // type. It is not necessary for the types to be identical.
142
6
      unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
143
6
      if (SrcSize > DstSize)
144
0
        return false;
145
6
146
6
      // Furthermore, ensure that all such extends are of the same kind.
147
6
      
if (6
FoundOneOperand6
) {
148
3
        if (IsSigned != IsSExtInst)
149
0
          return false;
150
3
      } else {
151
3
        FoundOneOperand = true;
152
3
        IsSigned = IsSExtInst;
153
3
      }
154
6
155
6
      // Lastly, if the source type of the extend matches the reduction type,
156
6
      // add the extend to CI so that we can avoid accounting for it in the
157
6
      // cost model.
158
6
      
if (6
SrcSize == DstSize6
)
159
4
        CI.insert(Cast);
160
14
    }
161
8
  }
162
3
  return true;
163
5
}
164
165
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
166
                                           Loop *TheLoop, bool HasFunNoNaNAttr,
167
1.52M
                                           RecurrenceDescriptor &RedDes) {
168
1.52M
  if (Phi->getNumIncomingValues() != 2)
169
0
    return false;
170
1.52M
171
1.52M
  // Reduction variables are only found in the loop header block.
172
1.52M
  
if (1.52M
Phi->getParent() != TheLoop->getHeader()1.52M
)
173
0
    return false;
174
1.52M
175
1.52M
  // Obtain the reduction start value from the value that comes from the loop
176
1.52M
  // preheader.
177
1.52M
  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
178
1.52M
179
1.52M
  // ExitInstruction is the single value which is used outside the loop.
180
1.52M
  // We only allow for a single reduction value to be used outside the loop.
181
1.52M
  // This includes users of the reduction, variables (which form a cycle
182
1.52M
  // which ends in the phi node).
183
1.52M
  Instruction *ExitInstruction = nullptr;
184
1.52M
  // Indicates that we found a reduction operation in our scan.
185
1.52M
  bool FoundReduxOp = false;
186
1.52M
187
1.52M
  // We start with the PHI node and scan for all of the users of this
188
1.52M
  // instruction. All users must be instructions that can be used as reduction
189
1.52M
  // variables (such as ADD). We must have a single out-of-block user. The cycle
190
1.52M
  // must include the original PHI.
191
1.52M
  bool FoundStartPHI = false;
192
1.52M
193
1.52M
  // To recognize min/max patterns formed by a icmp select sequence, we store
194
1.52M
  // the number of instruction we saw from the recognized min/max pattern,
195
1.52M
  //  to make sure we only see exactly the two instructions.
196
1.52M
  unsigned NumCmpSelectPatternInst = 0;
197
1.52M
  InstDesc ReduxDesc(false, nullptr);
198
1.52M
199
1.52M
  // Data used for determining if the recurrence has been type-promoted.
200
1.52M
  Type *RecurrenceType = Phi->getType();
201
1.52M
  SmallPtrSet<Instruction *, 4> CastInsts;
202
1.52M
  Instruction *Start = Phi;
203
1.52M
  bool IsSigned = false;
204
1.52M
205
1.52M
  SmallPtrSet<Instruction *, 8> VisitedInsts;
206
1.52M
  SmallVector<Instruction *, 8> Worklist;
207
1.52M
208
1.52M
  // Return early if the recurrence kind does not match the type of Phi. If the
209
1.52M
  // recurrence kind is arithmetic, we attempt to look through AND operations
210
1.52M
  // resulting from the type promotion performed by InstCombine.  Vector
211
1.52M
  // operations are not limited to the legal integer widths, so we may be able
212
1.52M
  // to evaluate the reduction in the narrower width.
213
1.52M
  if (
RecurrenceType->isFloatingPointTy()1.52M
) {
214
77.0k
    if (!isFloatingPointRecurrenceKind(Kind))
215
53.0k
      return false;
216
1.44M
  } else {
217
1.44M
    if (!isIntegerRecurrenceKind(Kind))
218
477k
      return false;
219
967k
    
if (967k
isArithmeticRecurrenceKind(Kind)967k
)
220
329k
      Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
221
1.44M
  }
222
1.52M
223
991k
  Worklist.push_back(Start);
224
991k
  VisitedInsts.insert(Start);
225
991k
226
991k
  // A value in the reduction can be used:
227
991k
  //  - By the reduction:
228
991k
  //      - Reduction operation:
229
991k
  //        - One use of reduction value (safe).
230
991k
  //        - Multiple use of reduction value (not safe).
231
991k
  //      - PHI:
232
991k
  //        - All uses of the PHI must be the reduction (safe).
233
991k
  //        - Otherwise, not safe.
234
991k
  //  - By instructions outside of the loop (safe).
235
991k
  //      * One value may have several outside users, but all outside
236
991k
  //        uses must be of the same value.
237
991k
  //  - By an instruction that is not part of the reduction (not safe).
238
991k
  //    This is either:
239
991k
  //      * An instruction type other than PHI or the reduction operation.
240
991k
  //      * A PHI in the header other than the initial PHI.
241
1.98M
  while (
!Worklist.empty()1.98M
) {
242
1.97M
    Instruction *Cur = Worklist.back();
243
1.97M
    Worklist.pop_back();
244
1.97M
245
1.97M
    // No Users.
246
1.97M
    // If the instruction has no users then this is a broken chain and can't be
247
1.97M
    // a reduction variable.
248
1.97M
    if (Cur->use_empty())
249
9.18k
      return false;
250
1.96M
251
1.96M
    bool IsAPhi = isa<PHINode>(Cur);
252
1.96M
253
1.96M
    // A header PHI use other than the original PHI.
254
1.96M
    if (
Cur != Phi && 1.96M
IsAPhi972k
&&
Cur->getParent() == Phi->getParent()130
)
255
9
      return false;
256
1.96M
257
1.96M
    // Reductions of instructions such as Div, and Sub is only possible if the
258
1.96M
    // LHS is the reduction variable.
259
1.96M
    
if (1.96M
!Cur->isCommutative() && 1.96M
!IsAPhi1.65M
&&
!isa<SelectInst>(Cur)666k
&&
260
1.96M
        
!isa<ICmpInst>(Cur)645k
&&
!isa<FCmpInst>(Cur)622k
&&
261
619k
        !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
262
323k
      return false;
263
1.64M
264
1.64M
    // Any reduction instruction must be of one of the allowed kinds. We ignore
265
1.64M
    // the starting value (the Phi or an AND instruction if the Phi has been
266
1.64M
    // type-promoted).
267
1.64M
    
if (1.64M
Cur != Start1.64M
) {
268
648k
      ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
269
648k
      if (!ReduxDesc.isRecurrence())
270
582k
        return false;
271
1.05M
    }
272
1.05M
273
1.05M
    // A reduction operation must only have one use of the reduction value.
274
1.05M
    
if (1.05M
!IsAPhi && 1.05M
Kind != RK_IntegerMinMax66.7k
&&
Kind != RK_FloatMinMax64.9k
&&
275
64.8k
        hasMultipleUsesOf(Cur, VisitedInsts))
276
36
      return false;
277
1.05M
278
1.05M
    // All inputs to a PHI node must be a reduction value.
279
1.05M
    
if (1.05M
IsAPhi && 1.05M
Cur != Phi991k
&&
!areAllUsesIn(Cur, VisitedInsts)121
)
280
29
      return false;
281
1.05M
282
1.05M
    
if (1.05M
Kind == RK_IntegerMinMax &&
283
161k
        
(isa<ICmpInst>(Cur) || 161k
isa<SelectInst>(Cur)160k
))
284
1.86k
      ++NumCmpSelectPatternInst;
285
1.05M
    if (
Kind == RK_FloatMinMax && 1.05M
(isa<FCmpInst>(Cur) || 6.33k
isa<SelectInst>(Cur)6.31k
))
286
34
      ++NumCmpSelectPatternInst;
287
1.05M
288
1.05M
    // Check  whether we found a reduction operator.
289
66.7k
    FoundReduxOp |= !IsAPhi && Cur != Start;
290
1.05M
291
1.05M
    // Process users of current instruction. Push non-PHI nodes after PHI nodes
292
1.05M
    // onto the stack. This way we are going to have seen all inputs to PHI
293
1.05M
    // nodes once we get to them.
294
1.05M
    SmallVector<Instruction *, 8> NonPHIs;
295
1.05M
    SmallVector<Instruction *, 8> PHIs;
296
2.42M
    for (User *U : Cur->users()) {
297
2.42M
      Instruction *UI = cast<Instruction>(U);
298
2.42M
299
2.42M
      // Check if we found the exit user.
300
2.42M
      BasicBlock *Parent = UI->getParent();
301
2.42M
      if (
!TheLoop->contains(Parent)2.42M
) {
302
77.4k
        // If we already know this instruction is used externally, move on to
303
77.4k
        // the next user.
304
77.4k
        if (ExitInstruction == Cur)
305
4
          continue;
306
77.4k
307
77.4k
        // Exit if you find multiple values used outside or if the header phi
308
77.4k
        // node is being used. In this case the user uses the value of the
309
77.4k
        // previous iteration, in which case we would loose "VF-1" iterations of
310
77.4k
        // the reduction operation if we vectorize.
311
77.4k
        
if (77.4k
ExitInstruction != nullptr || 77.4k
Cur == Phi77.4k
)
312
59.9k
          return false;
313
17.4k
314
17.4k
        // The instruction used by an outside user must be the last instruction
315
17.4k
        // before we feed back to the reduction phi. Otherwise, we loose VF-1
316
17.4k
        // operations on the value.
317
17.4k
        
if (17.4k
!is_contained(Phi->operands(), Cur)17.4k
)
318
94
          return false;
319
17.3k
320
17.3k
        ExitInstruction = Cur;
321
17.3k
        continue;
322
17.3k
      }
323
2.34M
324
2.34M
      // Process instructions only once (termination). Each reduction cycle
325
2.34M
      // value must only be used once, except by phi nodes and min/max
326
2.34M
      // reductions which are represented as a cmp followed by a select.
327
2.34M
      InstDesc IgnoredVal(false, nullptr);
328
2.34M
      if (
VisitedInsts.insert(UI).second2.34M
) {
329
2.28M
        if (isa<PHINode>(UI))
330
1.01k
          PHIs.push_back(UI);
331
2.28M
        else
332
2.28M
          NonPHIs.push_back(UI);
333
2.34M
      } else 
if (57.2k
!isa<PHINode>(UI) &&
334
3.81k
                 
((!isa<FCmpInst>(UI) && 3.81k
!isa<ICmpInst>(UI)3.81k
&&
335
3.81k
                   !isa<SelectInst>(UI)) ||
336
985
                  !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
337
2.88k
        return false;
338
2.34M
339
2.34M
      // Remember that we completed the cycle.
340
2.34M
      
if (2.34M
UI == Phi2.34M
)
341
52.5k
        FoundStartPHI = true;
342
2.42M
    }
343
995k
    Worklist.append(PHIs.begin(), PHIs.end());
344
995k
    Worklist.append(NonPHIs.begin(), NonPHIs.end());
345
995k
  }
346
991k
347
991k
  // This means we have seen one but not the other instruction of the
348
991k
  // pattern or more than just a select and cmp.
349
13.3k
  
if (13.3k
(Kind == RK_IntegerMinMax || 13.3k
Kind == RK_FloatMinMax13.2k
) &&
350
179
      NumCmpSelectPatternInst != 2)
351
18
    return false;
352
13.3k
353
13.3k
  
if (13.3k
!FoundStartPHI || 13.3k
!FoundReduxOp13.3k
||
!ExitInstruction13.3k
)
354
2
    return false;
355
13.3k
356
13.3k
  // If we think Phi may have been type-promoted, we also need to ensure that
357
13.3k
  // all source operands of the reduction are either SExtInsts or ZEstInsts. If
358
13.3k
  // so, we will be able to evaluate the reduction in the narrower bit width.
359
13.3k
  
if (13.3k
Start != Phi13.3k
)
360
5
    
if (5
!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
361
5
                                IsSigned, VisitedInsts, CastInsts))
362
2
      return false;
363
13.3k
364
13.3k
  // We found a reduction var if we have reached the original phi node and we
365
13.3k
  // only have a single instruction with out-of-loop users.
366
13.3k
367
13.3k
  // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
368
13.3k
  // is saved as part of the RecurrenceDescriptor.
369
13.3k
370
13.3k
  // Save the description of this reduction variable.
371
13.3k
  RecurrenceDescriptor RD(
372
13.3k
      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
373
13.3k
      ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
374
13.3k
  RedDes = RD;
375
13.3k
376
13.3k
  return true;
377
13.3k
}
378
379
/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
380
/// pattern corresponding to a min(X, Y) or max(X, Y).
381
RecurrenceDescriptor::InstDesc
382
6.26k
RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {
383
6.26k
384
6.26k
  assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
385
6.26k
         "Expect a select instruction");
386
6.26k
  Instruction *Cmp = nullptr;
387
6.26k
  SelectInst *Select = nullptr;
388
6.26k
389
6.26k
  // We must handle the select(cmp()) as a single instruction. Advance to the
390
6.26k
  // select.
391
6.26k
  if (
(Cmp = dyn_cast<ICmpInst>(I)) || 6.26k
(Cmp = dyn_cast<FCmpInst>(I))5.08k
) {
392
1.20k
    if (
!Cmp->hasOneUse() || 1.20k
!(Select = dyn_cast<SelectInst>(*I->user_begin()))1.16k
)
393
238
      return InstDesc(false, I);
394
965
    return InstDesc(Select, Prev.getMinMaxKind());
395
965
  }
396
5.06k
397
5.06k
  // Only handle single use cases for now.
398
5.06k
  
if (5.06k
!(Select = dyn_cast<SelectInst>(I))5.06k
)
399
0
    return InstDesc(false, I);
400
5.06k
  
if (5.06k
!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
401
142
      !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
402
18
    return InstDesc(false, I);
403
5.04k
  
if (5.04k
!Cmp->hasOneUse()5.04k
)
404
152
    return InstDesc(false, I);
405
4.89k
406
4.89k
  Value *CmpLeft;
407
4.89k
  Value *CmpRight;
408
4.89k
409
4.89k
  // Look for a min/max pattern.
410
4.89k
  if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
411
1.39k
    return InstDesc(Select, MRK_UIntMin);
412
3.50k
  else 
if (3.50k
m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.50k
)
413
58
    return InstDesc(Select, MRK_UIntMax);
414
3.44k
  else 
if (3.44k
m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.44k
)
415
242
    return InstDesc(Select, MRK_SIntMax);
416
3.20k
  else 
if (3.20k
m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.20k
)
417
136
    return InstDesc(Select, MRK_SIntMin);
418
3.06k
  else 
if (3.06k
m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.06k
)
419
10
    return InstDesc(Select, MRK_FloatMin);
420
3.05k
  else 
if (3.05k
m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.05k
)
421
8
    return InstDesc(Select, MRK_FloatMax);
422
3.04k
  else 
if (3.04k
m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.04k
)
423
8
    return InstDesc(Select, MRK_FloatMin);
424
3.03k
  else 
if (3.03k
m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)3.03k
)
425
8
    return InstDesc(Select, MRK_FloatMax);
426
3.03k
427
3.03k
  return InstDesc(false, I);
428
3.03k
}
429
430
RecurrenceDescriptor::InstDesc
431
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
432
648k
                                        InstDesc &Prev, bool HasFunNoNaNAttr) {
433
648k
  bool FP = I->getType()->isFloatingPointTy();
434
648k
  Instruction *UAI = Prev.getUnsafeAlgebraInst();
435
648k
  if (
!UAI && 648k
FP645k
&&
!I->hasUnsafeAlgebra()24.2k
)
436
24.1k
    UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
437
648k
438
648k
  switch (I->getOpcode()) {
439
295k
  default:
440
295k
    return InstDesc(false, I);
441
121
  case Instruction::PHI:
442
121
    return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
443
236k
  case Instruction::Sub:
444
236k
  case Instruction::Add:
445
236k
    return InstDesc(Kind == RK_IntegerAdd, I);
446
18.9k
  case Instruction::Mul:
447
18.9k
    return InstDesc(Kind == RK_IntegerMult, I);
448
17.7k
  case Instruction::And:
449
17.7k
    return InstDesc(Kind == RK_IntegerAnd, I);
450
17.9k
  case Instruction::Or:
451
17.9k
    return InstDesc(Kind == RK_IntegerOr, I);
452
610
  case Instruction::Xor:
453
610
    return InstDesc(Kind == RK_IntegerXor, I);
454
8.84k
  case Instruction::FMul:
455
8.84k
    return InstDesc(Kind == RK_FloatMult, I, UAI);
456
6.73k
  case Instruction::FSub:
457
6.73k
  case Instruction::FAdd:
458
6.73k
    return InstDesc(Kind == RK_FloatAdd, I, UAI);
459
46.4k
  case Instruction::FCmp:
460
46.4k
  case Instruction::ICmp:
461
46.4k
  case Instruction::Select:
462
46.4k
    if (Kind != RK_IntegerMinMax &&
463
41.2k
        
(!HasFunNoNaNAttr || 41.2k
Kind != RK_FloatMinMax68
))
464
41.2k
      return InstDesc(false, I);
465
5.28k
    return isMinMaxSelectCmpPattern(I, Prev);
466
648k
  }
467
648k
}
468
469
bool RecurrenceDescriptor::hasMultipleUsesOf(
470
64.8k
    Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
471
64.8k
  unsigned NumUses = 0;
472
194k
  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
473
129k
       
++Use129k
) {
474
129k
    if (Insts.count(dyn_cast<Instruction>(*Use)))
475
64.9k
      ++NumUses;
476
129k
    if (NumUses > 1)
477
36
      return true;
478
129k
  }
479
64.8k
480
64.8k
  return false;
481
64.8k
}
482
bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
483
178k
                                          RecurrenceDescriptor &RedDes) {
484
178k
485
178k
  BasicBlock *Header = TheLoop->getHeader();
486
178k
  Function &F = *Header->getParent();
487
178k
  bool HasFunNoNaNAttr =
488
178k
      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
489
178k
490
178k
  if (
AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)178k
) {
491
10.4k
    DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
492
10.4k
    return true;
493
10.4k
  }
494
168k
  
if (168k
AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)168k
) {
495
47
    DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
496
47
    return true;
497
47
  }
498
168k
  
if (168k
AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)168k
) {
499
149
    DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
500
149
    return true;
501
149
  }
502
168k
  
if (168k
AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)168k
) {
503
37
    DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
504
37
    return true;
505
37
  }
506
168k
  
if (168k
AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)168k
) {
507
12
    DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
508
12
    return true;
509
12
  }
510
168k
  
if (168k
AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr,
511
168k
                      RedDes)) {
512
144
    DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
513
144
    return true;
514
144
  }
515
168k
  
if (168k
AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)168k
) {
516
61
    DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
517
61
    return true;
518
61
  }
519
168k
  
if (168k
AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)168k
) {
520
2.48k
    DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
521
2.48k
    return true;
522
2.48k
  }
523
165k
  
if (165k
AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)165k
) {
524
17
    DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n");
525
17
    return true;
526
17
  }
527
165k
  // Not a reduction of known type.
528
165k
  return false;
529
165k
}
530
531
bool RecurrenceDescriptor::isFirstOrderRecurrence(
532
    PHINode *Phi, Loop *TheLoop,
533
57.4k
    DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
534
57.4k
535
57.4k
  // Ensure the phi node is in the loop header and has two incoming values.
536
57.4k
  if (Phi->getParent() != TheLoop->getHeader() ||
537
57.4k
      Phi->getNumIncomingValues() != 2)
538
0
    return false;
539
57.4k
540
57.4k
  // Ensure the loop has a preheader and a single latch block. The loop
541
57.4k
  // vectorizer will need the latch to set up the next iteration of the loop.
542
57.4k
  auto *Preheader = TheLoop->getLoopPreheader();
543
57.4k
  auto *Latch = TheLoop->getLoopLatch();
544
57.4k
  if (
!Preheader || 57.4k
!Latch57.4k
)
545
0
    return false;
546
57.4k
547
57.4k
  // Ensure the phi node's incoming blocks are the loop preheader and latch.
548
57.4k
  
if (57.4k
Phi->getBasicBlockIndex(Preheader) < 0 ||
549
57.4k
      Phi->getBasicBlockIndex(Latch) < 0)
550
0
    return false;
551
57.4k
552
57.4k
  // Get the previous value. The previous value comes from the latch edge while
553
57.4k
  // the initial value comes form the preheader edge.
554
57.4k
  auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
555
57.4k
  if (
!Previous || 57.4k
!TheLoop->contains(Previous)57.4k
||
isa<PHINode>(Previous)57.4k
||
556
57.3k
      SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
557
174
    return false;
558
57.3k
559
57.3k
  // Ensure every user of the phi node is dominated by the previous value.
560
57.3k
  // The dominance requirement ensures the loop vectorizer will not need to
561
57.3k
  // vectorize the initial value prior to the first iteration of the loop.
562
57.3k
  // TODO: Consider extending this sinking to handle other kinds of instructions
563
57.3k
  // and expressions, beyond sinking a single cast past Previous.
564
57.3k
  
if (57.3k
Phi->hasOneUse()57.3k
) {
565
35.6k
    auto *I = Phi->user_back();
566
35.6k
    if (
I->isCast() && 35.6k
(I->getParent() == Phi->getParent())724
&&
I->hasOneUse()718
&&
567
35.6k
        
DT->dominates(Previous, I->user_back())596
) {
568
42
      if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
569
24
        SinkAfter[I] = Previous;
570
42
      return true;
571
42
    }
572
57.2k
  }
573
57.2k
574
57.2k
  for (User *U : Phi->users())
575
76.3k
    
if (auto *76.3k
I76.3k
= dyn_cast<Instruction>(U)) {
576
76.3k
      if (!DT->dominates(Previous, I))
577
57.0k
        return false;
578
185
    }
579
185
580
185
  return true;
581
185
}
582
583
/// This function returns the identity element (or neutral element) for
584
/// the operation K.
585
Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
586
1.40k
                                                      Type *Tp) {
587
1.40k
  switch (K) {
588
1.31k
  case RK_IntegerXor:
589
1.31k
  case RK_IntegerAdd:
590
1.31k
  case RK_IntegerOr:
591
1.31k
    // Adding, Xoring, Oring zero to a number does not change it.
592
1.31k
    return ConstantInt::get(Tp, 0);
593
47
  case RK_IntegerMult:
594
47
    // Multiplying a number by 1 does not change it.
595
47
    return ConstantInt::get(Tp, 1);
596
27
  case RK_IntegerAnd:
597
27
    // AND-ing a number with an all-1 value does not change it.
598
27
    return ConstantInt::get(Tp, -1, true);
599
1
  case RK_FloatMult:
600
1
    // Multiplying a number by 1 does not change it.
601
1
    return ConstantFP::get(Tp, 1.0L);
602
18
  case RK_FloatAdd:
603
18
    // Adding zero to a number does not change it.
604
18
    return ConstantFP::get(Tp, 0.0L);
605
0
  default:
606
0
    llvm_unreachable("Unknown recurrence kind");
607
0
  }
608
0
}
609
610
/// This function translates the recurrence kind to an LLVM binary operator.
611
1.51k
unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
612
1.51k
  switch (Kind) {
613
1.22k
  case RK_IntegerAdd:
614
1.22k
    return Instruction::Add;
615
47
  case RK_IntegerMult:
616
47
    return Instruction::Mul;
617
85
  case RK_IntegerOr:
618
85
    return Instruction::Or;
619
27
  case RK_IntegerAnd:
620
27
    return Instruction::And;
621
10
  case RK_IntegerXor:
622
10
    return Instruction::Xor;
623
1
  case RK_FloatMult:
624
1
    return Instruction::FMul;
625
18
  case RK_FloatAdd:
626
18
    return Instruction::FAdd;
627
84
  case RK_IntegerMinMax:
628
84
    return Instruction::ICmp;
629
17
  case RK_FloatMinMax:
630
17
    return Instruction::FCmp;
631
0
  default:
632
0
    llvm_unreachable("Unknown recurrence operation");
633
0
  }
634
0
}
635
636
Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
637
                                            MinMaxRecurrenceKind RK,
638
230
                                            Value *Left, Value *Right) {
639
230
  CmpInst::Predicate P = CmpInst::ICMP_NE;
640
230
  switch (RK) {
641
0
  default:
642
0
    llvm_unreachable("Unknown min/max recurrence kind");
643
8
  case MRK_UIntMin:
644
8
    P = CmpInst::ICMP_ULT;
645
8
    break;
646
24
  case MRK_UIntMax:
647
24
    P = CmpInst::ICMP_UGT;
648
24
    break;
649
43
  case MRK_SIntMin:
650
43
    P = CmpInst::ICMP_SLT;
651
43
    break;
652
96
  case MRK_SIntMax:
653
96
    P = CmpInst::ICMP_SGT;
654
96
    break;
655
10
  case MRK_FloatMin:
656
10
    P = CmpInst::FCMP_OLT;
657
10
    break;
658
49
  case MRK_FloatMax:
659
49
    P = CmpInst::FCMP_OGT;
660
49
    break;
661
230
  }
662
230
663
230
  // We only match FP sequences with unsafe algebra, so we can unconditionally
664
230
  // set it on any generated instructions.
665
230
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
666
230
  FastMathFlags FMF;
667
230
  FMF.setUnsafeAlgebra();
668
230
  Builder.setFastMathFlags(FMF);
669
230
670
230
  Value *Cmp;
671
230
  if (
RK == MRK_FloatMin || 230
RK == MRK_FloatMax220
)
672
59
    Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
673
230
  else
674
171
    Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
675
230
676
230
  Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
677
230
  return Select;
678
230
}
679
680
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
681
                                         const SCEV *Step, BinaryOperator *BOp)
682
108k
  : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
683
108k
  assert(IK != IK_NoInduction && "Not an induction");
684
108k
685
108k
  // Start value type should match the induction kind and the value
686
108k
  // itself should not be null.
687
108k
  assert(StartValue && "StartValue is null");
688
108k
  assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
689
108k
         "StartValue is not a pointer for pointer induction");
690
108k
  assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
691
108k
         "StartValue is not an integer for integer induction");
692
108k
693
108k
  // Check the Step Value. It should be non-zero integer value.
694
108k
  assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
695
108k
         "Step value is zero");
696
108k
697
108k
  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
698
108k
         "Step value should be constant for pointer induction");
699
108k
  assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
700
108k
         "StepValue is not an integer");
701
108k
702
108k
  assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
703
108k
         "StepValue is not FP for FpInduction");
704
108k
  assert((IK != IK_FpInduction || (InductionBinOp &&
705
108k
          (InductionBinOp->getOpcode() == Instruction::FAdd ||
706
108k
           InductionBinOp->getOpcode() == Instruction::FSub))) &&
707
108k
         "Binary opcode should be specified for FP induction");
708
108k
}
709
710
0
int InductionDescriptor::getConsecutiveDirection() const {
711
0
  ConstantInt *ConstStep = getConstIntStepValue();
712
0
  if (
ConstStep && 0
(ConstStep->isOne() || 0
ConstStep->isMinusOne()0
))
713
0
    return ConstStep->getSExtValue();
714
0
  return 0;
715
0
}
716
717
221k
ConstantInt *InductionDescriptor::getConstIntStepValue() const {
718
221k
  if (isa<SCEVConstant>(Step))
719
219k
    return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
720
2.05k
  return nullptr;
721
2.05k
}
722
723
Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
724
                                      ScalarEvolution *SE,
725
22.8k
                                      const DataLayout& DL) const {
726
22.8k
727
22.8k
  SCEVExpander Exp(*SE, DL, "induction");
728
22.8k
  assert(Index->getType() == Step->getType() &&
729
22.8k
         "Index type does not match StepValue type");
730
22.8k
  switch (IK) {
731
11.3k
  case IK_IntInduction: {
732
11.3k
    assert(Index->getType() == StartValue->getType() &&
733
11.3k
           "Index type does not match StartValue type");
734
11.3k
735
11.3k
    // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
736
11.3k
    // and calculate (Start + Index * Step) for all cases, without
737
11.3k
    // special handling for "isOne" and "isMinusOne".
738
11.3k
    // But in the real life the result code getting worse. We mix SCEV
739
11.3k
    // expressions and ADD/SUB operations and receive redundant
740
11.3k
    // intermediate values being calculated in different ways and
741
11.3k
    // Instcombine is unable to reduce them all.
742
11.3k
743
11.3k
    if (getConstIntStepValue() &&
744
10.6k
        getConstIntStepValue()->isMinusOne())
745
2.35k
      return B.CreateSub(StartValue, Index);
746
8.96k
    
if (8.96k
getConstIntStepValue() &&
747
8.30k
        getConstIntStepValue()->isOne())
748
4.23k
      return B.CreateAdd(StartValue, Index);
749
4.72k
    const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
750
4.72k
                                   SE->getMulExpr(Step, SE->getSCEV(Index)));
751
4.72k
    return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
752
4.72k
  }
753
11.4k
  case IK_PtrInduction: {
754
11.4k
    assert(isa<SCEVConstant>(Step) &&
755
11.4k
           "Expected constant step for pointer induction");
756
11.4k
    const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
757
11.4k
    Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
758
11.4k
    return B.CreateGEP(nullptr, StartValue, Index);
759
4.72k
  }
760
36
  case IK_FpInduction: {
761
36
    assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
762
36
    assert(InductionBinOp &&
763
36
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
764
36
            InductionBinOp->getOpcode() == Instruction::FSub) &&
765
36
           "Original bin op should be defined for FP induction");
766
36
767
36
    Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
768
36
769
36
    // Floating point operations had to be 'fast' to enable the induction.
770
36
    FastMathFlags Flags;
771
36
    Flags.setUnsafeAlgebra();
772
36
773
36
    Value *MulExp = B.CreateFMul(StepValue, Index);
774
36
    if (isa<Instruction>(MulExp))
775
36
      // We have to check, the MulExp may be a constant.
776
36
      cast<Instruction>(MulExp)->setFastMathFlags(Flags);
777
36
778
36
    Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue,
779
36
                               MulExp, "induction");
780
36
    if (isa<Instruction>(BOp))
781
36
      cast<Instruction>(BOp)->setFastMathFlags(Flags);
782
36
783
36
    return BOp;
784
4.72k
  }
785
0
  case IK_NoInduction:
786
0
    return nullptr;
787
0
  }
788
0
  
llvm_unreachable0
("invalid enum");
789
0
}
790
791
bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
792
                                           ScalarEvolution *SE,
793
12.4k
                                           InductionDescriptor &D) {
794
12.4k
795
12.4k
  // Here we only handle FP induction variables.
796
12.4k
  assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
797
12.4k
798
12.4k
  if (TheLoop->getHeader() != Phi->getParent())
799
0
    return false;
800
12.4k
801
12.4k
  // The loop may have multiple entrances or multiple exits; we can analyze
802
12.4k
  // this phi if it has a unique entry value and a unique backedge value.
803
12.4k
  
if (12.4k
Phi->getNumIncomingValues() != 212.4k
)
804
0
    return false;
805
12.4k
  Value *BEValue = nullptr, *StartValue = nullptr;
806
12.4k
  if (
TheLoop->contains(Phi->getIncomingBlock(0))12.4k
) {
807
11.2k
    BEValue = Phi->getIncomingValue(0);
808
11.2k
    StartValue = Phi->getIncomingValue(1);
809
12.4k
  } else {
810
1.23k
    assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
811
1.23k
           "Unexpected Phi node in the loop"); 
812
1.23k
    BEValue = Phi->getIncomingValue(1);
813
1.23k
    StartValue = Phi->getIncomingValue(0);
814
1.23k
  }
815
12.4k
816
12.4k
  BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
817
12.4k
  if (!BOp)
818
462
    return false;
819
11.9k
820
11.9k
  Value *Addend = nullptr;
821
11.9k
  if (
BOp->getOpcode() == Instruction::FAdd11.9k
) {
822
622
    if (BOp->getOperand(0) == Phi)
823
424
      Addend = BOp->getOperand(1);
824
198
    else 
if (198
BOp->getOperand(1) == Phi198
)
825
74
      Addend = BOp->getOperand(0);
826
11.9k
  } else 
if (11.3k
BOp->getOpcode() == Instruction::FSub11.3k
)
827
88
    
if (88
BOp->getOperand(0) == Phi88
)
828
40
      Addend = BOp->getOperand(1);
829
11.9k
830
11.9k
  if (!Addend)
831
11.4k
    return false;
832
538
833
538
  // The addend should be loop invariant
834
538
  
if (auto *538
I538
= dyn_cast<Instruction>(Addend))
835
472
    
if (472
TheLoop->contains(I)472
)
836
450
      return false;
837
88
838
88
  // FP Step has unknown SCEV
839
88
  const SCEV *Step = SE->getUnknown(Addend);
840
88
  D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
841
88
  return true;
842
88
}
843
844
bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
845
                                         PredicatedScalarEvolution &PSE,
846
                                         InductionDescriptor &D,
847
222k
                                         bool Assume) {
848
222k
  Type *PhiTy = Phi->getType();
849
222k
850
222k
  // Handle integer and pointer inductions variables.
851
222k
  // Now we handle also FP induction but not trying to make a
852
222k
  // recurrent expression from the PHI node in-place.
853
222k
854
222k
  if (
!PhiTy->isIntegerTy() && 222k
!PhiTy->isPointerTy()85.4k
&&
855
222k
      
!PhiTy->isFloatTy()12.4k
&&
!PhiTy->isDoubleTy()12.0k
&&
!PhiTy->isHalfTy()16
)
856
16
    return false;
857
222k
858
222k
  
if (222k
PhiTy->isFloatingPointTy()222k
)
859
12.4k
    return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
860
210k
861
210k
  const SCEV *PhiScev = PSE.getSCEV(Phi);
862
210k
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
863
210k
864
210k
  // We need this expression to be an AddRecExpr.
865
210k
  if (
Assume && 210k
!AR51.0k
)
866
50.5k
    AR = PSE.getAsAddRec(Phi);
867
210k
868
210k
  if (
!AR210k
) {
869
101k
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
870
101k
    return false;
871
101k
  }
872
108k
873
108k
  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
874
108k
}
875
876
bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
877
                                         ScalarEvolution *SE,
878
                                         InductionDescriptor &D,
879
109k
                                         const SCEV *Expr) {
880
109k
  Type *PhiTy = Phi->getType();
881
109k
  // We only handle integer and pointer inductions variables.
882
109k
  if (
!PhiTy->isIntegerTy() && 109k
!PhiTy->isPointerTy()17.4k
)
883
0
    return false;
884
109k
885
109k
  // Check that the PHI is consecutive.
886
109k
  
const SCEV *PhiScev = Expr ? 109k
Expr108k
:
SE->getSCEV(Phi)56
;
887
109k
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
888
109k
889
109k
  if (
!AR109k
) {
890
7
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
891
7
    return false;
892
7
  }
893
109k
894
109k
  
if (109k
AR->getLoop() != TheLoop109k
) {
895
0
    // FIXME: We should treat this as a uniform. Unfortunately, we
896
0
    // don't currently know how to handled uniform PHIs.
897
0
    DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
898
0
    return false;    
899
0
  }
900
109k
901
109k
  Value *StartValue =
902
109k
    Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
903
109k
  const SCEV *Step = AR->getStepRecurrence(*SE);
904
109k
  // Calculate the pointer stride and check if it is consecutive.
905
109k
  // The stride may be a constant or a loop invariant integer value.
906
109k
  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
907
109k
  if (
!ConstStep && 109k
!SE->isLoopInvariant(Step, TheLoop)1.69k
)
908
8
    return false;
909
109k
910
109k
  
if (109k
PhiTy->isIntegerTy()109k
) {
911
91.5k
    D = InductionDescriptor(StartValue, IK_IntInduction, Step);
912
91.5k
    return true;
913
91.5k
  }
914
17.4k
915
109k
  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
916
17.4k
  // Pointer induction should be a constant.
917
17.4k
  if (!ConstStep)
918
960
    return false;
919
16.5k
920
16.5k
  ConstantInt *CV = ConstStep->getValue();
921
16.5k
  Type *PointerElementType = PhiTy->getPointerElementType();
922
16.5k
  // The pointer stride cannot be determined if the pointer element type is not
923
16.5k
  // sized.
924
16.5k
  if (!PointerElementType->isSized())
925
2
    return false;
926
16.5k
927
16.5k
  const DataLayout &DL = Phi->getModule()->getDataLayout();
928
16.5k
  int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
929
16.5k
  if (!Size)
930
2
    return false;
931
16.5k
932
16.5k
  int64_t CVSize = CV->getSExtValue();
933
16.5k
  if (CVSize % Size)
934
4
    return false;
935
16.5k
  auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
936
16.5k
                                    true /* signed */);
937
16.5k
  D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
938
16.5k
  return true;
939
16.5k
}
940
941
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
942
3.32M
                                   bool PreserveLCSSA) {
943
3.32M
  bool Changed = false;
944
3.32M
945
3.32M
  // We re-use a vector for the in-loop predecesosrs.
946
3.32M
  SmallVector<BasicBlock *, 4> InLoopPredecessors;
947
3.32M
948
4.19M
  auto RewriteExit = [&](BasicBlock *BB) {
949
4.19M
    assert(InLoopPredecessors.empty() &&
950
4.19M
           "Must start with an empty predecessors list!");
951
4.19M
    auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
952
4.19M
953
4.19M
    // See if there are any non-loop predecessors of this exit block and
954
4.19M
    // keep track of the in-loop predecessors.
955
4.19M
    bool IsDedicatedExit = true;
956
4.19M
    for (auto *PredBB : predecessors(BB))
957
14.3M
      
if (14.3M
L->contains(PredBB)14.3M
) {
958
4.61M
        if (isa<IndirectBrInst>(PredBB->getTerminator()))
959
4.61M
          // We cannot rewrite exiting edges from an indirectbr.
960
365
          return false;
961
4.61M
962
4.61M
        InLoopPredecessors.push_back(PredBB);
963
14.3M
      } else {
964
9.70M
        IsDedicatedExit = false;
965
9.70M
      }
966
4.19M
967
4.19M
    assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
968
4.19M
969
4.19M
    // Nothing to do if this is already a dedicated exit.
970
4.19M
    if (IsDedicatedExit)
971
2.83M
      return false;
972
1.35M
973
1.35M
    auto *NewExitBB = SplitBlockPredecessors(
974
1.35M
        BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);
975
1.35M
976
1.35M
    if (!NewExitBB)
977
1.35M
      DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
978
1.35M
                   << *L << "\n");
979
1.35M
    else
980
1.35M
      DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
981
4.19M
                   << NewExitBB->getName() << "\n");
982
4.19M
    return true;
983
4.19M
  };
984
3.32M
985
3.32M
  // Walk the exit blocks directly rather than building up a data structure for
986
3.32M
  // them, but only visit each one once.
987
3.32M
  SmallPtrSet<BasicBlock *, 4> Visited;
988
3.32M
  for (auto *BB : L->blocks())
989
14.7M
    
for (auto *SuccBB : successors(BB)) 14.7M
{
990
25.0M
      // We're looking for exit blocks so skip in-loop successors.
991
25.0M
      if (L->contains(SuccBB))
992
20.6M
        continue;
993
4.42M
994
4.42M
      // Visit each exit block exactly once.
995
4.42M
      
if (4.42M
!Visited.insert(SuccBB).second4.42M
)
996
231k
        continue;
997
4.19M
998
4.19M
      Changed |= RewriteExit(SuccBB);
999
4.19M
    }
1000
3.32M
1001
3.32M
  return Changed;
1002
3.32M
}
1003
1004
/// \brief Returns the instructions that use values defined in the loop.
1005
47
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
1006
47
  SmallVector<Instruction *, 8> UsedOutside;
1007
47
1008
47
  for (auto *Block : L->getBlocks())
1009
47
    // FIXME: I believe that this could use copy_if if the Inst reference could
1010
47
    // be adapted into a pointer.
1011
49
    
for (auto &Inst : *Block) 49
{
1012
731
      auto Users = Inst.users();
1013
731
      if (
any_of(Users, [&](User *U) 731
{
1014
856
            auto *Use = cast<Instruction>(U);
1015
856
            return !L->contains(Use->getParent());
1016
856
          }))
1017
6
        UsedOutside.push_back(&Inst);
1018
49
    }
1019
47
1020
47
  return UsedOutside;
1021
47
}
1022
1023
209k
void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
1024
209k
  // By definition, all loop passes need the LoopInfo analysis and the
1025
209k
  // Dominator tree it depends on. Because they all participate in the loop
1026
209k
  // pass manager, they must also preserve these.
1027
209k
  AU.addRequired<DominatorTreeWrapperPass>();
1028
209k
  AU.addPreserved<DominatorTreeWrapperPass>();
1029
209k
  AU.addRequired<LoopInfoWrapperPass>();
1030
209k
  AU.addPreserved<LoopInfoWrapperPass>();
1031
209k
1032
209k
  // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
1033
209k
  // here because users shouldn't directly get them from this header.
1034
209k
  extern char &LoopSimplifyID;
1035
209k
  extern char &LCSSAID;
1036
209k
  AU.addRequiredID(LoopSimplifyID);
1037
209k
  AU.addPreservedID(LoopSimplifyID);
1038
209k
  AU.addRequiredID(LCSSAID);
1039
209k
  AU.addPreservedID(LCSSAID);
1040
209k
  // This is used in the LPPassManager to perform LCSSA verification on passes
1041
209k
  // which preserve lcssa form
1042
209k
  AU.addRequired<LCSSAVerificationPass>();
1043
209k
  AU.addPreserved<LCSSAVerificationPass>();
1044
209k
1045
209k
  // Loop passes are designed to run inside of a loop pass manager which means
1046
209k
  // that any function analyses they require must be required by the first loop
1047
209k
  // pass in the manager (so that it is computed before the loop pass manager
1048
209k
  // runs) and preserved by all loop pasess in the manager. To make this
1049
209k
  // reasonably robust, the set needed for most loop passes is maintained here.
1050
209k
  // If your loop pass requires an analysis not listed here, you will need to
1051
209k
  // carefully audit the loop pass manager nesting structure that results.
1052
209k
  AU.addRequired<AAResultsWrapperPass>();
1053
209k
  AU.addPreserved<AAResultsWrapperPass>();
1054
209k
  AU.addPreserved<BasicAAWrapperPass>();
1055
209k
  AU.addPreserved<GlobalsAAWrapperPass>();
1056
209k
  AU.addPreserved<SCEVAAWrapperPass>();
1057
209k
  AU.addRequired<ScalarEvolutionWrapperPass>();
1058
209k
  AU.addPreserved<ScalarEvolutionWrapperPass>();
1059
209k
}
1060
1061
/// Manually defined generic "LoopPass" dependency initialization. This is used
1062
/// to initialize the exact set of passes from above in \c
1063
/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
1064
/// with:
1065
///
1066
///   INITIALIZE_PASS_DEPENDENCY(LoopPass)
1067
///
1068
/// As-if "LoopPass" were a pass.
1069
481k
void llvm::initializeLoopPassPass(PassRegistry &Registry) {
1070
481k
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1071
481k
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1072
481k
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
1073
481k
  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
1074
481k
  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1075
481k
  INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
1076
481k
  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
1077
481k
  INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
1078
481k
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
1079
481k
}
1080
1081
/// \brief Find string metadata for loop
1082
///
1083
/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
1084
/// operand or null otherwise.  If the string metadata is not found return
1085
/// Optional's not-a-value.
1086
Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
1087
252k
                                                            StringRef Name) {
1088
252k
  MDNode *LoopID = TheLoop->getLoopID();
1089
252k
  // Return none if LoopID is false.
1090
252k
  if (!LoopID)
1091
237k
    return None;
1092
14.9k
1093
14.9k
  // First operand should refer to the loop id itself.
1094
252k
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
1095
14.9k
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
1096
14.9k
1097
14.9k
  // Iterate over LoopID operands and look for MDString Metadata
1098
44.3k
  for (unsigned i = 1, e = LoopID->getNumOperands(); 
i < e44.3k
;
++i29.4k
) {
1099
29.4k
    MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
1100
29.4k
    if (!MD)
1101
0
      continue;
1102
29.4k
    MDString *S = dyn_cast<MDString>(MD->getOperand(0));
1103
29.4k
    if (!S)
1104
6.68k
      continue;
1105
22.7k
    // Return true if MDString holds expected MetaData.
1106
22.7k
    
if (22.7k
Name.equals(S->getString())22.7k
)
1107
13
      switch (MD->getNumOperands()) {
1108
1
      case 1:
1109
1
        return nullptr;
1110
12
      case 2:
1111
12
        return &MD->getOperand(1);
1112
0
      default:
1113
0
        llvm_unreachable("loop metadata has 0 or 1 operand");
1114
13
      }
1115
29.4k
  }
1116
14.9k
  return None;
1117
252k
}
1118
1119
/// Does a BFS from a given node to all of its children inside a given loop.
1120
/// The returned vector of nodes includes the starting point.
1121
SmallVector<DomTreeNode *, 16>
1122
2.05M
llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
1123
2.05M
  SmallVector<DomTreeNode *, 16> Worklist;
1124
12.2M
  auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
1125
12.2M
    // Only include subregions in the top level loop.
1126
12.2M
    BasicBlock *BB = DTN->getBlock();
1127
12.2M
    if (CurLoop->contains(BB))
1128
9.54M
      Worklist.push_back(DTN);
1129
12.2M
  };
1130
2.05M
1131
2.05M
  AddRegionToWorklist(N);
1132
2.05M
1133
11.6M
  for (size_t I = 0; 
I < Worklist.size()11.6M
;
I++9.54M
)
1134
9.54M
    for (DomTreeNode *Child : Worklist[I]->getChildren())
1135
10.1M
      AddRegionToWorklist(Child);
1136
2.05M
1137
2.05M
  return Worklist;
1138
2.05M
}
1139
1140
/// Returns true if the instruction in a loop is guaranteed to execute at least
1141
/// once.
1142
bool llvm::isGuaranteedToExecute(const Instruction &Inst,
1143
                                 const DominatorTree *DT, const Loop *CurLoop,
1144
52.1k
                                 const LoopSafetyInfo *SafetyInfo) {
1145
52.1k
  // We have to check to make sure that the instruction dominates all
1146
52.1k
  // of the exit blocks.  If it doesn't, then there is a path out of the loop
1147
52.1k
  // which does not execute this instruction, so we can't hoist it.
1148
52.1k
1149
52.1k
  // If the instruction is in the header block for the loop (which is very
1150
52.1k
  // common), it is always guaranteed to dominate the exit blocks.  Since this
1151
52.1k
  // is a common case, and can save some work, check it now.
1152
52.1k
  if (Inst.getParent() == CurLoop->getHeader())
1153
52.1k
    // If there's a throw in the header block, we can't guarantee we'll reach
1154
52.1k
    // Inst.
1155
31.1k
    return !SafetyInfo->HeaderMayThrow;
1156
21.0k
1157
21.0k
  // Somewhere in this loop there is an instruction which may throw and make us
1158
21.0k
  // exit the loop.
1159
21.0k
  
if (21.0k
SafetyInfo->MayThrow21.0k
)
1160
934
    return false;
1161
20.1k
1162
20.1k
  // Get the exit blocks for the current loop.
1163
20.1k
  SmallVector<BasicBlock *, 8> ExitBlocks;
1164
20.1k
  CurLoop->getExitBlocks(ExitBlocks);
1165
20.1k
1166
20.1k
  // Verify that the block dominates each of the exit blocks of the loop.
1167
20.1k
  for (BasicBlock *ExitBlock : ExitBlocks)
1168
21.7k
    
if (21.7k
!DT->dominates(Inst.getParent(), ExitBlock)21.7k
)
1169
16.2k
      return false;
1170
3.88k
1171
3.88k
  // As a degenerate case, if the loop is statically infinite then we haven't
1172
3.88k
  // proven anything since there are no exit blocks.
1173
3.88k
  
if (3.88k
ExitBlocks.empty()3.88k
)
1174
5
    return false;
1175
3.87k
1176
3.87k
  // FIXME: In general, we have to prove that the loop isn't an infinite loop.
1177
3.87k
  // See http::llvm.org/PR24078 .  (The "ExitBlocks.empty()" check above is
1178
3.87k
  // just a special case of this.)
1179
3.87k
  return true;
1180
3.87k
}
1181
1182
20
Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
1183
20
  // Only support loops with a unique exiting block, and a latch.
1184
20
  if (!L->getExitingBlock())
1185
2
    return None;
1186
18
1187
18
  // Get the branch weights for the the loop's backedge.
1188
18
  BranchInst *LatchBR =
1189
18
      dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
1190
18
  if (
!LatchBR || 18
LatchBR->getNumSuccessors() != 218
)
1191
0
    return None;
1192
18
1193
18
  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
1194
18
          LatchBR->getSuccessor(1) == L->getHeader()) &&
1195
18
         "At least one edge out of the latch must go to the header");
1196
18
1197
18
  // To estimate the number of times the loop body was executed, we want to
1198
18
  // know the number of times the backedge was taken, vs. the number of times
1199
18
  // we exited the loop.
1200
18
  uint64_t TrueVal, FalseVal;
1201
18
  if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
1202
6
    return None;
1203
12
1204
12
  
if (12
!TrueVal || 12
!FalseVal12
)
1205
4
    return 0;
1206
8
1207
8
  // Divide the count of the backedge by the count of the edge exiting the loop,
1208
8
  // rounding to nearest.
1209
8
  
if (8
LatchBR->getSuccessor(0) == L->getHeader()8
)
1210
4
    return (TrueVal + (FalseVal / 2)) / FalseVal;
1211
8
  else
1212
4
    return (FalseVal + (TrueVal / 2)) / TrueVal;
1213
0
}
1214
1215
/// \brief Adds a 'fast' flag to floating point operations.
1216
647
static Value *addFastMathFlag(Value *V) {
1217
647
  if (
isa<FPMathOperator>(V)647
) {
1218
149
    FastMathFlags Flags;
1219
149
    Flags.setUnsafeAlgebra();
1220
149
    cast<Instruction>(V)->setFastMathFlags(Flags);
1221
149
  }
1222
647
  return V;
1223
647
}
1224
1225
// Helper to generate a log2 shuffle reduction.
1226
Value *
1227
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
1228
                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
1229
439
                          ArrayRef<Value *> RedOps) {
1230
439
  unsigned VF = Src->getType()->getVectorNumElements();
1231
439
  // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
1232
439
  // and vector ops, reducing the set of values being computed by half each
1233
439
  // round.
1234
439
  assert(isPowerOf2_32(VF) &&
1235
439
         "Reduction emission only supported for pow2 vectors!");
1236
439
  Value *TmpVec = Src;
1237
439
  SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
1238
1.24k
  for (unsigned i = VF; 
i != 11.24k
;
i >>= 1810
) {
1239
810
    // Move the upper half of the vector to the lower half.
1240
2.43k
    for (unsigned j = 0; 
j != i / 22.43k
;
++j1.62k
)
1241
1.62k
      ShuffleMask[j] = Builder.getInt32(i / 2 + j);
1242
810
1243
810
    // Fill the rest of the mask with undef.
1244
810
    std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
1245
810
              UndefValue::get(Builder.getInt32Ty()));
1246
810
1247
810
    Value *Shuf = Builder.CreateShuffleVector(
1248
810
        TmpVec, UndefValue::get(TmpVec->getType()),
1249
810
        ConstantVector::get(ShuffleMask), "rdx.shuf");
1250
810
1251
810
    if (
Op != Instruction::ICmp && 810
Op != Instruction::FCmp706
) {
1252
647
      // Floating point operations had to be 'fast' to enable the reduction.
1253
647
      TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
1254
647
                                                   TmpVec, Shuf, "bin.rdx"));
1255
810
    } else {
1256
163
      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
1257
163
             "Invalid min/max");
1258
163
      TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec,
1259
163
                                                    Shuf);
1260
163
    }
1261
810
    if (!RedOps.empty())
1262
258
      propagateIRFlags(TmpVec, RedOps);
1263
810
  }
1264
439
  // The result is in the first element of the vector.
1265
439
  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
1266
439
}
1267
1268
/// Create a simple vector reduction specified by an opcode and some
1269
/// flags (if generating min/max reductions).
1270
Value *llvm::createSimpleTargetReduction(
1271
    IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
1272
    Value *Src, TargetTransformInfo::ReductionFlags Flags,
1273
1.28k
    ArrayRef<Value *> RedOps) {
1274
1.28k
  assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
1275
1.28k
1276
1.28k
  Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
1277
1.28k
  std::function<Value*()> BuildFunc;
1278
1.28k
  using RD = RecurrenceDescriptor;
1279
1.28k
  RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
1280
1.28k
  // TODO: Support creating ordered reductions.
1281
1.28k
  FastMathFlags FMFUnsafe;
1282
1.28k
  FMFUnsafe.setUnsafeAlgebra();
1283
1.28k
1284
1.28k
  switch (Opcode) {
1285
971
  case Instruction::Add:
1286
839
    BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
1287
971
    break;
1288
45
  case Instruction::Mul:
1289
0
    BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
1290
45
    break;
1291
25
  case Instruction::And:
1292
0
    BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
1293
25
    break;
1294
75
  case Instruction::Or:
1295
0
    BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
1296
75
    break;
1297
8
  case Instruction::Xor:
1298
0
    BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
1299
8
    break;
1300
60
  case Instruction::FAdd:
1301
0
    BuildFunc = [&]() {
1302
0
      auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
1303
0
      cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe);
1304
0
      return Rdx;
1305
0
    };
1306
60
    break;
1307
1
  case Instruction::FMul:
1308
0
    BuildFunc = [&]() {
1309
0
      auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
1310
0
      cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe);
1311
0
      return Rdx;
1312
0
    };
1313
1
    break;
1314
68
  case Instruction::ICmp:
1315
68
    if (
Flags.IsMaxOp68
) {
1316
42
      MinMaxKind = Flags.IsSigned ? 
RD::MRK_SIntMax39
:
RD::MRK_UIntMax3
;
1317
10
      BuildFunc = [&]() {
1318
10
        return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
1319
10
      };
1320
68
    } else {
1321
26
      MinMaxKind = Flags.IsSigned ? 
RD::MRK_SIntMin23
:
RD::MRK_UIntMin3
;
1322
6
      BuildFunc = [&]() {
1323
6
        return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
1324
6
      };
1325
26
    }
1326
68
    break;
1327
28
  case Instruction::FCmp:
1328
28
    if (
Flags.IsMaxOp28
) {
1329
19
      MinMaxKind = RD::MRK_FloatMax;
1330
0
      BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
1331
28
    } else {
1332
9
      MinMaxKind = RD::MRK_FloatMin;
1333
0
      BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
1334
9
    }
1335
28
    break;
1336
0
  default:
1337
0
    llvm_unreachable("Unhandled opcode");
1338
0
    break;
1339
1.28k
  }
1340
1.28k
  
if (1.28k
TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)1.28k
)
1341
855
    return BuildFunc();
1342
426
  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
1343
426
}
1344
1345
/// Create a vector reduction using a given recurrence descriptor.
1346
Value *llvm::createTargetReduction(IRBuilder<> &Builder,
1347
                                   const TargetTransformInfo *TTI,
1348
                                   RecurrenceDescriptor &Desc, Value *Src,
1349
1.16k
                                   bool NoNaN) {
1350
1.16k
  // TODO: Support in-order reductions based on the recurrence descriptor.
1351
1.16k
  RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind();
1352
1.16k
  TargetTransformInfo::ReductionFlags Flags;
1353
1.16k
  Flags.NoNaN = NoNaN;
1354
1.16k
  auto getSimpleRdx = [&](unsigned Opc) {
1355
1.16k
    return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags);
1356
1.16k
  };
1357
1.16k
  switch (RecKind) {
1358
18
  case RecurrenceDescriptor::RK_FloatAdd:
1359
18
    return getSimpleRdx(Instruction::FAdd);
1360
1
  case RecurrenceDescriptor::RK_FloatMult:
1361
1
    return getSimpleRdx(Instruction::FMul);
1362
931
  case RecurrenceDescriptor::RK_IntegerAdd:
1363
931
    return getSimpleRdx(Instruction::Add);
1364
45
  case RecurrenceDescriptor::RK_IntegerMult:
1365
45
    return getSimpleRdx(Instruction::Mul);
1366
25
  case RecurrenceDescriptor::RK_IntegerAnd:
1367
25
    return getSimpleRdx(Instruction::And);
1368
75
  case RecurrenceDescriptor::RK_IntegerOr:
1369
75
    return getSimpleRdx(Instruction::Or);
1370
8
  case RecurrenceDescriptor::RK_IntegerXor:
1371
8
    return getSimpleRdx(Instruction::Xor);
1372
49
  case RecurrenceDescriptor::RK_IntegerMinMax: {
1373
49
    switch (Desc.getMinMaxRecurrenceKind()) {
1374
20
    case RecurrenceDescriptor::MRK_SIntMax:
1375
20
      Flags.IsSigned = true;
1376
20
      Flags.IsMaxOp = true;
1377
20
      break;
1378
3
    case RecurrenceDescriptor::MRK_UIntMax:
1379
3
      Flags.IsMaxOp = true;
1380
3
      break;
1381
23
    case RecurrenceDescriptor::MRK_SIntMin:
1382
23
      Flags.IsSigned = true;
1383
23
      break;
1384
3
    case RecurrenceDescriptor::MRK_UIntMin:
1385
3
      break;
1386
0
    default:
1387
0
      llvm_unreachable("Unhandled MRK");
1388
49
    }
1389
49
    return getSimpleRdx(Instruction::ICmp);
1390
49
  }
1391
17
  case RecurrenceDescriptor::RK_FloatMinMax: {
1392
17
    Flags.IsMaxOp =
1393
17
        Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax;
1394
17
    return getSimpleRdx(Instruction::FCmp);
1395
49
  }
1396
0
  default:
1397
0
    llvm_unreachable("Unhandled RecKind");
1398
0
  }
1399
0
}
1400
1401
6.72k
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
1402
6.72k
  auto *VecOp = dyn_cast<Instruction>(I);
1403
6.72k
  if (!VecOp)
1404
14
    return;
1405
6.71k
  
auto *Intersection = (OpValue == nullptr) ? 6.71k
dyn_cast<Instruction>(VL[0])443
1406
6.26k
                                            : dyn_cast<Instruction>(OpValue);
1407
6.71k
  if (!Intersection)
1408
0
    return;
1409
6.71k
  const unsigned Opcode = Intersection->getOpcode();
1410
6.71k
  VecOp->copyIRFlags(Intersection);
1411
31.7k
  for (auto *V : VL) {
1412
31.7k
    auto *Instr = dyn_cast<Instruction>(V);
1413
31.7k
    if (!Instr)
1414
0
      continue;
1415
31.7k
    
if (31.7k
OpValue == nullptr || 31.7k
Opcode == Instr->getOpcode()25.7k
)
1416
31.7k
      VecOp->andIRFlags(V);
1417
31.7k
  }
1418
6.72k
}