Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- LoopVectorizationLegality.cpp --------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file provides loop vectorization legality analysis. Original code
10
// resided in LoopVectorize.cpp for a long time.
11
//
12
// At this point, it is implemented as a utility class, not as an analysis
13
// pass. It should be easy to create an analysis pass around it if there
14
// is a need (but D45420 needs to happen first).
15
//
16
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
17
#include "llvm/Analysis/VectorUtils.h"
18
#include "llvm/IR/IntrinsicInst.h"
19
20
using namespace llvm;
21
22
574k
#define LV_NAME "loop-vectorize"
23
423k
#define DEBUG_TYPE LV_NAME
24
25
extern cl::opt<bool> EnableVPlanPredication;
26
27
static cl::opt<bool>
28
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
29
                       cl::desc("Enable if-conversion during vectorization."));
30
31
static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
32
    "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
33
    cl::desc("The maximum allowed number of runtime memory checks with a "
34
             "vectorize(enable) pragma."));
35
36
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
37
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
38
    cl::desc("The maximum number of SCEV checks allowed."));
39
40
static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
41
    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
42
    cl::desc("The maximum number of SCEV checks allowed with a "
43
             "vectorize(enable) pragma"));
44
45
/// Maximum vectorization interleave count.
46
static const unsigned MaxInterleaveFactor = 16;
47
48
namespace llvm {
49
50
#ifndef NDEBUG
51
static void debugVectorizationFailure(const StringRef DebugMsg,
52
    Instruction *I) {
53
  dbgs() << "LV: Not vectorizing: " << DebugMsg;
54
  if (I != nullptr)
55
    dbgs() << " " << *I;
56
  else
57
    dbgs() << '.';
58
  dbgs() << '\n';
59
}
60
#endif
61
62
OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
63
                                                  StringRef RemarkName,
64
                                                  Loop *TheLoop,
65
114k
                                                  Instruction *I) {
66
114k
  Value *CodeRegion = TheLoop->getHeader();
67
114k
  DebugLoc DL = TheLoop->getStartLoc();
68
114k
69
114k
  if (I) {
70
67.4k
    CodeRegion = I->getParent();
71
67.4k
    // If there is no debug location attached to the instruction, revert back to
72
67.4k
    // using the loop's.
73
67.4k
    if (I->getDebugLoc())
74
2.55k
      DL = I->getDebugLoc();
75
67.4k
  }
76
114k
77
114k
  OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
78
114k
  R << "loop not vectorized: ";
79
114k
  return R;
80
114k
}
81
82
11.2k
bool LoopVectorizeHints::Hint::validate(unsigned Val) {
83
11.2k
  switch (Kind) {
84
11.2k
  case HK_WIDTH:
85
5.59k
    return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
86
11.2k
  case HK_UNROLL:
87
5.56k
    return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
88
11.2k
  case HK_FORCE:
89
104
    return (Val <= 1);
90
11.2k
  case HK_ISVECTORIZED:
91
1
    return (Val == 0 || Val == 1);
92
0
  }
93
0
  return false;
94
0
}
95
96
LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
97
                                       bool InterleaveOnlyWhenForced,
98
                                       OptimizationRemarkEmitter &ORE)
99
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
100
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
101
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
102
163k
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
103
163k
  // Populate values with existing loop metadata.
104
163k
  getHintsFromMetadata();
105
163k
106
163k
  // force-vector-interleave overrides DisableInterleaving.
107
163k
  if (VectorizerParams::isInterleaveForced())
108
1.48k
    Interleave.Value = VectorizerParams::VectorizationInterleave;
109
163k
110
163k
  if (IsVectorized.Value != 1)
111
163k
    // If the vectorization width and interleaving count are both 1 then
112
163k
    // consider the loop to have been already vectorized because there's
113
163k
    // nothing more that we can do.
114
163k
    IsVectorized.Value = Width.Value == 1 && 
Interleave.Value == 15.64k
;
115
163k
  LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
116
163k
             << "LV: Interleaving disabled by the pass manager\n");
117
163k
}
118
119
34.1k
void LoopVectorizeHints::setAlreadyVectorized() {
120
34.1k
  LLVMContext &Context = TheLoop->getHeader()->getContext();
121
34.1k
122
34.1k
  MDNode *IsVectorizedMD = MDNode::get(
123
34.1k
      Context,
124
34.1k
      {MDString::get(Context, "llvm.loop.isvectorized"),
125
34.1k
       ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
126
34.1k
  MDNode *LoopID = TheLoop->getLoopID();
127
34.1k
  MDNode *NewLoopID =
128
34.1k
      makePostTransformationMetadata(Context, LoopID,
129
34.1k
                                     {Twine(Prefix(), "vectorize.").str(),
130
34.1k
                                      Twine(Prefix(), "interleave.").str()},
131
34.1k
                                     {IsVectorizedMD});
132
34.1k
  TheLoop->setLoopID(NewLoopID);
133
34.1k
134
34.1k
  // Update internal cache.
135
34.1k
  IsVectorized.Value = 1;
136
34.1k
}
137
138
bool LoopVectorizeHints::allowVectorization(
139
146k
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
140
146k
  if (getForce() == LoopVectorizeHints::FK_Disabled) {
141
14
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
142
14
    emitRemarkWithHints();
143
14
    return false;
144
14
  }
145
146k
146
146k
  if (VectorizeOnlyWhenForced && 
getForce() != LoopVectorizeHints::FK_Enabled136
) {
147
118
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
148
118
    emitRemarkWithHints();
149
118
    return false;
150
118
  }
151
146k
152
146k
  if (getIsVectorized() == 1) {
153
5.56k
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
154
5.56k
    // FIXME: Add interleave.disable metadata. This will allow
155
5.56k
    // vectorize.disable to be used without disabling the pass and errors
156
5.56k
    // to differentiate between disabled vectorization and a width of 1.
157
5.56k
    ORE.emit([&]() {
158
6
      return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
159
6
                                        "AllDisabled", L->getStartLoc(),
160
6
                                        L->getHeader())
161
6
             << "loop not vectorized: vectorization and interleaving are "
162
6
                "explicitly disabled, or the loop has already been "
163
6
                "vectorized";
164
6
    });
165
5.56k
    return false;
166
5.56k
  }
167
141k
168
141k
  return true;
169
141k
}
170
171
122k
void LoopVectorizeHints::emitRemarkWithHints() const {
172
122k
  using namespace ore;
173
122k
174
122k
  ORE.emit([&]() {
175
53
    if (Force.Value == LoopVectorizeHints::FK_Disabled)
176
1
      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
177
1
                                      TheLoop->getStartLoc(),
178
1
                                      TheLoop->getHeader())
179
1
             << "loop not vectorized: vectorization is explicitly disabled";
180
52
    else {
181
52
      OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
182
52
                                 TheLoop->getStartLoc(), TheLoop->getHeader());
183
52
      R << "loop not vectorized";
184
52
      if (Force.Value == LoopVectorizeHints::FK_Enabled) {
185
5
        R << " (Force=" << NV("Force", true);
186
5
        if (Width.Value != 0)
187
1
          R << ", Vector Width=" << NV("VectorWidth", Width.Value);
188
5
        if (Interleave.Value != 0)
189
0
          R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
190
5
        R << ")";
191
5
      }
192
52
      return R;
193
52
    }
194
53
  });
195
122k
}
196
197
152k
const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
198
152k
  if (getWidth() == 1)
199
83
    return LV_NAME;
200
152k
  if (getForce() == LoopVectorizeHints::FK_Disabled)
201
0
    return LV_NAME;
202
152k
  if (getForce() == LoopVectorizeHints::FK_Undefined && 
getWidth() == 0152k
)
203
150k
    return LV_NAME;
204
1.39k
  return OptimizationRemarkAnalysis::AlwaysPrint;
205
1.39k
}
206
207
163k
void LoopVectorizeHints::getHintsFromMetadata() {
208
163k
  MDNode *LoopID = TheLoop->getLoopID();
209
163k
  if (!LoopID)
210
147k
    return;
211
16.8k
212
16.8k
  // First operand should refer to the loop id itself.
213
16.8k
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
214
16.8k
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
215
16.8k
216
50.4k
  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; 
++i33.6k
) {
217
33.6k
    const MDString *S = nullptr;
218
33.6k
    SmallVector<Metadata *, 4> Args;
219
33.6k
220
33.6k
    // The expected hint is either a MDString or a MDNode with the first
221
33.6k
    // operand a MDString.
222
33.6k
    if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
223
33.6k
      if (!MD || MD->getNumOperands() == 0)
224
0
        continue;
225
33.6k
      S = dyn_cast<MDString>(MD->getOperand(0));
226
57.2k
      for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; 
++i23.6k
)
227
23.6k
        Args.push_back(MD->getOperand(i));
228
33.6k
    } else {
229
0
      S = dyn_cast<MDString>(LoopID->getOperand(i));
230
0
      assert(Args.size() == 0 && "too many arguments for MDString");
231
0
    }
232
33.6k
233
33.6k
    if (!S)
234
22.3k
      continue;
235
11.3k
236
11.3k
    // Check if the hint starts with the loop metadata prefix.
237
11.3k
    StringRef Name = S->getString();
238
11.3k
    if (Args.size() == 1)
239
11.2k
      setHint(Name, Args[0]);
240
11.3k
  }
241
16.8k
}
242
243
11.2k
void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
244
11.2k
  if (!Name.startswith(Prefix()))
245
0
    return;
246
11.2k
  Name = Name.substr(Prefix().size(), StringRef::npos);
247
11.2k
248
11.2k
  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
249
11.2k
  if (!C)
250
22
    return;
251
11.2k
  unsigned Val = C->getZExtValue();
252
11.2k
253
11.2k
  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized};
254
17.0k
  for (auto H : Hints) {
255
17.0k
    if (Name == H->Name) {
256
11.2k
      if (H->validate(Val))
257
11.2k
        H->Value = Val;
258
11.2k
      else
259
11.2k
        LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
260
11.2k
      break;
261
11.2k
    }
262
17.0k
  }
263
11.2k
}
264
265
bool LoopVectorizationRequirements::doesNotMeet(
266
19.9k
    Function *F, Loop *L, const LoopVectorizeHints &Hints) {
267
19.9k
  const char *PassName = Hints.vectorizeAnalysisPassName();
268
19.9k
  bool Failed = false;
269
19.9k
  if (UnsafeAlgebraInst && 
!Hints.allowReordering()1.35k
) {
270
1.35k
    ORE.emit([&]() {
271
4
      return OptimizationRemarkAnalysisFPCommute(
272
4
                 PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
273
4
                 UnsafeAlgebraInst->getParent())
274
4
             << "loop not vectorized: cannot prove it is safe to reorder "
275
4
                "floating-point operations";
276
4
    });
277
1.35k
    Failed = true;
278
1.35k
  }
279
19.9k
280
19.9k
  // Test if runtime memcheck thresholds are exceeded.
281
19.9k
  bool PragmaThresholdReached =
282
19.9k
      NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
283
19.9k
  bool ThresholdReached =
284
19.9k
      NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
285
19.9k
  if ((ThresholdReached && 
!Hints.allowReordering()144
) ||
286
19.9k
      
PragmaThresholdReached19.8k
) {
287
142
    ORE.emit([&]() {
288
5
      return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
289
5
                                                L->getStartLoc(),
290
5
                                                L->getHeader())
291
5
             << "loop not vectorized: cannot prove it is safe to reorder "
292
5
                "memory operations";
293
5
    });
294
142
    LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
295
142
    Failed = true;
296
142
  }
297
19.9k
298
19.9k
  return Failed;
299
19.9k
}
300
301
// Return true if the inner loop \p Lp is uniform with regard to the outer loop
302
// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
303
// executing the inner loop will execute the same iterations). This check is
304
// very constrained for now but it will be relaxed in the future. \p Lp is
305
// considered uniform if it meets all the following conditions:
306
//   1) it has a canonical IV (starting from 0 and with stride 1),
307
//   2) its latch terminator is a conditional branch and,
308
//   3) its latch condition is a compare instruction whose operands are the
309
//      canonical IV and an OuterLp invariant.
310
// This check doesn't take into account the uniformity of other conditions not
311
// related to the loop latch because they don't affect the loop uniformity.
312
//
313
// NOTE: We decided to keep all these checks and its associated documentation
314
// together so that we can easily have a picture of the current supported loop
315
// nests. However, some of the current checks don't depend on \p OuterLp and
316
// would be redundantly executed for each \p Lp if we invoked this function for
317
// different candidate outer loops. This is not the case for now because we
318
// don't currently have the infrastructure to evaluate multiple candidate outer
319
// loops and \p OuterLp will be a fixed parameter while we only support explicit
320
// outer loop vectorization. It's also very likely that these checks go away
321
// before introducing the aforementioned infrastructure. However, if this is not
322
// the case, we should move the \p OuterLp independent checks to a separate
323
// function that is only executed once for each \p Lp.
324
14
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
325
14
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
326
14
327
14
  // If Lp is the outer loop, it's uniform by definition.
328
14
  if (Lp == OuterLp)
329
7
    return true;
330
7
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
331
7
332
7
  // 1.
333
7
  PHINode *IV = Lp->getCanonicalInductionVariable();
334
7
  if (!IV) {
335
0
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
336
0
    return false;
337
0
  }
338
7
339
7
  // 2.
340
7
  BasicBlock *Latch = Lp->getLoopLatch();
341
7
  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
342
7
  if (!LatchBr || LatchBr->isUnconditional()) {
343
0
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
344
0
    return false;
345
0
  }
346
7
347
7
  // 3.
348
7
  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
349
7
  if (!LatchCmp) {
350
0
    LLVM_DEBUG(
351
0
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
352
0
    return false;
353
0
  }
354
7
355
7
  Value *CondOp0 = LatchCmp->getOperand(0);
356
7
  Value *CondOp1 = LatchCmp->getOperand(1);
357
7
  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
358
7
  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
359
7
      
!(0
CondOp1 == IVUpdate0
&&
OuterLp->isLoopInvariant(CondOp0)0
)) {
360
0
    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
361
0
    return false;
362
0
  }
363
7
364
7
  return true;
365
7
}
366
367
// Return true if \p Lp and all its nested loops are uniform with regard to \p
368
// OuterLp.
369
14
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
370
14
  if (!isUniformLoop(Lp, OuterLp))
371
0
    return false;
372
14
373
14
  // Check if nested loops are uniform.
374
14
  for (Loop *SubLp : *Lp)
375
7
    if (!isUniformLoopNest(SubLp, OuterLp))
376
0
      return false;
377
14
378
14
  return true;
379
14
}
380
381
/// Check whether it is safe to if-convert this phi node.
382
///
383
/// Phi nodes with constant expressions that can trap are not safe to if
384
/// convert.
385
4.37k
static bool canIfConvertPHINodes(BasicBlock *BB) {
386
4.62k
  for (PHINode &Phi : BB->phis()) {
387
4.62k
    for (Value *V : Phi.incoming_values())
388
11.5k
      if (auto *C = dyn_cast<Constant>(V))
389
1.14k
        if (C->canTrap())
390
0
          return false;
391
4.62k
  }
392
4.37k
  return true;
393
4.37k
}
394
395
72.7k
static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
396
72.7k
  if (Ty->isPointerTy())
397
11.6k
    return DL.getIntPtrType(Ty);
398
61.0k
399
61.0k
  // It is possible that char's or short's overflow when we ask for the loop's
400
61.0k
  // trip count, work around this by changing the type size.
401
61.0k
  if (Ty->getScalarSizeInBits() < 32)
402
66
    return Type::getInt32Ty(Ty->getContext());
403
60.9k
404
60.9k
  return Ty;
405
60.9k
}
406
407
7.01k
static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
408
7.01k
  Ty0 = convertPointerToIntegerType(DL, Ty0);
409
7.01k
  Ty1 = convertPointerToIntegerType(DL, Ty1);
410
7.01k
  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
411
284
    return Ty0;
412
6.73k
  return Ty1;
413
6.73k
}
414
415
/// Check that the instruction has outside loop users and is not an
416
/// identified reduction variable.
417
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
418
448k
                               SmallPtrSetImpl<Value *> &AllowedExit) {
419
448k
  // Reductions, Inductions and non-header phis are allowed to have exit users. All
420
448k
  // other instructions must not have external users.
421
448k
  if (!AllowedExit.count(Inst))
422
409k
    // Check that all of the users of the loop are inside the BB.
423
409k
    for (User *U : Inst->users()) {
424
370k
      Instruction *UI = cast<Instruction>(U);
425
370k
      // This user may be a reduction exit value.
426
370k
      if (!TheLoop->contains(UI)) {
427
666
        LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
428
666
        return true;
429
666
      }
430
370k
    }
431
448k
  
return false447k
;
432
448k
}
433
434
159k
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
435
159k
  const ValueToValueMap &Strides =
436
159k
      getSymbolicStrides() ? *getSymbolicStrides() : 
ValueToValueMap()0
;
437
159k
438
159k
  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
439
159k
  if (Stride == 1 || 
Stride == -131.7k
)
440
136k
    return Stride;
441
22.6k
  return 0;
442
22.6k
}
443
444
157k
bool LoopVectorizationLegality::isUniform(Value *V) {
445
157k
  return LAI->isUniform(V);
446
157k
}
447
448
void LoopVectorizationLegality::reportVectorizationFailure(
449
    const StringRef DebugMsg, const StringRef OREMsg,
450
113k
    const StringRef ORETag, Instruction *I) const {
451
113k
  LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I));
452
113k
  ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
453
113k
      ORETag, TheLoop, I) << OREMsg);
454
113k
}
455
456
7
bool LoopVectorizationLegality::canVectorizeOuterLoop() {
457
7
  assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
458
7
  // Store the result and return it at the end instead of exiting early, in case
459
7
  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
460
7
  bool Result = true;
461
7
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
462
7
463
23
  for (BasicBlock *BB : TheLoop->blocks()) {
464
23
    // Check whether the BB terminator is a BranchInst. Any other terminator is
465
23
    // not supported yet.
466
23
    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
467
23
    if (!Br) {
468
0
      reportVectorizationFailure("Unsupported basic block terminator",
469
0
          "loop control flow is not understood by vectorizer",
470
0
          "CFGNotUnderstood");
471
0
      if (DoExtraAnalysis)
472
0
        Result = false;
473
0
      else
474
0
        return false;
475
23
    }
476
23
477
23
    // Check whether the BranchInst is a supported one. Only unconditional
478
23
    // branches, conditional branches with an outer loop invariant condition or
479
23
    // backedges are supported.
480
23
    // FIXME: We skip these checks when VPlan predication is enabled as we
481
23
    // want to allow divergent branches. This whole check will be removed
482
23
    // once VPlan predication is on by default.
483
23
    if (!EnableVPlanPredication && Br && Br->isConditional() &&
484
23
        
!TheLoop->isLoopInvariant(Br->getCondition())15
&&
485
23
        
!LI->isLoopHeader(Br->getSuccessor(0))14
&&
486
23
        
!LI->isLoopHeader(Br->getSuccessor(1))14
) {
487
0
      reportVectorizationFailure("Unsupported conditional branch",
488
0
          "loop control flow is not understood by vectorizer",
489
0
          "CFGNotUnderstood");
490
0
      if (DoExtraAnalysis)
491
0
        Result = false;
492
0
      else
493
0
        return false;
494
0
    }
495
23
  }
496
7
497
7
  // Check whether inner loops are uniform. At this point, we only support
498
7
  // simple outer loops scenarios with uniform nested loops.
499
7
  if (!isUniformLoopNest(TheLoop /*loop nest*/,
500
7
                         TheLoop /*context outer loop*/)) {
501
0
    reportVectorizationFailure("Outer loop contains divergent loops",
502
0
        "loop control flow is not understood by vectorizer",
503
0
        "CFGNotUnderstood");
504
0
    if (DoExtraAnalysis)
505
0
      Result = false;
506
0
    else
507
0
      return false;
508
7
  }
509
7
510
7
  // Check whether we are able to set up outer loop induction.
511
7
  if (!setupOuterLoopInductions()) {
512
0
    reportVectorizationFailure("Unsupported outer loop Phi(s)",
513
0
                               "Unsupported outer loop Phi(s)",
514
0
                               "UnsupportedPhi");
515
0
    if (DoExtraAnalysis)
516
0
      Result = false;
517
0
    else
518
0
      return false;
519
7
  }
520
7
521
7
  return Result;
522
7
}
523
524
void LoopVectorizationLegality::addInductionPhi(
525
    PHINode *Phi, const InductionDescriptor &ID,
526
65.7k
    SmallPtrSetImpl<Value *> &AllowedExit) {
527
65.7k
  Inductions[Phi] = ID;
528
65.7k
529
65.7k
  // In case this induction also comes with casts that we know we can ignore
530
65.7k
  // in the vectorized loop body, record them here. All casts could be recorded
531
65.7k
  // here for ignoring, but suffices to record only the first (as it is the
532
65.7k
  // only one that may bw used outside the cast sequence).
533
65.7k
  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
534
65.7k
  if (!Casts.empty())
535
15
    InductionCastsToIgnore.insert(*Casts.begin());
536
65.7k
537
65.7k
  Type *PhiTy = Phi->getType();
538
65.7k
  const DataLayout &DL = Phi->getModule()->getDataLayout();
539
65.7k
540
65.7k
  // Get the widest type.
541
65.7k
  if (!PhiTy->isFloatingPointTy()) {
542
65.7k
    if (!WidestIndTy)
543
58.7k
      WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
544
7.01k
    else
545
7.01k
      WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
546
65.7k
  }
547
65.7k
548
65.7k
  // Int inductions are special because we only allow one IV.
549
65.7k
  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
550
65.7k
      
ID.getConstIntStepValue()54.0k
&&
ID.getConstIntStepValue()->isOne()53.6k
&&
551
65.7k
      
isa<Constant>(ID.getStartValue())46.9k
&&
552
65.7k
      
cast<Constant>(ID.getStartValue())->isNullValue()44.8k
) {
553
41.9k
554
41.9k
    // Use the phi node with the widest type as induction. Use the last
555
41.9k
    // one if there are multiple (no good reason for doing this other
556
41.9k
    // than it is expedient). We've checked that it begins at zero and
557
41.9k
    // steps by one, so this is a canonical induction variable.
558
41.9k
    if (!PrimaryInduction || 
PhiTy == WidestIndTy140
)
559
41.8k
      PrimaryInduction = Phi;
560
41.9k
  }
561
65.7k
562
65.7k
  // Both the PHI node itself, and the "post-increment" value feeding
563
65.7k
  // back into the PHI node may have external users.
564
65.7k
  // We can allow those uses, except if the SCEVs we have for them rely
565
65.7k
  // on predicates that only hold within the loop, since allowing the exit
566
65.7k
  // currently means re-using this SCEV outside the loop (see PR33706 for more
567
65.7k
  // details).
568
65.7k
  if (PSE.getUnionPredicate().isAlwaysTrue()) {
569
65.6k
    AllowedExit.insert(Phi);
570
65.6k
    AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
571
65.6k
  }
572
65.7k
573
65.7k
  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
574
65.7k
}
575
576
7
bool LoopVectorizationLegality::setupOuterLoopInductions() {
577
7
  BasicBlock *Header = TheLoop->getHeader();
578
7
579
7
  // Returns true if a given Phi is a supported induction.
580
7
  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
581
7
    InductionDescriptor ID;
582
7
    if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
583
7
        ID.getKind() == InductionDescriptor::IK_IntInduction) {
584
7
      addInductionPhi(&Phi, ID, AllowedExit);
585
7
      return true;
586
7
    } else {
587
0
      // Bail out for any Phi in the outer loop header that is not a supported
588
0
      // induction.
589
0
      LLVM_DEBUG(
590
0
          dbgs()
591
0
          << "LV: Found unsupported PHI for outer loop vectorization.\n");
592
0
      return false;
593
0
    }
594
7
  };
595
7
596
7
  if (llvm::all_of(Header->phis(), isSupportedPhi))
597
7
    return true;
598
0
  else
599
0
    return false;
600
7
}
601
602
92.7k
bool LoopVectorizationLegality::canVectorizeInstrs() {
603
92.7k
  BasicBlock *Header = TheLoop->getHeader();
604
92.7k
605
92.7k
  // Look for the attribute signaling the absence of NaNs.
606
92.7k
  Function &F = *Header->getParent();
607
92.7k
  HasFunNoNaNAttr =
608
92.7k
      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
609
92.7k
610
92.7k
  // For each block in the loop.
611
98.7k
  for (BasicBlock *BB : TheLoop->blocks()) {
612
98.7k
    // Scan the instructions in the block and look for hazards.
613
582k
    for (Instruction &I : *BB) {
614
582k
      if (auto *Phi = dyn_cast<PHINode>(&I)) {
615
108k
        Type *PhiTy = Phi->getType();
616
108k
        // Check that this PHI type is allowed.
617
108k
        if (!PhiTy->isIntegerTy() && 
!PhiTy->isFloatingPointTy()33.7k
&&
618
108k
            
!PhiTy->isPointerTy()28.2k
) {
619
4
          reportVectorizationFailure("Found a non-int non-pointer PHI",
620
4
                                     "loop control flow is not understood by vectorizer",
621
4
                                     "CFGNotUnderstood");
622
4
          return false;
623
4
        }
624
108k
625
108k
        // If this PHINode is not in the header block, then we know that we
626
108k
        // can convert it to select during if-conversion. No need to check if
627
108k
        // the PHIs in this block are induction or reduction variables.
628
108k
        if (BB != Header) {
629
1.64k
          // Non-header phi nodes that have outside uses can be vectorized. Add
630
1.64k
          // them to the list of allowed exits.
631
1.64k
          // Unsafe cyclic dependencies with header phis are identified during
632
1.64k
          // legalization for reduction, induction and first order
633
1.64k
          // recurrences.
634
1.64k
          continue;
635
1.64k
        }
636
106k
637
106k
        // We only allow if-converted PHIs with exactly two incoming values.
638
106k
        if (Phi->getNumIncomingValues() != 2) {
639
0
          reportVectorizationFailure("Found an invalid PHI",
640
0
              "loop control flow is not understood by vectorizer",
641
0
              "CFGNotUnderstood", Phi);
642
0
          return false;
643
0
        }
644
106k
645
106k
        RecurrenceDescriptor RedDes;
646
106k
        if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
647
106k
                                                 DT)) {
648
8.23k
          if (RedDes.hasUnsafeAlgebra())
649
1.77k
            Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
650
8.23k
          AllowedExit.insert(RedDes.getLoopExitInstr());
651
8.23k
          Reductions[Phi] = RedDes;
652
8.23k
          continue;
653
8.23k
        }
654
98.5k
655
98.5k
        // TODO: Instead of recording the AllowedExit, it would be good to record the
656
98.5k
        // complementary set: NotAllowedExit. These include (but may not be
657
98.5k
        // limited to):
658
98.5k
        // 1. Reduction phis as they represent the one-before-last value, which
659
98.5k
        // is not available when vectorized 
660
98.5k
        // 2. Induction phis and increment when SCEV predicates cannot be used
661
98.5k
        // outside the loop - see addInductionPhi
662
98.5k
        // 3. Non-Phis with outside uses when SCEV predicates cannot be used
663
98.5k
        // outside the loop - see call to hasOutsideLoopUser in the non-phi
664
98.5k
        // handling below
665
98.5k
        // 4. FirstOrderRecurrence phis that can possibly be handled by
666
98.5k
        // extraction.
667
98.5k
        // By recording these, we can then reason about ways to vectorize each
668
98.5k
        // of these NotAllowedExit. 
669
98.5k
        InductionDescriptor ID;
670
98.5k
        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
671
65.6k
          addInductionPhi(Phi, ID, AllowedExit);
672
65.6k
          if (ID.hasUnsafeAlgebra() && 
!HasFunNoNaNAttr44
)
673
43
            Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
674
65.6k
          continue;
675
65.6k
        }
676
32.9k
677
32.9k
        if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
678
32.9k
                                                         SinkAfter, DT)) {
679
277
          FirstOrderRecurrences.insert(Phi);
680
277
          continue;
681
277
        }
682
32.6k
683
32.6k
        // As a last resort, coerce the PHI to a AddRec expression
684
32.6k
        // and re-try classifying it a an induction PHI.
685
32.6k
        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
686
96
          addInductionPhi(Phi, ID, AllowedExit);
687
96
          continue;
688
96
        }
689
32.5k
690
32.5k
        reportVectorizationFailure("Found an unidentified PHI",
691
32.5k
            "value that could not be identified as "
692
32.5k
            "reduction is used outside the loop",
693
32.5k
            "NonReductionValueUsedOutsideLoop", Phi);
694
32.5k
        return false;
695
32.5k
      } // end of PHI handling
696
474k
697
474k
      // We handle calls that:
698
474k
      //   * Are debug info intrinsics.
699
474k
      //   * Have a mapping to an IR intrinsic.
700
474k
      //   * Have a vector version available.
701
474k
      auto *CI = dyn_cast<CallInst>(&I);
702
474k
      if (CI && 
!getVectorIntrinsicIDForCall(CI, TLI)25.6k
&&
703
474k
          
!isa<DbgInfoIntrinsic>(CI)24.5k
&&
704
474k
          
!(24.5k
CI->getCalledFunction()24.5k
&&
TLI24.3k
&&
705
24.5k
            
TLI->isFunctionVectorizable(CI->getCalledFunction()->getName())24.3k
)) {
706
24.4k
        // If the call is a recognized math libary call, it is likely that
707
24.4k
        // we can vectorize it given loosened floating-point constraints.
708
24.4k
        LibFunc Func;
709
24.4k
        bool IsMathLibCall =
710
24.4k
            TLI && CI->getCalledFunction() &&
711
24.4k
            
CI->getType()->isFloatingPointTy()24.2k
&&
712
24.4k
            
TLI->getLibFunc(CI->getCalledFunction()->getName(), Func)70
&&
713
24.4k
            
TLI->hasOptimizedCodeGen(Func)10
;
714
24.4k
715
24.4k
        if (IsMathLibCall) {
716
4
          // TODO: Ideally, we should not use clang-specific language here,
717
4
          // but it's hard to provide meaningful yet generic advice.
718
4
          // Also, should this be guarded by allowExtraAnalysis() and/or be part
719
4
          // of the returned info from isFunctionVectorizable()?
720
4
          reportVectorizationFailure("Found a non-intrinsic callsite",
721
4
              "library call cannot be vectorized. "
722
4
              "Try compiling with -fno-math-errno, -ffast-math, "
723
4
              "or similar flags",
724
4
              "CantVectorizeLibcall", CI);
725
24.4k
        } else {
726
24.4k
          reportVectorizationFailure("Found a non-intrinsic callsite",
727
24.4k
                                     "call instruction cannot be vectorized",
728
24.4k
                                     "CantVectorizeLibcall", CI);
729
24.4k
        }
730
24.4k
        return false;
731
24.4k
      }
732
449k
733
449k
      // Some intrinsics have scalar arguments and should be same in order for
734
449k
      // them to be vectorized (i.e. loop invariant).
735
449k
      if (CI) {
736
1.17k
        auto *SE = PSE.getSE();
737
1.17k
        Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
738
2.96k
        for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; 
++i1.78k
)
739
1.78k
          if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
740
5
            if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
741
1
              reportVectorizationFailure("Found unvectorizable intrinsic",
742
1
                  "intrinsic instruction cannot be vectorized",
743
1
                  "CantVectorizeIntrinsic", CI);
744
1
              return false;
745
1
            }
746
5
          }
747
1.17k
      }
748
449k
749
449k
      // Check that the instruction return type is vectorizable.
750
449k
      // Also, we can't vectorize extractelement instructions.
751
449k
      
if (449k
(449k
!VectorType::isValidElementType(I.getType())449k
&&
752
449k
           
!I.getType()->isVoidTy()81.2k
) ||
753
449k
          
isa<ExtractElementInst>(I)448k
) {
754
1.22k
        reportVectorizationFailure("Found unvectorizable type",
755
1.22k
            "instruction return type cannot be vectorized",
756
1.22k
            "CantVectorizeInstructionReturnType", &I);
757
1.22k
        return false;
758
1.22k
      }
759
448k
760
448k
      // Check that the stored type is vectorizable.
761
448k
      if (auto *ST = dyn_cast<StoreInst>(&I)) {
762
37.6k
        Type *T = ST->getValueOperand()->getType();
763
37.6k
        if (!VectorType::isValidElementType(T)) {
764
27
          reportVectorizationFailure("Store instruction cannot be vectorized",
765
27
                                     "store instruction cannot be vectorized",
766
27
                                     "CantVectorizeStore", ST);
767
27
          return false;
768
27
        }
769
37.6k
770
37.6k
        // For nontemporal stores, check that a nontemporal vector version is
771
37.6k
        // supported on the target.
772
37.6k
        if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
773
1
          // Arbitrarily try a vector of 2 elements.
774
1
          Type *VecTy = VectorType::get(T, /*NumElements=*/2);
775
1
          assert(VecTy && "did not find vectorized version of stored type");
776
1
          unsigned Alignment = getLoadStoreAlignment(ST);
777
1
          if (!TTI->isLegalNTStore(VecTy, Alignment)) {
778
1
            reportVectorizationFailure(
779
1
                "nontemporal store instruction cannot be vectorized",
780
1
                "nontemporal store instruction cannot be vectorized",
781
1
                "CantVectorizeNontemporalStore", ST);
782
1
            return false;
783
1
          }
784
410k
        }
785
410k
786
410k
      } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
787
72.3k
        if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
788
2
          // For nontemporal loads, check that a nontemporal vector version is
789
2
          // supported on the target (arbitrarily try a vector of 2 elements).
790
2
          Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
791
2
          assert(VecTy && "did not find vectorized version of load type");
792
2
          unsigned Alignment = getLoadStoreAlignment(LD);
793
2
          if (!TTI->isLegalNTLoad(VecTy, Alignment)) {
794
2
            reportVectorizationFailure(
795
2
                "nontemporal load instruction cannot be vectorized",
796
2
                "nontemporal load instruction cannot be vectorized",
797
2
                "CantVectorizeNontemporalLoad", LD);
798
2
            return false;
799
2
          }
800
338k
        }
801
338k
802
338k
        // FP instructions can allow unsafe algebra, thus vectorizable by
803
338k
        // non-IEEE-754 compliant SIMD units.
804
338k
        // This applies to floating-point math operations and calls, not memory
805
338k
        // operations, shuffles, or casts, as they don't change precision or
806
338k
        // semantics.
807
338k
      } else if (I.getType()->isFloatingPointTy() && 
(47.8k
CI47.8k
||
I.isBinaryOp()47.2k
) &&
808
338k
                 
!I.isFast()35.8k
) {
809
35.5k
        LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
810
35.5k
        Hints->setPotentiallyUnsafe();
811
35.5k
      }
812
448k
813
448k
      // Reduction instructions are allowed to have exit users.
814
448k
      // All other instructions must not have external users.
815
448k
      
if (448k
hasOutsideLoopUser(TheLoop, &I, AllowedExit)448k
) {
816
666
        // We can safely vectorize loops where instructions within the loop are
817
666
        // used outside the loop only if the SCEV predicates within the loop is
818
666
        // same as outside the loop. Allowing the exit means reusing the SCEV
819
666
        // outside the loop.
820
666
        if (PSE.getUnionPredicate().isAlwaysTrue()) {
821
663
          AllowedExit.insert(&I);
822
663
          continue;
823
663
        }
824
3
        reportVectorizationFailure("Value cannot be used outside the loop",
825
3
                                   "value cannot be used outside the loop",
826
3
                                   "ValueUsedOutsideLoop", &I);
827
3
        return false;
828
3
      }
829
448k
    } // next instr.
830
98.7k
  }
831
92.7k
832
92.7k
  
if (34.4k
!PrimaryInduction34.4k
) {
833
17.2k
    if (Inductions.empty()) {
834
7.00k
      reportVectorizationFailure("Did not find one integer induction var",
835
7.00k
          "loop induction variable could not be identified",
836
7.00k
          "NoInductionVariable");
837
7.00k
      return false;
838
10.2k
    } else if (!WidestIndTy) {
839
16
      reportVectorizationFailure("Did not find one integer induction var",
840
16
          "integer loop induction variable could not be identified",
841
16
          "NoIntegerInductionVariable");
842
16
      return false;
843
10.2k
    } else {
844
10.2k
      LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
845
10.2k
    }
846
17.2k
  }
847
34.4k
848
34.4k
  // Now we know the widest induction type, check if our found induction
849
34.4k
  // is the same size. If it's not, unset it here and InnerLoopVectorizer
850
34.4k
  // will create another.
851
34.4k
  
if (27.3k
PrimaryInduction27.3k
&&
WidestIndTy != PrimaryInduction->getType()17.1k
)
852
320
    PrimaryInduction = nullptr;
853
27.3k
854
27.3k
  return true;
855
34.4k
}
856
857
27.4k
bool LoopVectorizationLegality::canVectorizeMemory() {
858
27.4k
  LAI = &(*GetLAA)(*TheLoop);
859
27.4k
  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
860
27.4k
  if (LAR) {
861
7.43k
    ORE->emit([&]() {
862
35
      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
863
35
                                        "loop not vectorized: ", *LAR);
864
35
    });
865
7.43k
  }
866
27.4k
  if (!LAI->canVectorizeMemory())
867
7.43k
    return false;
868
19.9k
869
19.9k
  if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
870
4
    reportVectorizationFailure("Stores to a uniform address",
871
4
        "write to a loop invariant address could not be vectorized",
872
4
        "CantVectorizeStoreToLoopInvariantAddress");
873
4
    return false;
874
4
  }
875
19.9k
  Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
876
19.9k
  PSE.addPredicate(LAI->getPSE().getUnionPredicate());
877
19.9k
878
19.9k
  return true;
879
19.9k
}
880
881
50.1k
bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
882
50.1k
  Value *In0 = const_cast<Value *>(V);
883
50.1k
  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
884
50.1k
  if (!PN)
885
30.5k
    return false;
886
19.5k
887
19.5k
  return Inductions.count(PN);
888
19.5k
}
889
890
6.34k
bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
891
6.34k
  auto *Inst = dyn_cast<Instruction>(V);
892
6.34k
  return (Inst && InductionCastsToIgnore.count(Inst));
893
6.34k
}
894
895
12.3k
bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
896
12.3k
  return isInductionPhi(V) || 
isCastedInductionVariable(V)6.34k
;
897
12.3k
}
898
899
86.7k
bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
900
86.7k
  return FirstOrderRecurrences.count(Phi);
901
86.7k
}
902
903
987k
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
904
987k
  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
905
987k
}
906
907
bool LoopVectorizationLegality::blockCanBePredicated(
908
25.5k
    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
909
25.5k
  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
910
25.5k
911
140k
  for (Instruction &I : *BB) {
912
140k
    // Check that we don't have a constant expression that can trap as operand.
913
272k
    for (Value *Operand : I.operands()) {
914
272k
      if (auto *C = dyn_cast<Constant>(Operand))
915
79.7k
        if (C->canTrap())
916
4
          return false;
917
272k
    }
918
140k
    // We might be able to hoist the load.
919
140k
    
if (140k
I.mayReadFromMemory()140k
) {
920
36.1k
      auto *LI = dyn_cast<LoadInst>(&I);
921
36.1k
      if (!LI)
922
8.84k
        return false;
923
27.3k
      if (!SafePtrs.count(LI->getPointerOperand())) {
924
26.5k
        // !llvm.mem.parallel_loop_access implies if-conversion safety.
925
26.5k
        // Otherwise, record that the load needs (real or emulated) masking
926
26.5k
        // and let the cost model decide.
927
26.5k
        if (!IsAnnotatedParallel)
928
26.5k
          MaskedOp.insert(LI);
929
26.5k
        continue;
930
26.5k
      }
931
105k
    }
932
105k
933
105k
    if (I.mayWriteToMemory()) {
934
12.4k
      auto *SI = dyn_cast<StoreInst>(&I);
935
12.4k
      if (!SI)
936
4
        return false;
937
12.4k
      // Predicated store requires some form of masking:
938
12.4k
      // 1) masked store HW instruction,
939
12.4k
      // 2) emulation via load-blend-store (only if safe and legal to do so,
940
12.4k
      //    be aware on the race conditions), or
941
12.4k
      // 3) element-by-element predicate check and scalar store.
942
12.4k
      MaskedOp.insert(SI);
943
12.4k
      continue;
944
12.4k
    }
945
92.8k
    if (I.mayThrow())
946
0
      return false;
947
92.8k
  }
948
25.5k
949
25.5k
  
return true16.6k
;
950
25.5k
}
951
952
12.0k
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
953
12.0k
  if (!EnableIfConversion) {
954
0
    reportVectorizationFailure("If-conversion is disabled",
955
0
                               "if-conversion is disabled",
956
0
                               "IfConversionDisabled");
957
0
    return false;
958
0
  }
959
12.0k
960
12.0k
  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
961
12.0k
962
12.0k
  // A list of pointers that we can safely read and write to.
963
12.0k
  SmallPtrSet<Value *, 8> SafePointes;
964
12.0k
965
12.0k
  // Collect safe addresses.
966
102k
  for (BasicBlock *BB : TheLoop->blocks()) {
967
102k
    if (blockNeedsPredication(BB))
968
74.0k
      continue;
969
28.5k
970
28.5k
    for (Instruction &I : *BB)
971
191k
      if (auto *Ptr = getLoadStorePointerOperand(&I))
972
37.4k
        SafePointes.insert(Ptr);
973
28.5k
  }
974
12.0k
975
12.0k
  // Collect the blocks that need predication.
976
12.0k
  BasicBlock *Header = TheLoop->getHeader();
977
42.0k
  for (BasicBlock *BB : TheLoop->blocks()) {
978
42.0k
    // We don't support switch statements inside loops.
979
42.0k
    if (!isa<BranchInst>(BB->getTerminator())) {
980
273
      reportVectorizationFailure("Loop contains a switch statement",
981
273
                                 "loop contains a switch statement",
982
273
                                 "LoopContainsSwitch", BB->getTerminator());
983
273
      return false;
984
273
    }
985
41.7k
986
41.7k
    // We must be able to predicate all blocks that need to be predicated.
987
41.7k
    if (blockNeedsPredication(BB)) {
988
25.4k
      if (!blockCanBePredicated(BB, SafePointes)) {
989
8.85k
        reportVectorizationFailure(
990
8.85k
            "Control flow cannot be substituted for a select",
991
8.85k
            "control flow cannot be substituted for a select",
992
8.85k
            "NoCFGForSelect", BB->getTerminator());
993
8.85k
        return false;
994
8.85k
      }
995
16.2k
    } else if (BB != Header && 
!canIfConvertPHINodes(BB)4.37k
) {
996
0
      reportVectorizationFailure(
997
0
          "Control flow cannot be substituted for a select",
998
0
          "control flow cannot be substituted for a select",
999
0
          "NoCFGForSelect", BB->getTerminator());
1000
0
      return false;
1001
0
    }
1002
41.7k
  }
1003
12.0k
1004
12.0k
  // We can if-convert this loop.
1005
12.0k
  
return true2.94k
;
1006
12.0k
}
1007
1008
// Helper function to canVectorizeLoopNestCFG.
1009
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1010
141k
                                                    bool UseVPlanNativePath) {
1011
141k
  assert((UseVPlanNativePath || Lp->empty()) &&
1012
141k
         "VPlan-native path is not enabled.");
1013
141k
1014
141k
  // TODO: ORE should be improved to show more accurate information when an
1015
141k
  // outer loop can't be vectorized because a nested loop is not understood or
1016
141k
  // legal. Something like: "outer_loop_location: loop not vectorized:
1017
141k
  // (inner_loop_location) loop control flow is not understood by vectorizer".
1018
141k
1019
141k
  // Store the result and return it at the end instead of exiting early, in case
1020
141k
  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1021
141k
  bool Result = true;
1022
141k
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1023
141k
1024
141k
  // We must have a loop in canonical form. Loops with indirectbr in them cannot
1025
141k
  // be canonicalized.
1026
141k
  if (!Lp->getLoopPreheader()) {
1027
1
    reportVectorizationFailure("Loop doesn't have a legal pre-header",
1028
1
        "loop control flow is not understood by vectorizer",
1029
1
        "CFGNotUnderstood");
1030
1
    if (DoExtraAnalysis)
1031
0
      Result = false;
1032
1
    else
1033
1
      return false;
1034
141k
  }
1035
141k
1036
141k
  // We must have a single backedge.
1037
141k
  if (Lp->getNumBackEdges() != 1) {
1038
0
    reportVectorizationFailure("The loop must have a single backedge",
1039
0
        "loop control flow is not understood by vectorizer",
1040
0
        "CFGNotUnderstood");
1041
0
    if (DoExtraAnalysis)
1042
0
      Result = false;
1043
0
    else
1044
0
      return false;
1045
141k
  }
1046
141k
1047
141k
  // We must have a single exiting block.
1048
141k
  if (!Lp->getExitingBlock()) {
1049
35.0k
    reportVectorizationFailure("The loop must have an exiting block",
1050
35.0k
        "loop control flow is not understood by vectorizer",
1051
35.0k
        "CFGNotUnderstood");
1052
35.0k
    if (DoExtraAnalysis)
1053
1
      Result = false;
1054
35.0k
    else
1055
35.0k
      return false;
1056
106k
  }
1057
106k
1058
106k
  // We only handle bottom-tested loops, i.e. loop in which the condition is
1059
106k
  // checked at the end of each iteration. With that we can assume that all
1060
106k
  // instructions in the loop are executed the same number of times.
1061
106k
  if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1062
4.38k
    reportVectorizationFailure("The exiting block is not the loop latch",
1063
4.38k
        "loop control flow is not understood by vectorizer",
1064
4.38k
        "CFGNotUnderstood");
1065
4.38k
    if (DoExtraAnalysis)
1066
1
      Result = false;
1067
4.38k
    else
1068
4.38k
      return false;
1069
101k
  }
1070
101k
1071
101k
  return Result;
1072
101k
}
1073
1074
bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1075
141k
    Loop *Lp, bool UseVPlanNativePath) {
1076
141k
  // Store the result and return it at the end instead of exiting early, in case
1077
141k
  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1078
141k
  bool Result = true;
1079
141k
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1080
141k
  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1081
39.3k
    if (DoExtraAnalysis)
1082
1
      Result = false;
1083
39.3k
    else
1084
39.3k
      return false;
1085
101k
  }
1086
101k
1087
101k
  // Recursively check whether the loop control flow of nested loops is
1088
101k
  // understood.
1089
101k
  for (Loop *SubLp : *Lp)
1090
7
    if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1091
0
      if (DoExtraAnalysis)
1092
0
        Result = false;
1093
0
      else
1094
0
        return false;
1095
0
    }
1096
101k
1097
101k
  return Result;
1098
101k
}
1099
1100
141k
bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1101
141k
  // Store the result and return it at the end instead of exiting early, in case
1102
141k
  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1103
141k
  bool Result = true;
1104
141k
1105
141k
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1106
141k
  // Check whether the loop-related control flow in the loop nest is expected by
1107
141k
  // vectorizer.
1108
141k
  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1109
39.3k
    if (DoExtraAnalysis)
1110
1
      Result = false;
1111
39.3k
    else
1112
39.3k
      return false;
1113
101k
  }
1114
101k
1115
101k
  // We need to have a loop header.
1116
101k
  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1117
101k
                    << '\n');
1118
101k
1119
101k
  // Specific checks for outer loops. We skip the remaining legal checks at this
1120
101k
  // point because they don't support outer loops.
1121
101k
  if (!TheLoop->empty()) {
1122
7
    assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1123
7
1124
7
    if (!canVectorizeOuterLoop()) {
1125
0
      reportVectorizationFailure("Unsupported outer loop",
1126
0
                                 "unsupported outer loop",
1127
0
                                 "UnsupportedOuterLoop");
1128
0
      // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1129
0
      // outer loops.
1130
0
      return false;
1131
0
    }
1132
7
1133
7
    LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1134
7
    return Result;
1135
7
  }
1136
101k
1137
101k
  assert(TheLoop->empty() && "Inner loop expected.");
1138
101k
  // Check if we can if-convert non-single-bb loops.
1139
101k
  unsigned NumBlocks = TheLoop->getNumBlocks();
1140
101k
  if (NumBlocks != 1 && 
!canVectorizeWithIfConvert()12.0k
) {
1141
9.13k
    LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1142
9.13k
    if (DoExtraAnalysis)
1143
5
      Result = false;
1144
9.12k
    else
1145
9.12k
      return false;
1146
92.7k
  }
1147
92.7k
1148
92.7k
  // Check if we can vectorize the instructions and CFG in this loop.
1149
92.7k
  if (!canVectorizeInstrs()) {
1150
65.3k
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1151
65.3k
    if (DoExtraAnalysis)
1152
9
      Result = false;
1153
65.3k
    else
1154
65.3k
      return false;
1155
27.4k
  }
1156
27.4k
1157
27.4k
  // Go over each instruction and look at memory deps.
1158
27.4k
  if (!canVectorizeMemory()) {
1159
7.43k
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1160
7.43k
    if (DoExtraAnalysis)
1161
35
      Result = false;
1162
7.40k
    else
1163
7.40k
      return false;
1164
20.0k
  }
1165
20.0k
1166
20.0k
  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1167
20.0k
                    << (LAI->getRuntimePointerChecking()->Need
1168
20.0k
                            ? " (with a runtime bound check)"
1169
20.0k
                            : "")
1170
20.0k
                    << "!\n");
1171
20.0k
1172
20.0k
  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1173
20.0k
  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1174
31
    SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1175
20.0k
1176
20.0k
  if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1177
5
    reportVectorizationFailure("Too many SCEV checks needed",
1178
5
        "Too many SCEV assumptions need to be made and checked at runtime",
1179
5
        "TooManySCEVRunTimeChecks");
1180
5
    if (DoExtraAnalysis)
1181
0
      Result = false;
1182
5
    else
1183
5
      return false;
1184
19.9k
  }
1185
19.9k
1186
19.9k
  // Okay! We've done all the tests. If any have failed, return false. Otherwise
1187
19.9k
  // we can vectorize, and at this point we don't have any other mem analysis
1188
19.9k
  // which may limit our maximum vectorization factor, so just return true with
1189
19.9k
  // no restrictions.
1190
19.9k
  return Result;
1191
19.9k
}
1192
1193
75
bool LoopVectorizationLegality::canFoldTailByMasking() {
1194
75
1195
75
  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1196
75
1197
75
  if (!PrimaryInduction) {
1198
33
    reportVectorizationFailure(
1199
33
        "No primary induction, cannot fold tail by masking",
1200
33
        "Missing a primary induction variable in the loop, which is "
1201
33
        "needed in order to fold tail by masking as required.",
1202
33
        "NoPrimaryInduction");
1203
33
    return false;
1204
33
  }
1205
42
1206
42
  // TODO: handle reductions when tail is folded by masking.
1207
42
  if (!Reductions.empty()) {
1208
15
    reportVectorizationFailure(
1209
15
        "Loop has reductions, cannot fold tail by masking",
1210
15
        "Cannot fold tail by masking in the presence of reductions.",
1211
15
        "ReductionFoldingTailByMasking");
1212
15
    return false;
1213
15
  }
1214
27
1215
27
  // TODO: handle outside users when tail is folded by masking.
1216
60
  
for (auto *AE : AllowedExit)27
{
1217
60
    // Check that all users of allowed exit values are inside the loop.
1218
182
    for (User *U : AE->users()) {
1219
182
      Instruction *UI = cast<Instruction>(U);
1220
182
      if (TheLoop->contains(UI))
1221
181
        continue;
1222
1
      reportVectorizationFailure(
1223
1
          "Cannot fold tail by masking, loop has an outside user for",
1224
1
          "Cannot fold tail by masking in the presence of live outs.",
1225
1
          "LiveOutFoldingTailByMasking", UI);
1226
1
      return false;
1227
1
    }
1228
60
  }
1229
27
1230
27
  // The list of pointers that we can safely read and write to remains empty.
1231
27
  SmallPtrSet<Value *, 8> SafePointers;
1232
26
1233
26
  // Check and mark all blocks for predication, including those that ordinarily
1234
26
  // do not need predication such as the header block.
1235
44
  for (BasicBlock *BB : TheLoop->blocks()) {
1236
44
    if (!blockCanBePredicated(BB, SafePointers)) {
1237
0
      reportVectorizationFailure(
1238
0
          "Cannot fold tail by masking as required",
1239
0
          "control flow cannot be substituted for a select",
1240
0
          "NoCFGForSelect", BB->getTerminator());
1241
0
      return false;
1242
0
    }
1243
44
  }
1244
26
1245
26
  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1246
26
  return true;
1247
26
}
1248
1249
} // namespace llvm