Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
Line
Count
Source (jump to first uncovered line)
1
//===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// This file defines the LoopVectorizationLegality class. Original code
11
/// in Loop Vectorizer has been moved out to its own file for modularity
12
/// and reusability.
13
///
14
/// Currently, it works for innermost loop vectorization. Extending this to
15
/// outer loop vectorization is a TODO item.
16
///
17
/// Also provides:
18
/// 1) LoopVectorizeHints class which keeps a number of loop annotations
19
/// locally for easy look up. It has the ability to write them back as
20
/// loop metadata, upon request.
21
/// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
22
/// of reporting useful failure to vectorize message.
23
//
24
//===----------------------------------------------------------------------===//
25
26
#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
27
#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28
29
#include "llvm/ADT/MapVector.h"
30
#include "llvm/Analysis/LoopAccessAnalysis.h"
31
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
32
#include "llvm/Transforms/Utils/LoopUtils.h"
33
34
namespace llvm {
35
36
/// Create an analysis remark that explains why vectorization failed
37
///
38
/// \p PassName is the name of the pass (e.g. can be AlwaysPrint).  \p
39
/// RemarkName is the identifier for the remark.  If \p I is passed it is an
40
/// instruction that prevents vectorization.  Otherwise \p TheLoop is used for
41
/// the location of the remark.  \return the remark object that can be
42
/// streamed to.
43
OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
44
                                                  StringRef RemarkName,
45
                                                  Loop *TheLoop,
46
                                                  Instruction *I = nullptr);
47
48
/// Utility class for getting and setting loop vectorizer hints in the form
49
/// of loop metadata.
50
/// This class keeps a number of loop annotations locally (as member variables)
51
/// and can, upon request, write them back as metadata on the loop. It will
52
/// initially scan the loop for existing metadata, and will update the local
53
/// values based on information in the loop.
54
/// We cannot write all values to metadata, as the mere presence of some info,
55
/// for example 'force', means a decision has been made. So, we need to be
56
/// careful NOT to add them if the user hasn't specifically asked so.
57
class LoopVectorizeHints {
58
  enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED };
59
60
  /// Hint - associates name and validation with the hint value.
61
  struct Hint {
62
    const char *Name;
63
    unsigned Value; // This may have to change for non-numeric values.
64
    HintKind Kind;
65
66
    Hint(const char *Name, unsigned Value, HintKind Kind)
67
655k
        : Name(Name), Value(Value), Kind(Kind) {}
68
69
    bool validate(unsigned Val);
70
  };
71
72
  /// Vectorization width.
73
  Hint Width;
74
75
  /// Vectorization interleave factor.
76
  Hint Interleave;
77
78
  /// Vectorization forced
79
  Hint Force;
80
81
  /// Already Vectorized
82
  Hint IsVectorized;
83
84
  /// Return the loop metadata prefix.
85
90.8k
  static StringRef Prefix() { return "llvm.loop."; }
86
87
  /// True if there is any unsafe math in the loop.
88
  bool PotentiallyUnsafe = false;
89
90
public:
91
  enum ForceKind {
92
    FK_Undefined = -1, ///< Not selected.
93
    FK_Disabled = 0,   ///< Forcing disabled.
94
    FK_Enabled = 1,    ///< Forcing enabled.
95
  };
96
97
  LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
98
                     OptimizationRemarkEmitter &ORE);
99
100
  /// Mark the loop L as already vectorized by setting the width to 1.
101
  void setAlreadyVectorized();
102
103
  bool allowVectorization(Function *F, Loop *L,
104
                          bool VectorizeOnlyWhenForced) const;
105
106
  /// Dumps all the hint information.
107
  void emitRemarkWithHints() const;
108
109
326k
  unsigned getWidth() const { return Width.Value; }
110
19.9k
  unsigned getInterleave() const { return Interleave.Value; }
111
146k
  unsigned getIsVectorized() const { return IsVectorized.Value; }
112
532k
  enum ForceKind getForce() const {
113
532k
    if ((ForceKind)Force.Value == FK_Undefined &&
114
532k
        
hasDisableAllTransformsHint(TheLoop)532k
)
115
1
      return FK_Disabled;
116
532k
    return (ForceKind)Force.Value;
117
532k
  }
118
119
  /// If hints are provided that force vectorization, use the AlwaysPrint
120
  /// pass name to force the frontend to print the diagnostic.
121
  const char *vectorizeAnalysisPassName() const;
122
123
1.50k
  bool allowReordering() const {
124
1.50k
    // When enabling loop hints are provided we allow the vectorizer to change
125
1.50k
    // the order of operations that is given by the scalar loop. This is not
126
1.50k
    // enabled by default because can be unsafe or inefficient. For example,
127
1.50k
    // reordering floating-point operations will change the way round-off
128
1.50k
    // error accumulates in the loop.
129
1.50k
    return getForce() == LoopVectorizeHints::FK_Enabled || 
getWidth() > 11.49k
;
130
1.50k
  }
131
132
19.9k
  bool isPotentiallyUnsafe() const {
133
19.9k
    // Avoid FP vectorization if the target is unsure about proper support.
134
19.9k
    // This may be related to the SIMD unit in the target not handling
135
19.9k
    // IEEE 754 FP ops properly, or bad single-to-double promotions.
136
19.9k
    // Otherwise, a sequence of vectorized loops, even without reduction,
137
19.9k
    // could lead to different end results on the destination vectors.
138
19.9k
    return getForce() != LoopVectorizeHints::FK_Enabled && 
PotentiallyUnsafe19.9k
;
139
19.9k
  }
140
141
35.5k
  void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
142
143
private:
144
  /// Find hints specified in the loop metadata and update local values.
145
  void getHintsFromMetadata();
146
147
  /// Checks string hint with one operand and set value if valid.
148
  void setHint(StringRef Name, Metadata *Arg);
149
150
  /// The loop these hints belong to.
151
  const Loop *TheLoop;
152
153
  /// Interface to emit optimization remarks.
154
  OptimizationRemarkEmitter &ORE;
155
};
156
157
/// This holds vectorization requirements that must be verified late in
158
/// the process. The requirements are set by legalize and costmodel. Once
159
/// vectorization has been determined to be possible and profitable the
160
/// requirements can be verified by looking for metadata or compiler options.
161
/// For example, some loops require FP commutativity which is only allowed if
162
/// vectorization is explicitly specified or if the fast-math compiler option
163
/// has been provided.
164
/// Late evaluation of these requirements allows helpful diagnostics to be
165
/// composed that tells the user what need to be done to vectorize the loop. For
166
/// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
167
/// evaluation should be used only when diagnostics can generated that can be
168
/// followed by a non-expert user.
169
class LoopVectorizationRequirements {
170
public:
171
141k
  LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}
172
173
1.81k
  void addUnsafeAlgebraInst(Instruction *I) {
174
1.81k
    // First unsafe algebra instruction.
175
1.81k
    if (!UnsafeAlgebraInst)
176
1.42k
      UnsafeAlgebraInst = I;
177
1.81k
  }
178
179
19.9k
  void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
180
181
  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
182
183
private:
184
  unsigned NumRuntimePointerChecks = 0;
185
  Instruction *UnsafeAlgebraInst = nullptr;
186
187
  /// Interface to emit optimization remarks.
188
  OptimizationRemarkEmitter &ORE;
189
};
190
191
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
192
/// to what vectorization factor.
193
/// This class does not look at the profitability of vectorization, only the
194
/// legality. This class has two main kinds of checks:
195
/// * Memory checks - The code in canVectorizeMemory checks if vectorization
196
///   will change the order of memory accesses in a way that will change the
197
///   correctness of the program.
198
/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
199
/// checks for a number of different conditions, such as the availability of a
200
/// single induction variable, that all types are supported and vectorize-able,
201
/// etc. This code reflects the capabilities of InnerLoopVectorizer.
202
/// This class is also used by InnerLoopVectorizer for identifying
203
/// induction variable and the different reduction variables.
204
class LoopVectorizationLegality {
205
public:
206
  LoopVectorizationLegality(
207
      Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
208
      TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AliasAnalysis *AA,
209
      Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
210
      LoopInfo *LI, OptimizationRemarkEmitter *ORE,
211
      LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
212
      AssumptionCache *AC)
213
      : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
214
141k
        GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
215
216
  /// ReductionList contains the reduction descriptors for all
217
  /// of the reductions that were found in the loop.
218
  using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>;
219
220
  /// InductionList saves induction variables and maps them to the
221
  /// induction descriptor.
222
  using InductionList = MapVector<PHINode *, InductionDescriptor>;
223
224
  /// RecurrenceSet contains the phi nodes that are recurrences other than
225
  /// inductions and reductions.
226
  using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
227
228
  /// Returns true if it is legal to vectorize this loop.
229
  /// This does not mean that it is profitable to vectorize this
230
  /// loop, only that it is legal to do so.
231
  /// Temporarily taking UseVPlanNativePath parameter. If true, take
232
  /// the new code path being implemented for outer loop vectorization
233
  /// (should be functional for inner loop vectorization) based on VPlan.
234
  /// If false, good old LV code.
235
  bool canVectorize(bool UseVPlanNativePath);
236
237
  /// Return true if we can vectorize this loop while folding its tail by
238
  /// masking.
239
  bool canFoldTailByMasking();
240
241
  /// Returns the primary induction variable.
242
71.6k
  PHINode *getPrimaryInduction() { return PrimaryInduction; }
243
244
  /// Returns the reduction variables found in the loop.
245
57.8k
  ReductionList *getReductionVars() { return &Reductions; }
246
247
  /// Returns the induction variables found in the loop.
248
247k
  InductionList *getInductionVars() { return &Inductions; }
249
250
  /// Return the first-order recurrences found in the loop.
251
0
  RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
252
253
  /// Return the set of instructions to sink to handle first-order recurrences.
254
38.5k
  DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
255
256
  /// Returns the widest induction type.
257
34.1k
  Type *getWidestInductionType() { return WidestIndTy; }
258
259
  /// Returns True if V is a Phi node of an induction variable in this loop.
260
  bool isInductionPhi(const Value *V);
261
262
  /// Returns True if V is a cast that is part of an induction def-use chain,
263
  /// and had been proven to be redundant under a runtime guard (in other
264
  /// words, the cast has the same SCEV expression as the induction phi).
265
  bool isCastedInductionVariable(const Value *V);
266
267
  /// Returns True if V can be considered as an induction variable in this
268
  /// loop. V can be the induction phi, or some redundant cast in the def-use
269
  /// chain of the inducion phi.
270
  bool isInductionVariable(const Value *V);
271
272
  /// Returns True if PN is a reduction variable in this loop.
273
54.6k
  bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
274
275
  /// Returns True if Phi is a first-order recurrence in this loop.
276
  bool isFirstOrderRecurrence(const PHINode *Phi);
277
278
  /// Return true if the block BB needs to be predicated in order for the loop
279
  /// to be vectorized.
280
  bool blockNeedsPredication(BasicBlock *BB);
281
282
  /// Check if this pointer is consecutive when vectorizing. This happens
283
  /// when the last index of the GEP is the induction variable, or that the
284
  /// pointer itself is an induction variable.
285
  /// This check allows us to vectorize A[idx] into a wide load/store.
286
  /// Returns:
287
  /// 0 - Stride is unknown or non-consecutive.
288
  /// 1 - Address is consecutive.
289
  /// -1 - Address is consecutive, and decreasing.
290
  /// NOTE: This method must only be used before modifying the original scalar
291
  /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
292
  int isConsecutivePtr(Value *Ptr);
293
294
  /// Returns true if the value V is uniform within the loop.
295
  bool isUniform(Value *V);
296
297
  /// Returns the information that we collected about runtime memory check.
298
23.0k
  const RuntimePointerChecking *getRuntimePointerChecking() const {
299
23.0k
    return LAI->getRuntimePointerChecking();
300
23.0k
  }
301
302
39.6k
  const LoopAccessInfo *getLAI() const { return LAI; }
303
304
38.8k
  unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
305
306
19.9k
  uint64_t getMaxSafeRegisterWidth() const {
307
19.9k
    return LAI->getDepChecker().getMaxSafeRegisterWidth();
308
19.9k
  }
309
310
212k
  bool hasStride(Value *V) { return LAI->hasStride(V); }
311
312
  /// Returns true if vector representation of the instruction \p I
313
  /// requires mask.
314
90.0k
  bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
315
316
15.6k
  unsigned getNumStores() const { return LAI->getNumStores(); }
317
15.6k
  unsigned getNumLoads() const { return LAI->getNumLoads(); }
318
319
  // Returns true if the NoNaN attribute is set on the function.
320
1.05k
  bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
321
322
private:
323
  /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
324
  /// its nested loops are considered legal for vectorization. These legal
325
  /// checks are common for inner and outer loop vectorization.
326
  /// Temporarily taking UseVPlanNativePath parameter. If true, take
327
  /// the new code path being implemented for outer loop vectorization
328
  /// (should be functional for inner loop vectorization) based on VPlan.
329
  /// If false, good old LV code.
330
  bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
331
332
  /// Set up outer loop inductions by checking Phis in outer loop header for
333
  /// supported inductions (int inductions). Return false if any of these Phis
334
  /// is not a supported induction or if we fail to find an induction.
335
  bool setupOuterLoopInductions();
336
337
  /// Return true if the pre-header, exiting and latch blocks of \p Lp
338
  /// (non-recursive) are considered legal for vectorization.
339
  /// Temporarily taking UseVPlanNativePath parameter. If true, take
340
  /// the new code path being implemented for outer loop vectorization
341
  /// (should be functional for inner loop vectorization) based on VPlan.
342
  /// If false, good old LV code.
343
  bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
344
345
  /// Check if a single basic block loop is vectorizable.
346
  /// At this point we know that this is a loop with a constant trip count
347
  /// and we only need to check individual instructions.
348
  bool canVectorizeInstrs();
349
350
  /// When we vectorize loops we may change the order in which
351
  /// we read and write from memory. This method checks if it is
352
  /// legal to vectorize the code, considering only memory constrains.
353
  /// Returns true if the loop is vectorizable
354
  bool canVectorizeMemory();
355
356
  /// Return true if we can vectorize this loop using the IF-conversion
357
  /// transformation.
358
  bool canVectorizeWithIfConvert();
359
360
  /// Return true if we can vectorize this outer loop. The method performs
361
  /// specific checks for outer loop vectorization.
362
  bool canVectorizeOuterLoop();
363
364
  /// Return true if all of the instructions in the block can be speculatively
365
  /// executed. \p SafePtrs is a list of addresses that are known to be legal
366
  /// and we know that we can read from them without segfault.
367
  bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
368
369
  /// Updates the vectorization state by adding \p Phi to the inductions list.
370
  /// This can set \p Phi as the main induction of the loop if \p Phi is a
371
  /// better choice for the main induction than the existing one.
372
  void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
373
                       SmallPtrSetImpl<Value *> &AllowedExit);
374
375
  /// If an access has a symbolic strides, this maps the pointer value to
376
  /// the stride symbol.
377
318k
  const ValueToValueMap *getSymbolicStrides() {
378
318k
    // FIXME: Currently, the set of symbolic strides is sometimes queried before
379
318k
    // it's collected.  This happens from canVectorizeWithIfConvert, when the
380
318k
    // pointer is checked to reference consecutive elements suitable for a
381
318k
    // masked access.
382
318k
    return LAI ? &LAI->getSymbolicStrides() : 
nullptr0
;
383
318k
  }
384
385
  /// Reports a vectorization illegality: print \p DebugMsg for debugging
386
  /// purposes along with the corresponding optimization remark \p RemarkName.
387
  /// If \p I is passed it is an instruction that prevents vectorization.
388
  /// Otherwise the loop is used for the location of the remark.
389
  void reportVectorizationFailure(const StringRef DebugMsg,
390
      const StringRef OREMsg, const StringRef ORETag,
391
      Instruction *I = nullptr) const;
392
393
  /// The loop that we evaluate.
394
  Loop *TheLoop;
395
396
  /// Loop Info analysis.
397
  LoopInfo *LI;
398
399
  /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
400
  /// Applies dynamic knowledge to simplify SCEV expressions in the context
401
  /// of existing SCEV assumptions. The analysis will also add a minimal set
402
  /// of new predicates if this is required to enable vectorization and
403
  /// unrolling.
404
  PredicatedScalarEvolution &PSE;
405
406
  /// Target Transform Info.
407
  TargetTransformInfo *TTI;
408
409
  /// Target Library Info.
410
  TargetLibraryInfo *TLI;
411
412
  /// Dominator Tree.
413
  DominatorTree *DT;
414
415
  // LoopAccess analysis.
416
  std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
417
418
  // And the loop-accesses info corresponding to this loop.  This pointer is
419
  // null until canVectorizeMemory sets it up.
420
  const LoopAccessInfo *LAI = nullptr;
421
422
  /// Interface to emit optimization remarks.
423
  OptimizationRemarkEmitter *ORE;
424
425
  //  ---  vectorization state --- //
426
427
  /// Holds the primary induction variable. This is the counter of the
428
  /// loop.
429
  PHINode *PrimaryInduction = nullptr;
430
431
  /// Holds the reduction variables.
432
  ReductionList Reductions;
433
434
  /// Holds all of the induction variables that we found in the loop.
435
  /// Notice that inductions don't need to start at zero and that induction
436
  /// variables can be pointers.
437
  InductionList Inductions;
438
439
  /// Holds all the casts that participate in the update chain of the induction
440
  /// variables, and that have been proven to be redundant (possibly under a
441
  /// runtime guard). These casts can be ignored when creating the vectorized
442
  /// loop body.
443
  SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
444
445
  /// Holds the phi nodes that are first-order recurrences.
446
  RecurrenceSet FirstOrderRecurrences;
447
448
  /// Holds instructions that need to sink past other instructions to handle
449
  /// first-order recurrences.
450
  DenseMap<Instruction *, Instruction *> SinkAfter;
451
452
  /// Holds the widest induction type encountered.
453
  Type *WidestIndTy = nullptr;
454
455
  /// Allowed outside users. This holds the induction and reduction
456
  /// vars which can be accessed from outside the loop.
457
  SmallPtrSet<Value *, 4> AllowedExit;
458
459
  /// Can we assume the absence of NaNs.
460
  bool HasFunNoNaNAttr = false;
461
462
  /// Vectorization requirements that will go through late-evaluation.
463
  LoopVectorizationRequirements *Requirements;
464
465
  /// Used to emit an analysis of any legality issues.
466
  LoopVectorizeHints *Hints;
467
468
  /// The demanded bits analysis is used to compute the minimum type size in
469
  /// which a reduction can be computed.
470
  DemandedBits *DB;
471
472
  /// The assumption cache analysis is used to compute the minimum type size in
473
  /// which a reduction can be computed.
474
  AssumptionCache *AC;
475
476
  /// While vectorizing these instructions we have to generate a
477
  /// call to the appropriate masked intrinsic
478
  SmallPtrSet<const Instruction *, 8> MaskedOp;
479
};
480
481
} // namespace llvm
482
483
#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H