Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Vectorize/VPlan.h
Line
Count
Source (jump to first uncovered line)
1
//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// This file contains the declarations of the Vectorization Plan base classes:
11
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12
///    VPBlockBase, together implementing a Hierarchical CFG;
13
/// 2. Specializations of GraphTraits that allow VPBlockBase graphs to be
14
///    treated as proper graphs for generic algorithms;
15
/// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained
16
///    within VPBasicBlocks;
17
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18
///    instruction;
19
/// 5. The VPlan class holding a candidate for vectorization;
20
/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21
/// These are documented in docs/VectorizationPlan.rst.
22
//
23
//===----------------------------------------------------------------------===//
24
25
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28
#include "VPlanLoopInfo.h"
29
#include "VPlanValue.h"
30
#include "llvm/ADT/DenseMap.h"
31
#include "llvm/ADT/DepthFirstIterator.h"
32
#include "llvm/ADT/GraphTraits.h"
33
#include "llvm/ADT/Optional.h"
34
#include "llvm/ADT/SmallPtrSet.h"
35
#include "llvm/ADT/SmallSet.h"
36
#include "llvm/ADT/SmallVector.h"
37
#include "llvm/ADT/Twine.h"
38
#include "llvm/ADT/ilist.h"
39
#include "llvm/ADT/ilist_node.h"
40
#include "llvm/Analysis/VectorUtils.h"
41
#include "llvm/IR/IRBuilder.h"
42
#include <algorithm>
43
#include <cassert>
44
#include <cstddef>
45
#include <map>
46
#include <string>
47
48
namespace llvm {
49
50
class LoopVectorizationLegality;
51
class LoopVectorizationCostModel;
52
class BasicBlock;
53
class DominatorTree;
54
class InnerLoopVectorizer;
55
template <class T> class InterleaveGroup;
56
class LoopInfo;
57
class raw_ostream;
58
class Value;
59
class VPBasicBlock;
60
class VPRegionBlock;
61
class VPlan;
62
class VPlanSlp;
63
64
/// A range of powers-of-2 vectorization factors with fixed start and
65
/// adjustable end. The range includes start and excludes end, e.g.,:
66
/// [1, 9) = {1, 2, 4, 8}
67
struct VFRange {
68
  // A power of 2.
69
  const unsigned Start;
70
71
  // Need not be a power of 2. If End <= Start range is empty.
72
  unsigned End;
73
};
74
75
using VPlanPtr = std::unique_ptr<VPlan>;
76
77
/// In what follows, the term "input IR" refers to code that is fed into the
78
/// vectorizer whereas the term "output IR" refers to code that is generated by
79
/// the vectorizer.
80
81
/// VPIteration represents a single point in the iteration space of the output
82
/// (vectorized and/or unrolled) IR loop.
83
struct VPIteration {
84
  /// in [0..UF)
85
  unsigned Part;
86
87
  /// in [0..VF)
88
  unsigned Lane;
89
};
90
91
/// This is a helper struct for maintaining vectorization state. It's used for
92
/// mapping values from the original loop to their corresponding values in
93
/// the new loop. Two mappings are maintained: one for vectorized values and
94
/// one for scalarized values. Vectorized values are represented with UF
95
/// vector values in the new loop, and scalarized values are represented with
96
/// UF x VF scalar values in the new loop. UF and VF are the unroll and
97
/// vectorization factors, respectively.
98
///
99
/// Entries can be added to either map with setVectorValue and setScalarValue,
100
/// which assert that an entry was not already added before. If an entry is to
101
/// replace an existing one, call resetVectorValue and resetScalarValue. This is
102
/// currently needed to modify the mapped values during "fix-up" operations that
103
/// occur once the first phase of widening is complete. These operations include
104
/// type truncation and the second phase of recurrence widening.
105
///
106
/// Entries from either map can be retrieved using the getVectorValue and
107
/// getScalarValue functions, which assert that the desired value exists.
108
struct VectorizerValueMap {
109
  friend struct VPTransformState;
110
111
private:
112
  /// The unroll factor. Each entry in the vector map contains UF vector values.
113
  unsigned UF;
114
115
  /// The vectorization factor. Each entry in the scalar map contains UF x VF
116
  /// scalar values.
117
  unsigned VF;
118
119
  /// The vector and scalar map storage. We use std::map and not DenseMap
120
  /// because insertions to DenseMap invalidate its iterators.
121
  using VectorParts = SmallVector<Value *, 2>;
122
  using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>;
123
  std::map<Value *, VectorParts> VectorMapStorage;
124
  std::map<Value *, ScalarParts> ScalarMapStorage;
125
126
public:
127
  /// Construct an empty map with the given unroll and vectorization factors.
128
17.0k
  VectorizerValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {}
129
130
  /// \return True if the map has any vector entry for \p Key.
131
198k
  bool hasAnyVectorValue(Value *Key) const {
132
198k
    return VectorMapStorage.count(Key);
133
198k
  }
134
135
  /// \return True if the map has a vector entry for \p Key and \p Part.
136
198k
  bool hasVectorValue(Value *Key, unsigned Part) const {
137
198k
    assert(Part < UF && "Queried Vector Part is too large.");
138
198k
    if (!hasAnyVectorValue(Key))
139
25.7k
      return false;
140
172k
    const VectorParts &Entry = VectorMapStorage.find(Key)->second;
141
172k
    assert(Entry.size() == UF && "VectorParts has wrong dimensions.");
142
172k
    return Entry[Part] != nullptr;
143
172k
  }
144
145
  /// \return True if the map has any scalar entry for \p Key.
146
146k
  bool hasAnyScalarValue(Value *Key) const {
147
146k
    return ScalarMapStorage.count(Key);
148
146k
  }
149
150
  /// \return True if the map has a scalar entry for \p Key and \p Instance.
151
98.1k
  bool hasScalarValue(Value *Key, const VPIteration &Instance) const {
152
98.1k
    assert(Instance.Part < UF && "Queried Scalar Part is too large.");
153
98.1k
    assert(Instance.Lane < VF && "Queried Scalar Lane is too large.");
154
98.1k
    if (!hasAnyScalarValue(Key))
155
5.90k
      return false;
156
92.2k
    const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
157
92.2k
    assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
158
92.2k
    assert(Entry[Instance.Part].size() == VF &&
159
92.2k
           "ScalarParts has wrong dimensions.");
160
92.2k
    return Entry[Instance.Part][Instance.Lane] != nullptr;
161
92.2k
  }
162
163
  /// Retrieve the existing vector value that corresponds to \p Key and
164
  /// \p Part.
165
155k
  Value *getVectorValue(Value *Key, unsigned Part) {
166
155k
    assert(hasVectorValue(Key, Part) && "Getting non-existent value.");
167
155k
    return VectorMapStorage[Key][Part];
168
155k
  }
169
170
  /// Retrieve the existing scalar value that corresponds to \p Key and
171
  /// \p Instance.
172
96.6k
  Value *getScalarValue(Value *Key, const VPIteration &Instance) {
173
96.6k
    assert(hasScalarValue(Key, Instance) && "Getting non-existent value.");
174
96.6k
    return ScalarMapStorage[Key][Instance.Part][Instance.Lane];
175
96.6k
  }
176
177
  /// Set a vector value associated with \p Key and \p Part. Assumes such a
178
  /// value is not already set. If it is, use resetVectorValue() instead.
179
195k
  void setVectorValue(Value *Key, unsigned Part, Value *Vector) {
180
195k
    assert(!hasVectorValue(Key, Part) && "Vector value already set for part");
181
195k
    if (!VectorMapStorage.count(Key)) {
182
104k
      VectorParts Entry(UF);
183
104k
      VectorMapStorage[Key] = Entry;
184
104k
    }
185
195k
    VectorMapStorage[Key][Part] = Vector;
186
195k
  }
187
188
  /// Set a scalar value associated with \p Key and \p Instance. Assumes such a
189
  /// value is not already set.
190
138k
  void setScalarValue(Value *Key, const VPIteration &Instance, Value *Scalar) {
191
138k
    assert(!hasScalarValue(Key, Instance) && "Scalar value already set");
192
138k
    if (!ScalarMapStorage.count(Key)) {
193
55.0k
      ScalarParts Entry(UF);
194
55.0k
      // TODO: Consider storing uniform values only per-part, as they occupy
195
55.0k
      //       lane 0 only, keeping the other VF-1 redundant entries null.
196
159k
      for (unsigned Part = 0; Part < UF; 
++Part104k
)
197
104k
        Entry[Part].resize(VF, nullptr);
198
55.0k
      ScalarMapStorage[Key] = Entry;
199
55.0k
    }
200
138k
    ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
201
138k
  }
202
203
  /// Reset the vector value associated with \p Key for the given \p Part.
204
  /// This function can be used to update values that have already been
205
  /// vectorized. This is the case for "fix-up" operations including type
206
  /// truncation and the second phase of recurrence vectorization.
207
2.47k
  void resetVectorValue(Value *Key, unsigned Part, Value *Vector) {
208
2.47k
    assert(hasVectorValue(Key, Part) && "Vector value not set for part");
209
2.47k
    VectorMapStorage[Key][Part] = Vector;
210
2.47k
  }
211
212
  /// Reset the scalar value associated with \p Key for \p Part and \p Lane.
213
  /// This function can be used to update values that have already been
214
  /// scalarized. This is the case for "fix-up" operations including scalar phi
215
  /// nodes for scalarized and predicated instructions.
216
  void resetScalarValue(Value *Key, const VPIteration &Instance,
217
330
                        Value *Scalar) {
218
330
    assert(hasScalarValue(Key, Instance) &&
219
330
           "Scalar value not set for part and lane");
220
330
    ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
221
330
  }
222
};
223
224
/// This class is used to enable the VPlan to invoke a method of ILV. This is
225
/// needed until the method is refactored out of ILV and becomes reusable.
226
struct VPCallback {
227
17.0k
  virtual ~VPCallback() {}
228
  virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
229
};
230
231
/// VPTransformState holds information passed down when "executing" a VPlan,
232
/// needed for generating the output IR.
233
struct VPTransformState {
234
  VPTransformState(unsigned VF, unsigned UF, LoopInfo *LI, DominatorTree *DT,
235
                   IRBuilder<> &Builder, VectorizerValueMap &ValueMap,
236
                   InnerLoopVectorizer *ILV, VPCallback &Callback)
237
      : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder),
238
17.0k
        ValueMap(ValueMap), ILV(ILV), Callback(Callback) {}
239
240
  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
241
  unsigned VF;
242
  unsigned UF;
243
244
  /// Hold the indices to generate specific scalar instructions. Null indicates
245
  /// that all instances are to be generated, using either scalar or vector
246
  /// instructions.
247
  Optional<VPIteration> Instance;
248
249
  struct DataState {
250
    /// A type for vectorized values in the new loop. Each value from the
251
    /// original loop, when vectorized, is represented by UF vector values in
252
    /// the new unrolled loop, where UF is the unroll factor.
253
    typedef SmallVector<Value *, 2> PerPartValuesTy;
254
255
    DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
256
  } Data;
257
258
  /// Get the generated Value for a given VPValue and a given Part. Note that
259
  /// as some Defs are still created by ILV and managed in its ValueMap, this
260
  /// method will delegate the call to ILV in such cases in order to provide
261
  /// callers a consistent API.
262
  /// \see set.
263
2.17k
  Value *get(VPValue *Def, unsigned Part) {
264
2.17k
    // If Values have been set for this Def return the one relevant for \p Part.
265
2.17k
    if (Data.PerPartOutput.count(Def))
266
842
      return Data.PerPartOutput[Def][Part];
267
1.32k
    // Def is managed by ILV: bring the Values from ValueMap.
268
1.32k
    return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
269
1.32k
  }
270
271
  /// Set the generated Value for a given VPValue and a given Part.
272
495
  void set(VPValue *Def, Value *V, unsigned Part) {
273
495
    if (!Data.PerPartOutput.count(Def)) {
274
288
      DataState::PerPartValuesTy Entry(UF);
275
288
      Data.PerPartOutput[Def] = Entry;
276
288
    }
277
495
    Data.PerPartOutput[Def][Part] = V;
278
495
  }
279
280
  /// Hold state information used when constructing the CFG of the output IR,
281
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
282
  struct CFGState {
283
    /// The previous VPBasicBlock visited. Initially set to null.
284
    VPBasicBlock *PrevVPBB = nullptr;
285
286
    /// The previous IR BasicBlock created or used. Initially set to the new
287
    /// header BasicBlock.
288
    BasicBlock *PrevBB = nullptr;
289
290
    /// The last IR BasicBlock in the output IR. Set to the new latch
291
    /// BasicBlock, used for placing the newly created BasicBlocks.
292
    BasicBlock *LastBB = nullptr;
293
294
    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
295
    /// of replication, maps the BasicBlock of the last replica created.
296
    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
297
298
    /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed
299
    /// up at the end of vector code generation.
300
    SmallVector<VPBasicBlock *, 8> VPBBsToFix;
301
302
17.0k
    CFGState() = default;
303
  } CFG;
304
305
  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
306
  LoopInfo *LI;
307
308
  /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
309
  DominatorTree *DT;
310
311
  /// Hold a reference to the IRBuilder used to generate output IR code.
312
  IRBuilder<> &Builder;
313
314
  /// Hold a reference to the Value state information used when generating the
315
  /// Values of the output IR.
316
  VectorizerValueMap &ValueMap;
317
318
  /// Hold a reference to a mapping between VPValues in VPlan and original
319
  /// Values they correspond to.
320
  VPValue2ValueTy VPValue2Value;
321
322
  /// Hold the trip count of the scalar loop.
323
  Value *TripCount = nullptr;
324
325
  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
326
  InnerLoopVectorizer *ILV;
327
328
  VPCallback &Callback;
329
};
330
331
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
332
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
333
class VPBlockBase {
334
  friend class VPBlockUtils;
335
336
private:
337
  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
338
339
  /// An optional name for the block.
340
  std::string Name;
341
342
  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
343
  /// it is a topmost VPBlockBase.
344
  VPRegionBlock *Parent = nullptr;
345
346
  /// List of predecessor blocks.
347
  SmallVector<VPBlockBase *, 1> Predecessors;
348
349
  /// List of successor blocks.
350
  SmallVector<VPBlockBase *, 1> Successors;
351
352
  /// Successor selector, null for zero or single successor blocks.
353
  VPValue *CondBit = nullptr;
354
355
  /// Current block predicate - null if the block does not need a predicate.
356
  VPValue *Predicate = nullptr;
357
358
  /// Add \p Successor as the last successor to this block.
359
56.5k
  void appendSuccessor(VPBlockBase *Successor) {
360
56.5k
    assert(Successor && "Cannot add nullptr successor!");
361
56.5k
    Successors.push_back(Successor);
362
56.5k
  }
363
364
  /// Add \p Predecessor as the last predecessor to this block.
365
56.5k
  void appendPredecessor(VPBlockBase *Predecessor) {
366
56.5k
    assert(Predecessor && "Cannot add nullptr predecessor!");
367
56.5k
    Predecessors.push_back(Predecessor);
368
56.5k
  }
369
370
  /// Remove \p Predecessor from the predecessors of this block.
371
38.5k
  void removePredecessor(VPBlockBase *Predecessor) {
372
38.5k
    auto Pos = std::find(Predecessors.begin(), Predecessors.end(), Predecessor);
373
38.5k
    assert(Pos && "Predecessor does not exist");
374
38.5k
    Predecessors.erase(Pos);
375
38.5k
  }
376
377
  /// Remove \p Successor from the successors of this block.
378
38.5k
  void removeSuccessor(VPBlockBase *Successor) {
379
38.5k
    auto Pos = std::find(Successors.begin(), Successors.end(), Successor);
380
38.5k
    assert(Pos && "Successor does not exist");
381
38.5k
    Successors.erase(Pos);
382
38.5k
  }
383
384
protected:
385
  VPBlockBase(const unsigned char SC, const std::string &N)
386
95.0k
      : SubclassID(SC), Name(N) {}
387
388
public:
389
  /// An enumeration for keeping track of the concrete subclass of VPBlockBase
390
  /// that are actually instantiated. Values of this enumeration are kept in the
391
  /// SubclassID field of the VPBlockBase objects. They are used for concrete
392
  /// type identification.
393
  using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };
394
395
  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;
396
397
95.0k
  virtual ~VPBlockBase() = default;
398
399
1.91k
  const std::string &getName() const { return Name; }
400
401
2.78k
  void setName(const Twine &newName) { Name = newName.str(); }
402
403
  /// \return an ID for the concrete type of this object.
404
  /// This is used to implement the classof checks. This should not be used
405
  /// for any other purpose, as the values may change as LLVM evolves.
406
6.25k
  unsigned getVPBlockID() const { return SubclassID; }
407
408
53.6k
  VPRegionBlock *getParent() { return Parent; }
409
0
  const VPRegionBlock *getParent() const { return Parent; }
410
411
59.3k
  void setParent(VPRegionBlock *P) { Parent = P; }
412
413
  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
414
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
415
  /// VPBlockBase is a VPBasicBlock, it is returned.
416
  const VPBasicBlock *getEntryBasicBlock() const;
417
  VPBasicBlock *getEntryBasicBlock();
418
419
  /// \return the VPBasicBlock that is the exit of this VPBlockBase,
420
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
421
  /// VPBlockBase is a VPBasicBlock, it is returned.
422
  const VPBasicBlock *getExitBasicBlock() const;
423
  VPBasicBlock *getExitBasicBlock();
424
425
0
  const VPBlocksTy &getSuccessors() const { return Successors; }
426
176k
  VPBlocksTy &getSuccessors() { return Successors; }
427
428
0
  const VPBlocksTy &getPredecessors() const { return Predecessors; }
429
4.14k
  VPBlocksTy &getPredecessors() { return Predecessors; }
430
431
  /// \return the successor of this VPBlockBase if it has a single successor.
432
  /// Otherwise return a null pointer.
433
40.8k
  VPBlockBase *getSingleSuccessor() const {
434
40.8k
    return (Successors.size() == 1 ? 
*Successors.begin()39.8k
:
nullptr956
);
435
40.8k
  }
436
437
  /// \return the predecessor of this VPBlockBase if it has a single
438
  /// predecessor. Otherwise return a null pointer.
439
3.82k
  VPBlockBase *getSinglePredecessor() const {
440
3.82k
    return (Predecessors.size() == 1 ? 
*Predecessors.begin()2.87k
:
nullptr956
);
441
3.82k
  }
442
443
68
  size_t getNumSuccessors() const { return Successors.size(); }
444
84
  size_t getNumPredecessors() const { return Predecessors.size(); }
445
446
  /// An Enclosing Block of a block B is any block containing B, including B
447
  /// itself. \return the closest enclosing block starting from "this", which
448
  /// has successors. \return the root enclosing block if all enclosing blocks
449
  /// have no successors.
450
  VPBlockBase *getEnclosingBlockWithSuccessors();
451
452
  /// \return the closest enclosing block starting from "this", which has
453
  /// predecessors. \return the root enclosing block if all enclosing blocks
454
  /// have no predecessors.
455
  VPBlockBase *getEnclosingBlockWithPredecessors();
456
457
  /// \return the successors either attached directly to this VPBlockBase or, if
458
  /// this VPBlockBase is the exit block of a VPRegionBlock and has no
459
  /// successors of its own, search recursively for the first enclosing
460
  /// VPRegionBlock that has successors and return them. If no such
461
  /// VPRegionBlock exists, return the (empty) successors of the topmost
462
  /// VPBlockBase reached.
463
7
  const VPBlocksTy &getHierarchicalSuccessors() {
464
7
    return getEnclosingBlockWithSuccessors()->getSuccessors();
465
7
  }
466
467
  /// \return the hierarchical successor of this VPBlockBase if it has a single
468
  /// hierarchical successor. Otherwise return a null pointer.
469
2.25k
  VPBlockBase *getSingleHierarchicalSuccessor() {
470
2.25k
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
471
2.25k
  }
472
473
  /// \return the predecessors either attached directly to this VPBlockBase or,
474
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
475
  /// predecessors of its own, search recursively for the first enclosing
476
  /// VPRegionBlock that has predecessors and return them. If no such
477
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
478
  /// VPBlockBase reached.
479
1.91k
  const VPBlocksTy &getHierarchicalPredecessors() {
480
1.91k
    return getEnclosingBlockWithPredecessors()->getPredecessors();
481
1.91k
  }
482
483
  /// \return the hierarchical predecessor of this VPBlockBase if it has a
484
  /// single hierarchical predecessor. Otherwise return a null pointer.
485
3.82k
  VPBlockBase *getSingleHierarchicalPredecessor() {
486
3.82k
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
487
3.82k
  }
488
489
  /// \return the condition bit selecting the successor.
490
77
  VPValue *getCondBit() { return CondBit; }
491
492
0
  const VPValue *getCondBit() const { return CondBit; }
493
494
17
  void setCondBit(VPValue *CV) { CondBit = CV; }
495
496
24
  VPValue *getPredicate() { return Predicate; }
497
498
0
  const VPValue *getPredicate() const { return Predicate; }
499
500
18
  void setPredicate(VPValue *Pred) { Predicate = Pred; }
501
502
  /// Set a given VPBlockBase \p Successor as the single successor of this
503
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
504
  /// This VPBlockBase must have no successors.
505
48.1k
  void setOneSuccessor(VPBlockBase *Successor) {
506
48.1k
    assert(Successors.empty() && "Setting one successor when others exist.");
507
48.1k
    appendSuccessor(Successor);
508
48.1k
  }
509
510
  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
511
  /// successors of this VPBlockBase. \p Condition is set as the successor
512
  /// selector. This VPBlockBase is not added as predecessor of \p IfTrue or \p
513
  /// IfFalse. This VPBlockBase must have no successors.
514
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
515
2.82k
                        VPValue *Condition) {
516
2.82k
    assert(Successors.empty() && "Setting two successors when others exist.");
517
2.82k
    assert(Condition && "Setting two successors without condition!");
518
2.82k
    CondBit = Condition;
519
2.82k
    appendSuccessor(IfTrue);
520
2.82k
    appendSuccessor(IfFalse);
521
2.82k
  }
522
523
  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
524
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
525
  /// as successor of any VPBasicBlock in \p NewPreds.
526
53.7k
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
527
53.7k
    assert(Predecessors.empty() && "Block predecessors already set.");
528
53.7k
    for (auto *Pred : NewPreds)
529
53.7k
      appendPredecessor(Pred);
530
53.7k
  }
531
532
  /// Remove all the predecessor of this block.
533
8
  void clearPredecessors() { Predecessors.clear(); }
534
535
  /// Remove all the successors of this block and set to null its condition bit
536
8
  void clearSuccessors() {
537
8
    Successors.clear();
538
8
    CondBit = nullptr;
539
8
  }
540
541
  /// The method which generates the output IR that correspond to this
542
  /// VPBlockBase, thereby "executing" the VPlan.
543
  virtual void execute(struct VPTransformState *State) = 0;
544
545
  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
546
  static void deleteCFG(VPBlockBase *Entry);
547
548
0
  void printAsOperand(raw_ostream &OS, bool PrintType) const {
549
0
    OS << getName();
550
0
  }
551
552
0
  void print(raw_ostream &OS) const {
553
0
    // TODO: Only printing VPBB name for now since we only have dot printing
554
0
    // support for VPInstructions/Recipes.
555
0
    printAsOperand(OS, false);
556
0
  }
557
558
  /// Return true if it is legal to hoist instructions into this block.
559
  bool isLegalToHoistInto() {
560
    // There are currently no constraints that prevent an instruction to be
561
    // hoisted into a VPBlockBase.
562
    return true;
563
  }
564
};
565
566
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
567
/// instructions.
568
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
569
  friend VPBasicBlock;
570
571
private:
572
  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
573
574
  /// Each VPRecipe belongs to a single VPBasicBlock.
575
  VPBasicBlock *Parent = nullptr;
576
577
public:
578
  /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
579
  /// that is actually instantiated. Values of this enumeration are kept in the
580
  /// SubclassID field of the VPRecipeBase objects. They are used for concrete
581
  /// type identification.
582
  using VPRecipeTy = enum {
583
    VPBlendSC,
584
    VPBranchOnMaskSC,
585
    VPInstructionSC,
586
    VPInterleaveSC,
587
    VPPredInstPHISC,
588
    VPReplicateSC,
589
    VPWidenIntOrFpInductionSC,
590
    VPWidenMemoryInstructionSC,
591
    VPWidenPHISC,
592
    VPWidenSC,
593
  };
594
595
335k
  VPRecipeBase(const unsigned char SC) : SubclassID(SC) {}
596
335k
  virtual ~VPRecipeBase() = default;
597
598
  /// \return an ID for the concrete type of this object.
599
  /// This is used to implement the classof checks. This should not be used
600
  /// for any other purpose, as the values may change as LLVM evolves.
601
77.7k
  unsigned getVPRecipeID() const { return SubclassID; }
602
603
  /// \return the VPBasicBlock which this VPRecipe belongs to.
604
318
  VPBasicBlock *getParent() { return Parent; }
605
0
  const VPBasicBlock *getParent() const { return Parent; }
606
607
  /// The method which generates the output IR instructions that correspond to
608
  /// this VPRecipe, thereby "executing" the VPlan.
609
  virtual void execute(struct VPTransformState &State) = 0;
610
611
  /// Each recipe prints itself.
612
  virtual void print(raw_ostream &O, const Twine &Indent) const = 0;
613
614
  /// Insert an unlinked recipe into a basic block immediately before
615
  /// the specified recipe.
616
  void insertBefore(VPRecipeBase *InsertPos);
617
618
  /// This method unlinks 'this' from the containing basic block and deletes it.
619
  ///
620
  /// \returns an iterator pointing to the element after the erased one
621
  iplist<VPRecipeBase>::iterator eraseFromParent();
622
};
623
624
/// This is a concrete Recipe that models a single VPlan-level instruction.
625
/// While as any Recipe it may generate a sequence of IR instructions when
626
/// executed, these instructions would always form a single-def expression as
627
/// the VPInstruction is also a single def-use vertex.
628
class VPInstruction : public VPUser, public VPRecipeBase {
629
  friend class VPlanHCFGTransforms;
630
  friend class VPlanSlp;
631
632
public:
633
  /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
634
  enum {
635
    Not = Instruction::OtherOpsEnd + 1,
636
    ICmpULE,
637
    SLPLoad,
638
    SLPStore,
639
  };
640
641
private:
642
  typedef unsigned char OpcodeTy;
643
  OpcodeTy Opcode;
644
645
  /// Utility method serving execute(): generates a single instance of the
646
  /// modeled instruction.
647
  void generateInstruction(VPTransformState &State, unsigned Part);
648
649
protected:
650
  Instruction *getUnderlyingInstr() {
651
    return cast_or_null<Instruction>(getUnderlyingValue());
652
  }
653
654
  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
655
656
public:
657
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
658
      : VPUser(VPValue::VPInstructionSC, Operands),
659
3.98k
        VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) {}
660
661
  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
662
0
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {}
663
664
  /// Method to support type inquiry through isa, cast, and dyn_cast.
665
190
  static inline bool classof(const VPValue *V) {
666
190
    return V->getVPValueID() == VPValue::VPInstructionSC;
667
190
  }
668
669
0
  VPInstruction *clone() const {
670
0
    SmallVector<VPValue *, 2> Operands(operands());
671
0
    return new VPInstruction(Opcode, Operands);
672
0
  }
673
674
  /// Method to support type inquiry through isa, cast, and dyn_cast.
675
  static inline bool classof(const VPRecipeBase *R) {
676
    return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
677
  }
678
679
2.09k
  unsigned getOpcode() const { return Opcode; }
680
681
  /// Generate the instruction.
682
  /// TODO: We currently execute only per-part unless a specific instance is
683
  /// provided.
684
  void execute(VPTransformState &State) override;
685
686
  /// Print the Recipe.
687
  void print(raw_ostream &O, const Twine &Indent) const override;
688
689
  /// Print the VPInstruction.
690
  void print(raw_ostream &O) const;
691
692
  /// Return true if this instruction may modify memory.
693
  bool mayWriteToMemory() const {
694
    // TODO: we can use attributes of the called function to rule out memory
695
    //       modifications.
696
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
697
           Opcode == Instruction::Invoke || Opcode == SLPStore;
698
  }
699
};
700
701
/// VPWidenRecipe is a recipe for producing a copy of vector type for each
702
/// Instruction in its ingredients independently, in order. This recipe covers
703
/// most of the traditional vectorization cases where each ingredient transforms
704
/// into a vectorized version of itself.
705
class VPWidenRecipe : public VPRecipeBase {
706
private:
707
  /// Hold the ingredients by pointing to their original BasicBlock location.
708
  BasicBlock::iterator Begin;
709
  BasicBlock::iterator End;
710
711
public:
712
25.7k
  VPWidenRecipe(Instruction *I) : VPRecipeBase(VPWidenSC) {
713
25.7k
    End = I->getIterator();
714
25.7k
    Begin = End++;
715
25.7k
  }
716
717
25.7k
  ~VPWidenRecipe() override = default;
718
719
  /// Method to support type inquiry through isa, cast, and dyn_cast.
720
77.7k
  static inline bool classof(const VPRecipeBase *V) {
721
77.7k
    return V->getVPRecipeID() == VPRecipeBase::VPWidenSC;
722
77.7k
  }
723
724
  /// Produce widened copies of all Ingredients.
725
  void execute(VPTransformState &State) override;
726
727
  /// Augment the recipe to include Instr, if it lies at its End.
728
53.6k
  bool appendInstruction(Instruction *Instr) {
729
53.6k
    if (End != Instr->getIterator())
730
251
      return false;
731
53.3k
    End++;
732
53.3k
    return true;
733
53.3k
  }
734
735
  /// Print the recipe.
736
  void print(raw_ostream &O, const Twine &Indent) const override;
737
};
738
739
/// A recipe for handling phi nodes of integer and floating-point inductions,
740
/// producing their vector and scalar values.
741
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase {
742
private:
743
  PHINode *IV;
744
  TruncInst *Trunc;
745
746
public:
747
  VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr)
748
42.8k
      : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {}
749
42.8k
  ~VPWidenIntOrFpInductionRecipe() override = default;
750
751
  /// Method to support type inquiry through isa, cast, and dyn_cast.
752
0
  static inline bool classof(const VPRecipeBase *V) {
753
0
    return V->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC;
754
0
  }
755
756
  /// Generate the vectorized and scalarized versions of the phi node as
757
  /// needed by their users.
758
  void execute(VPTransformState &State) override;
759
760
  /// Print the recipe.
761
  void print(raw_ostream &O, const Twine &Indent) const override;
762
};
763
764
/// A recipe for handling all phi nodes except for integer and FP inductions.
765
class VPWidenPHIRecipe : public VPRecipeBase {
766
private:
767
  PHINode *Phi;
768
769
public:
770
14.9k
  VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {}
771
14.9k
  ~VPWidenPHIRecipe() override = default;
772
773
  /// Method to support type inquiry through isa, cast, and dyn_cast.
774
  static inline bool classof(const VPRecipeBase *V) {
775
    return V->getVPRecipeID() == VPRecipeBase::VPWidenPHISC;
776
  }
777
778
  /// Generate the phi/select nodes.
779
  void execute(VPTransformState &State) override;
780
781
  /// Print the recipe.
782
  void print(raw_ostream &O, const Twine &Indent) const override;
783
};
784
785
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
786
/// instructions.
787
class VPBlendRecipe : public VPRecipeBase {
788
private:
789
  PHINode *Phi;
790
791
  /// The blend operation is a User of a mask, if not null.
792
  std::unique_ptr<VPUser> User;
793
794
public:
795
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Masks)
796
1.08k
      : VPRecipeBase(VPBlendSC), Phi(Phi) {
797
1.08k
    assert((Phi->getNumIncomingValues() == 1 ||
798
1.08k
            Phi->getNumIncomingValues() == Masks.size()) &&
799
1.08k
           "Expected the same number of incoming values and masks");
800
1.08k
    if (!Masks.empty())
801
1.08k
      User.reset(new VPUser(Masks));
802
1.08k
  }
803
804
  /// Method to support type inquiry through isa, cast, and dyn_cast.
805
0
  static inline bool classof(const VPRecipeBase *V) {
806
0
    return V->getVPRecipeID() == VPRecipeBase::VPBlendSC;
807
0
  }
808
809
  /// Generate the phi/select nodes.
810
  void execute(VPTransformState &State) override;
811
812
  /// Print the recipe.
813
  void print(raw_ostream &O, const Twine &Indent) const override;
814
};
815
816
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
817
/// or stores into one wide load/store and shuffles.
818
class VPInterleaveRecipe : public VPRecipeBase {
819
private:
820
  const InterleaveGroup<Instruction> *IG;
821
  std::unique_ptr<VPUser> User;
822
823
public:
824
  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Mask)
825
1.48k
      : VPRecipeBase(VPInterleaveSC), IG(IG) {
826
1.48k
    if (Mask) // Create a VPInstruction to register as a user of the mask.
827
11
      User.reset(new VPUser({Mask}));
828
1.48k
  }
829
1.48k
  ~VPInterleaveRecipe() override = default;
830
831
  /// Method to support type inquiry through isa, cast, and dyn_cast.
832
0
  static inline bool classof(const VPRecipeBase *V) {
833
0
    return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC;
834
0
  }
835
836
  /// Generate the wide load or store, and shuffles.
837
  void execute(VPTransformState &State) override;
838
839
  /// Print the recipe.
840
  void print(raw_ostream &O, const Twine &Indent) const override;
841
842
0
  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
843
};
844
845
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
846
/// copies of the original scalar type, one per lane, instead of producing a
847
/// single copy of widened type for all lanes. If the instruction is known to be
848
/// uniform only one copy, per lane zero, will be generated.
849
class VPReplicateRecipe : public VPRecipeBase {
850
private:
851
  /// The instruction being replicated.
852
  Instruction *Ingredient;
853
854
  /// Indicator if only a single replica per lane is needed.
855
  bool IsUniform;
856
857
  /// Indicator if the replicas are also predicated.
858
  bool IsPredicated;
859
860
  /// Indicator if the scalar values should also be packed into a vector.
861
  bool AlsoPack;
862
863
public:
864
  VPReplicateRecipe(Instruction *I, bool IsUniform, bool IsPredicated = false)
865
      : VPRecipeBase(VPReplicateSC), Ingredient(I), IsUniform(IsUniform),
866
213k
        IsPredicated(IsPredicated) {
867
213k
    // Retain the previous behavior of predicateInstructions(), where an
868
213k
    // insert-element of a predicated instruction got hoisted into the
869
213k
    // predicated basic block iff it was its only user. This is achieved by
870
213k
    // having predicated instructions also pack their values into a vector by
871
213k
    // default unless they have a replicated user which uses their scalar value.
872
213k
    AlsoPack = IsPredicated && 
!I->use_empty()2.78k
;
873
213k
  }
874
875
213k
  ~VPReplicateRecipe() override = default;
876
877
  /// Method to support type inquiry through isa, cast, and dyn_cast.
878
0
  static inline bool classof(const VPRecipeBase *V) {
879
0
    return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC;
880
0
  }
881
882
  /// Generate replicas of the desired Ingredient. Replicas will be generated
883
  /// for all parts and lanes unless a specific part and lane are specified in
884
  /// the \p State.
885
  void execute(VPTransformState &State) override;
886
887
1.08k
  void setAlsoPack(bool Pack) { AlsoPack = Pack; }
888
889
  /// Print the recipe.
890
  void print(raw_ostream &O, const Twine &Indent) const override;
891
};
892
893
/// A recipe for generating conditional branches on the bits of a mask.
894
class VPBranchOnMaskRecipe : public VPRecipeBase {
895
private:
896
  std::unique_ptr<VPUser> User;
897
898
public:
899
2.78k
  VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) {
900
2.78k
    if (BlockInMask) // nullptr means all-one mask.
901
2.78k
      User.reset(new VPUser({BlockInMask}));
902
2.78k
  }
903
904
  /// Method to support type inquiry through isa, cast, and dyn_cast.
905
0
  static inline bool classof(const VPRecipeBase *V) {
906
0
    return V->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC;
907
0
  }
908
909
  /// Generate the extraction of the appropriate bit from the block mask and the
910
  /// conditional branch.
911
  void execute(VPTransformState &State) override;
912
913
  /// Print the recipe.
914
0
  void print(raw_ostream &O, const Twine &Indent) const override {
915
0
    O << " +\n" << Indent << "\"BRANCH-ON-MASK ";
916
0
    if (User)
917
0
      O << *User->getOperand(0);
918
0
    else
919
0
      O << " All-One";
920
0
    O << "\\l\"";
921
0
  }
922
};
923
924
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
925
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
926
/// order to merge values that are set under such a branch and feed their uses.
927
/// The phi nodes can be scalar or vector depending on the users of the value.
928
/// This recipe works in concert with VPBranchOnMaskRecipe.
929
class VPPredInstPHIRecipe : public VPRecipeBase {
930
private:
931
  Instruction *PredInst;
932
933
public:
934
  /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
935
  /// nodes after merging back from a Branch-on-Mask.
936
  VPPredInstPHIRecipe(Instruction *PredInst)
937
1.65k
      : VPRecipeBase(VPPredInstPHISC), PredInst(PredInst) {}
938
1.65k
  ~VPPredInstPHIRecipe() override = default;
939
940
  /// Method to support type inquiry through isa, cast, and dyn_cast.
941
0
  static inline bool classof(const VPRecipeBase *V) {
942
0
    return V->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC;
943
0
  }
944
945
  /// Generates phi nodes for live-outs as needed to retain SSA form.
946
  void execute(VPTransformState &State) override;
947
948
  /// Print the recipe.
949
  void print(raw_ostream &O, const Twine &Indent) const override;
950
};
951
952
/// A Recipe for widening load/store operations.
953
/// TODO: We currently execute only per-part unless a specific instance is
954
/// provided.
955
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
956
private:
957
  Instruction &Instr;
958
  std::unique_ptr<VPUser> User;
959
960
public:
961
  VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Mask)
962
27.6k
      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr) {
963
27.6k
    if (Mask) // Create a VPInstruction to register as a user of the mask.
964
106
      User.reset(new VPUser({Mask}));
965
27.6k
  }
966
967
  /// Method to support type inquiry through isa, cast, and dyn_cast.
968
  static inline bool classof(const VPRecipeBase *V) {
969
    return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
970
  }
971
972
  /// Generate the wide load/store.
973
  void execute(VPTransformState &State) override;
974
975
  /// Print the recipe.
976
  void print(raw_ostream &O, const Twine &Indent) const override;
977
};
978
979
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
980
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
981
/// output IR instructions.
982
class VPBasicBlock : public VPBlockBase {
983
public:
984
  using RecipeListTy = iplist<VPRecipeBase>;
985
986
private:
987
  /// The VPRecipes held in the order of output instructions to generate.
988
  RecipeListTy Recipes;
989
990
public:
991
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
992
92.2k
      : VPBlockBase(VPBasicBlockSC, Name.str()) {
993
92.2k
    if (Recipe)
994
7.21k
      appendRecipe(Recipe);
995
92.2k
  }
996
997
92.2k
  ~VPBasicBlock() override { Recipes.clear(); }
998
999
  /// Instruction iterators...
1000
  using iterator = RecipeListTy::iterator;
1001
  using const_iterator = RecipeListTy::const_iterator;
1002
  using reverse_iterator = RecipeListTy::reverse_iterator;
1003
  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;
1004
1005
  //===--------------------------------------------------------------------===//
1006
  /// Recipe iterator methods
1007
  ///
1008
155
  inline iterator begin() { return Recipes.begin(); }
1009
0
  inline const_iterator begin() const { return Recipes.begin(); }
1010
373k
  inline iterator end() { return Recipes.end(); }
1011
0
  inline const_iterator end() const { return Recipes.end(); }
1012
1013
0
  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
1014
0
  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
1015
0
  inline reverse_iterator rend() { return Recipes.rend(); }
1016
0
  inline const_reverse_iterator rend() const { return Recipes.rend(); }
1017
1018
  inline size_t size() const { return Recipes.size(); }
1019
79.0k
  inline bool empty() const { return Recipes.empty(); }
1020
0
  inline const VPRecipeBase &front() const { return Recipes.front(); }
1021
0
  inline VPRecipeBase &front() { return Recipes.front(); }
1022
0
  inline const VPRecipeBase &back() const { return Recipes.back(); }
1023
77.6k
  inline VPRecipeBase &back() { return Recipes.back(); }
1024
1025
  /// Returns a reference to the list of recipes.
1026
191
  RecipeListTy &getRecipeList() { return Recipes; }
1027
1028
  /// Returns a pointer to a member of the recipe list.
1029
0
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
1030
0
    return &VPBasicBlock::Recipes;
1031
0
  }
1032
1033
  /// Method to support type inquiry through isa, cast, and dyn_cast.
1034
38
  static inline bool classof(const VPBlockBase *V) {
1035
38
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
1036
38
  }
1037
1038
335k
  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
1039
335k
    assert(Recipe && "No recipe to append.");
1040
335k
    assert(!Recipe->Parent && "Recipe already in VPlan");
1041
335k
    Recipe->Parent = this;
1042
335k
    Recipes.insert(InsertPt, Recipe);
1043
335k
  }
1044
1045
  /// Augment the existing recipes of a VPBasicBlock with an additional
1046
  /// \p Recipe as the last recipe.
1047
331k
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
1048
1049
  /// The method which generates the output IR instructions that correspond to
1050
  /// this VPBasicBlock, thereby "executing" the VPlan.
1051
  void execute(struct VPTransformState *State) override;
1052
1053
private:
1054
  /// Create an IR BasicBlock to hold the output instructions generated by this
1055
  /// VPBasicBlock, and return it. Update the CFGState accordingly.
1056
  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
1057
};
1058
1059
/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
1060
/// which form a Single-Entry-Single-Exit subgraph of the output IR CFG.
1061
/// A VPRegionBlock may indicate that its contents are to be replicated several
1062
/// times. This is designed to support predicated scalarization, in which a
1063
/// scalar if-then code structure needs to be generated VF * UF times. Having
1064
/// this replication indicator helps to keep a single model for multiple
1065
/// candidate VF's. The actual replication takes place only once the desired VF
1066
/// and UF have been determined.
1067
class VPRegionBlock : public VPBlockBase {
1068
private:
1069
  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
1070
  VPBlockBase *Entry;
1071
1072
  /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock.
1073
  VPBlockBase *Exit;
1074
1075
  /// An indicator whether this region is to generate multiple replicated
1076
  /// instances of output IR corresponding to its VPBlockBases.
1077
  bool IsReplicator;
1078
1079
public:
1080
  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit,
1081
                const std::string &Name = "", bool IsReplicator = false)
1082
      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit),
1083
2.78k
        IsReplicator(IsReplicator) {
1084
2.78k
    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
1085
2.78k
    assert(Exit->getSuccessors().empty() && "Exit block has successors.");
1086
2.78k
    Entry->setParent(this);
1087
2.78k
    Exit->setParent(this);
1088
2.78k
  }
1089
  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
1090
      : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr),
1091
25
        IsReplicator(IsReplicator) {}
1092
1093
2.80k
  ~VPRegionBlock() override {
1094
2.80k
    if (Entry)
1095
2.80k
      deleteCFG(Entry);
1096
2.80k
  }
1097
1098
  /// Method to support type inquiry through isa, cast, and dyn_cast.
1099
6.21k
  static inline bool classof(const VPBlockBase *V) {
1100
6.21k
    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
1101
6.21k
  }
1102
1103
0
  const VPBlockBase *getEntry() const { return Entry; }
1104
71
  VPBlockBase *getEntry() { return Entry; }
1105
1106
  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
1107
  /// EntryBlock must have no predecessors.
1108
25
  void setEntry(VPBlockBase *EntryBlock) {
1109
25
    assert(EntryBlock->getPredecessors().empty() &&
1110
25
           "Entry block cannot have predecessors.");
1111
25
    Entry = EntryBlock;
1112
25
    EntryBlock->setParent(this);
1113
25
  }
1114
1115
  // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a
1116
  // specific interface of llvm::Function, instead of using
1117
  // GraphTraints::getEntryNode. We should add a new template parameter to
1118
  // DominatorTreeBase representing the Graph type.
1119
  VPBlockBase &front() const { return *Entry; }
1120
1121
0
  const VPBlockBase *getExit() const { return Exit; }
1122
343
  VPBlockBase *getExit() { return Exit; }
1123
1124
  /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p
1125
  /// ExitBlock must have no successors.
1126
25
  void setExit(VPBlockBase *ExitBlock) {
1127
25
    assert(ExitBlock->getSuccessors().empty() &&
1128
25
           "Exit block cannot have successors.");
1129
25
    Exit = ExitBlock;
1130
25
    ExitBlock->setParent(this);
1131
25
  }
1132
1133
  /// An indicator whether this region is to generate multiple replicated
1134
  /// instances of output IR corresponding to its VPBlockBases.
1135
332
  bool isReplicator() const { return IsReplicator; }
1136
1137
  /// The method which generates the output IR instructions that correspond to
1138
  /// this VPRegionBlock, thereby "executing" the VPlan.
1139
  void execute(struct VPTransformState *State) override;
1140
};
1141
1142
/// VPlan models a candidate for vectorization, encoding various decisions take
1143
/// to produce efficient output IR, including which branches, basic-blocks and
1144
/// output IR instructions to generate, and their cost. VPlan holds a
1145
/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
1146
/// VPBlock.
1147
class VPlan {
1148
  friend class VPlanPrinter;
1149
1150
private:
1151
  /// Hold the single entry to the Hierarchical CFG of the VPlan.
1152
  VPBlockBase *Entry;
1153
1154
  /// Holds the VFs applicable to this VPlan.
1155
  SmallSet<unsigned, 2> VFs;
1156
1157
  /// Holds the name of the VPlan, for printing.
1158
  std::string Name;
1159
1160
  /// Holds all the external definitions created for this VPlan.
1161
  // TODO: Introduce a specific representation for external definitions in
1162
  // VPlan. External definitions must be immutable and hold a pointer to its
1163
  // underlying IR that will be used to implement its structural comparison
1164
  // (operators '==' and '<').
1165
  SmallPtrSet<VPValue *, 16> VPExternalDefs;
1166
1167
  /// Represents the backedge taken count of the original loop, for folding
1168
  /// the tail.
1169
  VPValue *BackedgeTakenCount = nullptr;
1170
1171
  /// Holds a mapping between Values and their corresponding VPValue inside
1172
  /// VPlan.
1173
  Value2VPValueTy Value2VPValue;
1174
1175
  /// Holds the VPLoopInfo analysis for this VPlan.
1176
  VPLoopInfo VPLInfo;
1177
1178
  /// Holds the condition bit values built during VPInstruction to VPRecipe transformation.
1179
  SmallVector<VPValue *, 4> VPCBVs;
1180
1181
public:
1182
38.5k
  VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {}
1183
1184
38.5k
  ~VPlan() {
1185
38.5k
    if (Entry)
1186
38.5k
      VPBlockBase::deleteCFG(Entry);
1187
38.5k
    for (auto &MapEntry : Value2VPValue)
1188
1.97k
      if (MapEntry.second != BackedgeTakenCount)
1189
1.96k
        delete MapEntry.second;
1190
38.5k
    if (BackedgeTakenCount)
1191
35
      delete BackedgeTakenCount; // Delete once, if in Value2VPValue or not.
1192
38.5k
    for (VPValue *Def : VPExternalDefs)
1193
182
      delete Def;
1194
38.5k
    for (VPValue *CBV : VPCBVs)
1195
17
      delete CBV;
1196
38.5k
  }
1197
1198
  /// Generate the IR code for this VPlan.
1199
  void execute(struct VPTransformState *State);
1200
1201
38.5k
  VPBlockBase *getEntry() { return Entry; }
1202
0
  const VPBlockBase *getEntry() const { return Entry; }
1203
1204
38.5k
  VPBlockBase *setEntry(VPBlockBase *Block) { return Entry = Block; }
1205
1206
  /// The backedge taken count of the original loop.
1207
35
  VPValue *getOrCreateBackedgeTakenCount() {
1208
35
    if (!BackedgeTakenCount)
1209
35
      BackedgeTakenCount = new VPValue();
1210
35
    return BackedgeTakenCount;
1211
35
  }
1212
1213
51.6k
  void addVF(unsigned VF) { VFs.insert(VF); }
1214
1215
32.9k
  bool hasVF(unsigned VF) { return VFs.count(VF); }
1216
1217
0
  const std::string &getName() const { return Name; }
1218
1219
38.5k
  void setName(const Twine &newName) { Name = newName.str(); }
1220
1221
  /// Add \p VPVal to the pool of external definitions if it's not already
1222
  /// in the pool.
1223
182
  void addExternalDef(VPValue *VPVal) {
1224
182
    VPExternalDefs.insert(VPVal);
1225
182
  }
1226
1227
  /// Add \p CBV to the vector of condition bit values.
1228
17
  void addCBV(VPValue *CBV) {
1229
17
    VPCBVs.push_back(CBV);
1230
17
  }
1231
1232
1.96k
  void addVPValue(Value *V) {
1233
1.96k
    assert(V && "Trying to add a null Value to VPlan");
1234
1.96k
    assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
1235
1.96k
    Value2VPValue[V] = new VPValue();
1236
1.96k
  }
1237
1238
3.33k
  VPValue *getVPValue(Value *V) {
1239
3.33k
    assert(V && "Trying to get the VPValue of a null Value");
1240
3.33k
    assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
1241
3.33k
    return Value2VPValue[V];
1242
3.33k
  }
1243
1244
  /// Return the VPLoopInfo analysis for this VPlan.
1245
26
  VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
1246
0
  const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
1247
1248
private:
1249
  /// Add to the given dominator tree the header block and every new basic block
1250
  /// that was created between it and the latch block, inclusive.
1251
  static void updateDominatorTree(DominatorTree *DT,
1252
                                  BasicBlock *LoopPreHeaderBB,
1253
                                  BasicBlock *LoopLatchBB);
1254
};
1255
1256
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
1257
/// indented and follows the dot format.
1258
class VPlanPrinter {
1259
  friend inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan);
1260
  friend inline raw_ostream &operator<<(raw_ostream &OS,
1261
                                        const struct VPlanIngredient &I);
1262
1263
private:
1264
  raw_ostream &OS;
1265
  VPlan &Plan;
1266
  unsigned Depth;
1267
  unsigned TabWidth = 2;
1268
  std::string Indent;
1269
  unsigned BID = 0;
1270
  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
1271
1272
0
  VPlanPrinter(raw_ostream &O, VPlan &P) : OS(O), Plan(P) {}
1273
1274
  /// Handle indentation.
1275
0
  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
1276
1277
  /// Print a given \p Block of the Plan.
1278
  void dumpBlock(const VPBlockBase *Block);
1279
1280
  /// Print the information related to the CFG edges going out of a given
1281
  /// \p Block, followed by printing the successor blocks themselves.
1282
  void dumpEdges(const VPBlockBase *Block);
1283
1284
  /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
1285
  /// its successor blocks.
1286
  void dumpBasicBlock(const VPBasicBlock *BasicBlock);
1287
1288
  /// Print a given \p Region of the Plan.
1289
  void dumpRegion(const VPRegionBlock *Region);
1290
1291
0
  unsigned getOrCreateBID(const VPBlockBase *Block) {
1292
0
    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
1293
0
  }
1294
1295
  const Twine getOrCreateName(const VPBlockBase *Block);
1296
1297
  const Twine getUID(const VPBlockBase *Block);
1298
1299
  /// Print the information related to a CFG edge between two VPBlockBases.
1300
  void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
1301
                const Twine &Label);
1302
1303
  void dump();
1304
1305
  static void printAsIngredient(raw_ostream &O, Value *V);
1306
};
1307
1308
struct VPlanIngredient {
1309
  Value *V;
1310
1311
0
  VPlanIngredient(Value *V) : V(V) {}
1312
};
1313
1314
0
inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
1315
0
  VPlanPrinter::printAsIngredient(OS, I.V);
1316
0
  return OS;
1317
0
}
1318
1319
0
inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) {
1320
0
  VPlanPrinter Printer(OS, Plan);
1321
0
  Printer.dump();
1322
0
  return OS;
1323
0
}
1324
1325
//===----------------------------------------------------------------------===//
1326
// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs     //
1327
//===----------------------------------------------------------------------===//
1328
1329
// The following set of template specializations implement GraphTraits to treat
1330
// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
1331
// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the
1332
// VPBlockBase is a VPRegionBlock, this specialization provides access to its
1333
// successors/predecessors but not to the blocks inside the region.
1334
1335
template <> struct GraphTraits<VPBlockBase *> {
1336
  using NodeRef = VPBlockBase *;
1337
  using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
1338
1339
59.1k
  static NodeRef getEntryNode(NodeRef N) { return N; }
1340
1341
76.2k
  static inline ChildIteratorType child_begin(NodeRef N) {
1342
76.2k
    return N->getSuccessors().begin();
1343
76.2k
  }
1344
1345
96.8k
  static inline ChildIteratorType child_end(NodeRef N) {
1346
96.8k
    return N->getSuccessors().end();
1347
96.8k
  }
1348
};
1349
1350
template <> struct GraphTraits<const VPBlockBase *> {
1351
  using NodeRef = const VPBlockBase *;
1352
  using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::const_iterator;
1353
1354
0
  static NodeRef getEntryNode(NodeRef N) { return N; }
1355
1356
0
  static inline ChildIteratorType child_begin(NodeRef N) {
1357
0
    return N->getSuccessors().begin();
1358
0
  }
1359
1360
0
  static inline ChildIteratorType child_end(NodeRef N) {
1361
0
    return N->getSuccessors().end();
1362
0
  }
1363
};
1364
1365
// Inverse order specialization for VPBasicBlocks. Predecessors are used instead
1366
// of successors for the inverse traversal.
1367
template <> struct GraphTraits<Inverse<VPBlockBase *>> {
1368
  using NodeRef = VPBlockBase *;
1369
  using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
1370
1371
0
  static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }
1372
1373
180
  static inline ChildIteratorType child_begin(NodeRef N) {
1374
180
    return N->getPredecessors().begin();
1375
180
  }
1376
1377
180
  static inline ChildIteratorType child_end(NodeRef N) {
1378
180
    return N->getPredecessors().end();
1379
180
  }
1380
};
1381
1382
// The following set of template specializations implement GraphTraits to
1383
// treat VPRegionBlock as a graph and recurse inside its nodes. It's important
1384
// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
1385
// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so
1386
// there won't be automatic recursion into other VPBlockBases that turn to be
1387
// VPRegionBlocks.
1388
1389
template <>
1390
struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
1391
  using GraphRef = VPRegionBlock *;
1392
  using nodes_iterator = df_iterator<NodeRef>;
1393
1394
28
  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
1395
1396
0
  static nodes_iterator nodes_begin(GraphRef N) {
1397
0
    return nodes_iterator::begin(N->getEntry());
1398
0
  }
1399
1400
0
  static nodes_iterator nodes_end(GraphRef N) {
1401
0
    // df_iterator::end() returns an empty iterator so the node used doesn't
1402
0
    // matter.
1403
0
    return nodes_iterator::end(N);
1404
0
  }
1405
};
1406
1407
template <>
1408
struct GraphTraits<const VPRegionBlock *>
1409
    : public GraphTraits<const VPBlockBase *> {
1410
  using GraphRef = const VPRegionBlock *;
1411
  using nodes_iterator = df_iterator<NodeRef>;
1412
1413
0
  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
1414
1415
0
  static nodes_iterator nodes_begin(GraphRef N) {
1416
0
    return nodes_iterator::begin(N->getEntry());
1417
0
  }
1418
1419
0
  static nodes_iterator nodes_end(GraphRef N) {
1420
0
    // df_iterator::end() returns an empty iterator so the node used doesn't
1421
0
    // matter.
1422
0
    return nodes_iterator::end(N);
1423
0
  }
1424
};
1425
1426
template <>
1427
struct GraphTraits<Inverse<VPRegionBlock *>>
1428
    : public GraphTraits<Inverse<VPBlockBase *>> {
1429
  using GraphRef = VPRegionBlock *;
1430
  using nodes_iterator = df_iterator<NodeRef>;
1431
1432
0
  static NodeRef getEntryNode(Inverse<GraphRef> N) {
1433
0
    return N.Graph->getExit();
1434
0
  }
1435
1436
0
  static nodes_iterator nodes_begin(GraphRef N) {
1437
0
    return nodes_iterator::begin(N->getExit());
1438
0
  }
1439
1440
0
  static nodes_iterator nodes_end(GraphRef N) {
1441
0
    // df_iterator::end() returns an empty iterator so the node used doesn't
1442
0
    // matter.
1443
0
    return nodes_iterator::end(N);
1444
0
  }
1445
};
1446
1447
//===----------------------------------------------------------------------===//
1448
// VPlan Utilities
1449
//===----------------------------------------------------------------------===//
1450
1451
/// Class that provides utilities for VPBlockBases in VPlan.
1452
class VPBlockUtils {
1453
public:
1454
  VPBlockUtils() = delete;
1455
1456
  /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
1457
  /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
1458
  /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr
1459
  /// has more than one successor, its conditional bit is propagated to \p
1460
  /// NewBlock. \p NewBlock must have neither successors nor predecessors.
1461
48.0k
  static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
1462
48.0k
    assert(NewBlock->getSuccessors().empty() &&
1463
48.0k
           "Can't insert new block with successors.");
1464
48.0k
    // TODO: move successors from BlockPtr to NewBlock when this functionality
1465
48.0k
    // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr
1466
48.0k
    // already has successors.
1467
48.0k
    BlockPtr->setOneSuccessor(NewBlock);
1468
48.0k
    NewBlock->setPredecessors({BlockPtr});
1469
48.0k
    NewBlock->setParent(BlockPtr->getParent());
1470
48.0k
  }
1471
1472
  /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
1473
  /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
1474
  /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
1475
  /// parent to \p IfTrue and \p IfFalse. \p Condition is set as the successor
1476
  /// selector. \p BlockPtr must have no successors and \p IfTrue and \p IfFalse
1477
  /// must have neither successors nor predecessors.
1478
  static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
1479
2.78k
                                   VPValue *Condition, VPBlockBase *BlockPtr) {
1480
2.78k
    assert(IfTrue->getSuccessors().empty() &&
1481
2.78k
           "Can't insert IfTrue with successors.");
1482
2.78k
    assert(IfFalse->getSuccessors().empty() &&
1483
2.78k
           "Can't insert IfFalse with successors.");
1484
2.78k
    BlockPtr->setTwoSuccessors(IfTrue, IfFalse, Condition);
1485
2.78k
    IfTrue->setPredecessors({BlockPtr});
1486
2.78k
    IfFalse->setPredecessors({BlockPtr});
1487
2.78k
    IfTrue->setParent(BlockPtr->getParent());
1488
2.78k
    IfFalse->setParent(BlockPtr->getParent());
1489
2.78k
  }
1490
1491
  /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
1492
  /// the successors of \p From and \p From to the predecessors of \p To. Both
1493
  /// VPBlockBases must have the same parent, which can be null. Both
1494
  /// VPBlockBases can be already connected to other VPBlockBases.
1495
2.79k
  static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
1496
2.79k
    assert((From->getParent() == To->getParent()) &&
1497
2.79k
           "Can't connect two block with different parents");
1498
2.79k
    assert(From->getNumSuccessors() < 2 &&
1499
2.79k
           "Blocks can't have more than two successors.");
1500
2.79k
    From->appendSuccessor(To);
1501
2.79k
    To->appendPredecessor(From);
1502
2.79k
  }
1503
1504
  /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
1505
  /// from the successors of \p From and \p From from the predecessors of \p To.
1506
38.5k
  static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
1507
38.5k
    assert(To && "Successor to disconnect is null.");
1508
38.5k
    From->removeSuccessor(To);
1509
38.5k
    To->removePredecessor(From);
1510
38.5k
  }
1511
1512
  /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
1513
  static bool isBackEdge(const VPBlockBase *FromBlock,
1514
19
                         const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
1515
19
    assert(FromBlock->getParent() == ToBlock->getParent() &&
1516
19
           FromBlock->getParent() && "Must be in same region");
1517
19
    const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
1518
19
    const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
1519
19
    if (!FromLoop || !ToLoop || FromLoop != ToLoop)
1520
0
      return false;
1521
19
1522
19
    // A back-edge is a branch from the loop latch to its header.
1523
19
    return ToLoop->isLoopLatch(FromBlock) && 
ToBlock == ToLoop->getHeader()0
;
1524
19
  }
1525
1526
  /// Returns true if \p Block is a loop latch
1527
  static bool blockIsLoopLatch(const VPBlockBase *Block,
1528
12
                               const VPLoopInfo *VPLInfo) {
1529
12
    if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block))
1530
12
      return ParentVPL->isLoopLatch(Block);
1531
0
1532
0
    return false;
1533
0
  }
1534
1535
  /// Count and return the number of succesors of \p PredBlock excluding any
1536
  /// backedges.
1537
  static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
1538
7
                                      VPLoopInfo *VPLI) {
1539
7
    unsigned Count = 0;
1540
12
    for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) {
1541
12
      if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI))
1542
12
        Count++;
1543
12
    }
1544
7
    return Count;
1545
7
  }
1546
};
1547
1548
class VPInterleavedAccessInfo {
1549
private:
1550
  DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
1551
      InterleaveGroupMap;
1552
1553
  /// Type for mapping of instruction based interleave groups to VPInstruction
1554
  /// interleave groups
1555
  using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
1556
                             InterleaveGroup<VPInstruction> *>;
1557
1558
  /// Recursively \p Region and populate VPlan based interleave groups based on
1559
  /// \p IAI.
1560
  void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
1561
                   InterleavedAccessInfo &IAI);
1562
  /// Recursively traverse \p Block and populate VPlan based interleave groups
1563
  /// based on \p IAI.
1564
  void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1565
                  InterleavedAccessInfo &IAI);
1566
1567
public:
1568
  VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
1569
1570
  ~VPInterleavedAccessInfo() {
1571
    SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
1572
    // Avoid releasing a pointer twice.
1573
    for (auto &I : InterleaveGroupMap)
1574
      DelSet.insert(I.second);
1575
    for (auto *Ptr : DelSet)
1576
      delete Ptr;
1577
  }
1578
1579
  /// Get the interleave group that \p Instr belongs to.
1580
  ///
1581
  /// \returns nullptr if doesn't have such group.
1582
  InterleaveGroup<VPInstruction> *
1583
  getInterleaveGroup(VPInstruction *Instr) const {
1584
    if (InterleaveGroupMap.count(Instr))
1585
      return InterleaveGroupMap.find(Instr)->second;
1586
    return nullptr;
1587
  }
1588
};
1589
1590
/// Class that maps (parts of) an existing VPlan to trees of combined
1591
/// VPInstructions.
1592
class VPlanSlp {
1593
private:
1594
  enum class OpMode { Failed, Load, Opcode };
1595
1596
  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
1597
  /// DenseMap keys.
1598
  struct BundleDenseMapInfo {
1599
    static SmallVector<VPValue *, 4> getEmptyKey() {
1600
      return {reinterpret_cast<VPValue *>(-1)};
1601
    }
1602
1603
    static SmallVector<VPValue *, 4> getTombstoneKey() {
1604
      return {reinterpret_cast<VPValue *>(-2)};
1605
    }
1606
1607
    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
1608
      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
1609
    }
1610
1611
    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
1612
                        const SmallVector<VPValue *, 4> &RHS) {
1613
      return LHS == RHS;
1614
    }
1615
  };
1616
1617
  /// Mapping of values in the original VPlan to a combined VPInstruction.
1618
  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
1619
      BundleToCombined;
1620
1621
  VPInterleavedAccessInfo &IAI;
1622
1623
  /// Basic block to operate on. For now, only instructions in a single BB are
1624
  /// considered.
1625
  const VPBasicBlock &BB;
1626
1627
  /// Indicates whether we managed to combine all visited instructions or not.
1628
  bool CompletelySLP = true;
1629
1630
  /// Width of the widest combined bundle in bits.
1631
  unsigned WidestBundleBits = 0;
1632
1633
  using MultiNodeOpTy =
1634
      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
1635
1636
  // Input operand bundles for the current multi node. Each multi node operand
1637
  // bundle contains values not matching the multi node's opcode. They will
1638
  // be reordered in reorderMultiNodeOps, once we completed building a
1639
  // multi node.
1640
  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
1641
1642
  /// Indicates whether we are building a multi node currently.
1643
  bool MultiNodeActive = false;
1644
1645
  /// Check if we can vectorize Operands together.
1646
  bool areVectorizable(ArrayRef<VPValue *> Operands) const;
1647
1648
  /// Add combined instruction \p New for the bundle \p Operands.
1649
  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
1650
1651
  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
1652
  VPInstruction *markFailed();
1653
1654
  /// Reorder operands in the multi node to maximize sequential memory access
1655
  /// and commutative operations.
1656
  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
1657
1658
  /// Choose the best candidate to use for the lane after \p Last. The set of
1659
  /// candidates to choose from are values with an opcode matching \p Last's
1660
  /// or loads consecutive to \p Last.
1661
  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
1662
                                       SmallPtrSetImpl<VPValue *> &Candidates,
1663
                                       VPInterleavedAccessInfo &IAI);
1664
1665
  /// Print bundle \p Values to dbgs().
1666
  void dumpBundle(ArrayRef<VPValue *> Values);
1667
1668
public:
1669
  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
1670
1671
  ~VPlanSlp() {
1672
    for (auto &KV : BundleToCombined)
1673
      delete KV.second;
1674
  }
1675
1676
  /// Tries to build an SLP tree rooted at \p Operands and returns a
1677
  /// VPInstruction combining \p Operands, if they can be combined.
1678
  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
1679
1680
  /// Return the width of the widest combined bundle in bits.
1681
  unsigned getWidestBundleBits() const { return WidestBundleBits; }
1682
1683
  /// Return true if all visited instruction can be combined.
1684
  bool isCompletelySLP() const { return CompletelySLP; }
1685
};
1686
} // end namespace llvm
1687
1688
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H