Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Analysis/DependenceAnalysis.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// DependenceAnalysis is an LLVM pass that analyses dependences between memory
10
// accesses. Currently, it is an (incomplete) implementation of the approach
11
// described in
12
//
13
//            Practical Dependence Testing
14
//            Goff, Kennedy, Tseng
15
//            PLDI 1991
16
//
17
// There's a single entry point that analyzes the dependence between a pair
18
// of memory references in a function, returning either NULL, for no dependence,
19
// or a more-or-less detailed description of the dependence between them.
20
//
21
// Currently, the implementation cannot propagate constraints between
22
// coupled RDIV subscripts and lacks a multi-subscript MIV test.
23
// Both of these are conservative weaknesses;
24
// that is, not a source of correctness problems.
25
//
26
// Since Clang linearizes some array subscripts, the dependence
27
// analysis is using SCEV->delinearize to recover the representation of multiple
28
// subscripts, and thus avoid the more expensive and less precise MIV tests. The
29
// delinearization is controlled by the flag -da-delinearize.
30
//
31
// We should pay some careful attention to the possibility of integer overflow
32
// in the implementation of the various tests. This could happen with Add,
33
// Subtract, or Multiply, with both APInt's and SCEV's.
34
//
35
// Some non-linear subscript pairs can be handled by the GCD test
36
// (and perhaps other tests).
37
// Should explore how often these things occur.
38
//
39
// Finally, it seems like certain test cases expose weaknesses in the SCEV
40
// simplification, especially in the handling of sign and zero extensions.
41
// It could be useful to spend time exploring these.
42
//
43
// Please note that this is work in progress and the interface is subject to
44
// change.
45
//
46
//===----------------------------------------------------------------------===//
47
//                                                                            //
48
//                   In memory of Ken Kennedy, 1945 - 2007                    //
49
//                                                                            //
50
//===----------------------------------------------------------------------===//
51
52
#include "llvm/Analysis/DependenceAnalysis.h"
53
#include "llvm/ADT/STLExtras.h"
54
#include "llvm/ADT/Statistic.h"
55
#include "llvm/Analysis/AliasAnalysis.h"
56
#include "llvm/Analysis/LoopInfo.h"
57
#include "llvm/Analysis/ScalarEvolution.h"
58
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
59
#include "llvm/Analysis/ValueTracking.h"
60
#include "llvm/Config/llvm-config.h"
61
#include "llvm/IR/InstIterator.h"
62
#include "llvm/IR/Module.h"
63
#include "llvm/IR/Operator.h"
64
#include "llvm/Support/CommandLine.h"
65
#include "llvm/Support/Debug.h"
66
#include "llvm/Support/ErrorHandling.h"
67
#include "llvm/Support/raw_ostream.h"
68
69
using namespace llvm;
70
71
#define DEBUG_TYPE "da"
72
73
//===----------------------------------------------------------------------===//
74
// statistics
75
76
STATISTIC(TotalArrayPairs, "Array pairs tested");
77
STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
78
STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
79
STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
80
STATISTIC(ZIVapplications, "ZIV applications");
81
STATISTIC(ZIVindependence, "ZIV independence");
82
STATISTIC(StrongSIVapplications, "Strong SIV applications");
83
STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
84
STATISTIC(StrongSIVindependence, "Strong SIV independence");
85
STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
86
STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
87
STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
88
STATISTIC(ExactSIVapplications, "Exact SIV applications");
89
STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
90
STATISTIC(ExactSIVindependence, "Exact SIV independence");
91
STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
92
STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
93
STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
94
STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
95
STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
96
STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
97
STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
98
STATISTIC(DeltaApplications, "Delta applications");
99
STATISTIC(DeltaSuccesses, "Delta successes");
100
STATISTIC(DeltaIndependence, "Delta independence");
101
STATISTIC(DeltaPropagations, "Delta propagations");
102
STATISTIC(GCDapplications, "GCD applications");
103
STATISTIC(GCDsuccesses, "GCD successes");
104
STATISTIC(GCDindependence, "GCD independence");
105
STATISTIC(BanerjeeApplications, "Banerjee applications");
106
STATISTIC(BanerjeeIndependence, "Banerjee independence");
107
STATISTIC(BanerjeeSuccesses, "Banerjee successes");
108
109
static cl::opt<bool>
110
    Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore,
111
                cl::desc("Try to delinearize array references."));
112
static cl::opt<bool> DisableDelinearizationChecks(
113
    "da-disable-delinearization-checks", cl::init(false), cl::Hidden,
114
    cl::ZeroOrMore,
115
    cl::desc(
116
        "Disable checks that try to statically verify validity of "
117
        "delinearized subscripts. Enabling this option may result in incorrect "
118
        "dependence vectors for languages that allow the subscript of one "
119
        "dimension to underflow or overflow into another dimension."));
120
121
//===----------------------------------------------------------------------===//
122
// basics
123
124
DependenceAnalysis::Result
125
195
DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
126
195
  auto &AA = FAM.getResult<AAManager>(F);
127
195
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
128
195
  auto &LI = FAM.getResult<LoopAnalysis>(F);
129
195
  return DependenceInfo(&F, &AA, &SE, &LI);
130
195
}
131
132
AnalysisKey DependenceAnalysis::Key;
133
134
36.0k
INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
135
36.0k
                      "Dependence Analysis", true, true)
136
36.0k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
137
36.0k
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
138
36.0k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
139
36.0k
INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",
140
                    true, true)
141
142
char DependenceAnalysisWrapperPass::ID = 0;
143
144
0
FunctionPass *llvm::createDependenceAnalysisWrapperPass() {
145
0
  return new DependenceAnalysisWrapperPass();
146
0
}
147
148
298
bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) {
149
298
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
150
298
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
151
298
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
152
298
  info.reset(new DependenceInfo(&F, &AA, &SE, &LI));
153
298
  return false;
154
298
}
155
156
168
DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; }
157
158
298
void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); }
159
160
56
void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
161
56
  AU.setPreservesAll();
162
56
  AU.addRequiredTransitive<AAResultsWrapperPass>();
163
56
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
164
56
  AU.addRequiredTransitive<LoopInfoWrapperPass>();
165
56
}
166
167
168
// Used to test the dependence analyzer.
169
// Looks through the function, noting loads and stores.
170
// Calls depends() on every possible pair and prints out the result.
171
// Ignores all other instructions.
172
389
static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) {
173
389
  auto *F = DA->getFunction();
174
8.86k
  for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
175
8.47k
       ++SrcI) {
176
8.47k
    if (isa<StoreInst>(*SrcI) || 
isa<LoadInst>(*SrcI)7.74k
) {
177
1.13k
      for (inst_iterator DstI = SrcI, DstE = inst_end(F);
178
13.8k
           DstI != DstE; 
++DstI12.6k
) {
179
12.6k
        if (isa<StoreInst>(*DstI) || 
isa<LoadInst>(*DstI)10.8k
) {
180
2.78k
          OS << "da analyze - ";
181
2.78k
          if (auto D = DA->depends(&*SrcI, &*DstI, true)) {
182
1.47k
            D->dump(OS);
183
2.63k
            for (unsigned Level = 1; Level <= D->getLevels(); 
Level++1.15k
) {
184
1.15k
              if (D->isSplitable(Level)) {
185
6
                OS << "da analyze - split level = " << Level;
186
6
                OS << ", iteration = " << *DA->getSplitIteration(*D, Level);
187
6
                OS << "!\n";
188
6
              }
189
1.15k
            }
190
1.47k
          }
191
1.30k
          else
192
1.30k
            OS << "none!\n";
193
2.78k
        }
194
12.6k
      }
195
1.13k
    }
196
8.47k
  }
197
389
}
198
199
void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
200
195
                                          const Module *) const {
201
195
  dumpExampleDependence(OS, info.get());
202
195
}
203
204
PreservedAnalyses
205
194
DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
206
194
  OS << "'Dependence Analysis' for function '" << F.getName() << "':\n";
207
194
  dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F));
208
194
  return PreservedAnalyses::all();
209
194
}
210
211
//===----------------------------------------------------------------------===//
212
// Dependence methods
213
214
// Returns true if this is an input dependence.
215
163
bool Dependence::isInput() const {
216
163
  return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
217
163
}
218
219
220
// Returns true if this is an output dependence.
221
432
bool Dependence::isOutput() const {
222
432
  return Src->mayWriteToMemory() && 
Dst->mayWriteToMemory()214
;
223
432
}
224
225
226
// Returns true if this is an flow (aka true)  dependence.
227
628
bool Dependence::isFlow() const {
228
628
  return Src->mayWriteToMemory() && 
Dst->mayReadFromMemory()410
;
229
628
}
230
231
232
// Returns true if this is an anti dependence.
233
218
bool Dependence::isAnti() const {
234
218
  return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
235
218
}
236
237
238
// Returns true if a particular level is scalar; that is,
239
// if no subscript in the source or destination mention the induction
240
// variable associated with the loop at this level.
241
// Leave this out of line, so it will serve as a virtual method anchor
242
0
bool Dependence::isScalar(unsigned level) const {
243
0
  return false;
244
0
}
245
246
247
//===----------------------------------------------------------------------===//
248
// FullDependence methods
249
250
FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
251
                               bool PossiblyLoopIndependent,
252
                               unsigned CommonLevels)
253
    : Dependence(Source, Destination), Levels(CommonLevels),
254
1.69k
      LoopIndependent(PossiblyLoopIndependent) {
255
1.69k
  Consistent = true;
256
1.69k
  if (CommonLevels)
257
1.53k
    DV = make_unique<DVEntry[]>(CommonLevels);
258
1.69k
}
259
260
// The rest are simple getters that hide the implementation.
261
262
// getDirection - Returns the direction associated with a particular level.
263
2.87k
unsigned FullDependence::getDirection(unsigned Level) const {
264
2.87k
  assert(0 < Level && Level <= Levels && "Level out of range");
265
2.87k
  return DV[Level - 1].Direction;
266
2.87k
}
267
268
269
// Returns the distance (or NULL) associated with a particular level.
270
1.20k
const SCEV *FullDependence::getDistance(unsigned Level) const {
271
1.20k
  assert(0 < Level && Level <= Levels && "Level out of range");
272
1.20k
  return DV[Level - 1].Distance;
273
1.20k
}
274
275
276
// Returns true if a particular level is scalar; that is,
277
// if no subscript in the source or destination mention the induction
278
// variable associated with the loop at this level.
279
1.11k
bool FullDependence::isScalar(unsigned Level) const {
280
1.11k
  assert(0 < Level && Level <= Levels && "Level out of range");
281
1.11k
  return DV[Level - 1].Scalar;
282
1.11k
}
283
284
285
// Returns true if peeling the first iteration from this loop
286
// will break this dependence.
287
1.15k
bool FullDependence::isPeelFirst(unsigned Level) const {
288
1.15k
  assert(0 < Level && Level <= Levels && "Level out of range");
289
1.15k
  return DV[Level - 1].PeelFirst;
290
1.15k
}
291
292
293
// Returns true if peeling the last iteration from this loop
294
// will break this dependence.
295
1.15k
bool FullDependence::isPeelLast(unsigned Level) const {
296
1.15k
  assert(0 < Level && Level <= Levels && "Level out of range");
297
1.15k
  return DV[Level - 1].PeelLast;
298
1.15k
}
299
300
301
// Returns true if splitting this loop will break the dependence.
302
2.31k
bool FullDependence::isSplitable(unsigned Level) const {
303
2.31k
  assert(0 < Level && Level <= Levels && "Level out of range");
304
2.31k
  return DV[Level - 1].Splitable;
305
2.31k
}
306
307
308
//===----------------------------------------------------------------------===//
309
// DependenceInfo::Constraint methods
310
311
// If constraint is a point <X, Y>, returns X.
312
// Otherwise assert.
313
6
const SCEV *DependenceInfo::Constraint::getX() const {
314
6
  assert(Kind == Point && "Kind should be Point");
315
6
  return A;
316
6
}
317
318
319
// If constraint is a point <X, Y>, returns Y.
320
// Otherwise assert.
321
6
const SCEV *DependenceInfo::Constraint::getY() const {
322
6
  assert(Kind == Point && "Kind should be Point");
323
6
  return B;
324
6
}
325
326
327
// If constraint is a line AX + BY = C, returns A.
328
// Otherwise assert.
329
12
const SCEV *DependenceInfo::Constraint::getA() const {
330
12
  assert((Kind == Line || Kind == Distance) &&
331
12
         "Kind should be Line (or Distance)");
332
12
  return A;
333
12
}
334
335
336
// If constraint is a line AX + BY = C, returns B.
337
// Otherwise assert.
338
12
const SCEV *DependenceInfo::Constraint::getB() const {
339
12
  assert((Kind == Line || Kind == Distance) &&
340
12
         "Kind should be Line (or Distance)");
341
12
  return B;
342
12
}
343
344
345
// If constraint is a line AX + BY = C, returns C.
346
// Otherwise assert.
347
8
const SCEV *DependenceInfo::Constraint::getC() const {
348
8
  assert((Kind == Line || Kind == Distance) &&
349
8
         "Kind should be Line (or Distance)");
350
8
  return C;
351
8
}
352
353
354
// If constraint is a distance, returns D.
355
// Otherwise assert.
356
0
const SCEV *DependenceInfo::Constraint::getD() const {
357
0
  assert(Kind == Distance && "Kind should be Distance");
358
0
  return SE->getNegativeSCEV(C);
359
0
}
360
361
362
// Returns the loop associated with this constraint.
363
4
const Loop *DependenceInfo::Constraint::getAssociatedLoop() const {
364
4
  assert((Kind == Distance || Kind == Line || Kind == Point) &&
365
4
         "Kind should be Distance, Line, or Point");
366
4
  return AssociatedLoop;
367
4
}
368
369
void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y,
370
2
                                          const Loop *CurLoop) {
371
2
  Kind = Point;
372
2
  A = X;
373
2
  B = Y;
374
2
  AssociatedLoop = CurLoop;
375
2
}
376
377
void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB,
378
142
                                         const SCEV *CC, const Loop *CurLoop) {
379
142
  Kind = Line;
380
142
  A = AA;
381
142
  B = BB;
382
142
  C = CC;
383
142
  AssociatedLoop = CurLoop;
384
142
}
385
386
void DependenceInfo::Constraint::setDistance(const SCEV *D,
387
757
                                             const Loop *CurLoop) {
388
757
  Kind = Distance;
389
757
  A = SE->getOne(D->getType());
390
757
  B = SE->getNegativeSCEV(A);
391
757
  C = SE->getNegativeSCEV(D);
392
757
  AssociatedLoop = CurLoop;
393
757
}
394
395
0
void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }
396
397
1.69k
void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) {
398
1.69k
  SE = NewSE;
399
1.69k
  Kind = Any;
400
1.69k
}
401
402
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
403
// For debugging purposes. Dumps the constraint out to OS.
404
LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
405
  if (isEmpty())
406
    OS << " Empty\n";
407
  else if (isAny())
408
    OS << " Any\n";
409
  else if (isPoint())
410
    OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
411
  else if (isDistance())
412
    OS << " Distance is " << *getD() <<
413
      " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
414
  else if (isLine())
415
    OS << " Line is " << *getA() << "*X + " <<
416
      *getB() << "*Y = " << *getC() << "\n";
417
  else
418
    llvm_unreachable("unknown constraint type in Constraint::dump");
419
}
420
#endif
421
422
423
// Updates X with the intersection
424
// of the Constraints X and Y. Returns true if X has changed.
425
// Corresponds to Figure 4 from the paper
426
//
427
//            Practical Dependence Testing
428
//            Goff, Kennedy, Tseng
429
//            PLDI 1991
430
4
bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
431
4
  ++DeltaApplications;
432
4
  LLVM_DEBUG(dbgs() << "\tintersect constraints\n");
433
4
  LLVM_DEBUG(dbgs() << "\t    X ="; X->dump(dbgs()));
434
4
  LLVM_DEBUG(dbgs() << "\t    Y ="; Y->dump(dbgs()));
435
4
  assert(!Y->isPoint() && "Y must not be a Point");
436
4
  if (X->isAny()) {
437
2
    if (Y->isAny())
438
0
      return false;
439
2
    *X = *Y;
440
2
    return true;
441
2
  }
442
2
  if (X->isEmpty())
443
0
    return false;
444
2
  if (Y->isEmpty()) {
445
0
    X->setEmpty();
446
0
    return true;
447
0
  }
448
2
449
2
  if (X->isDistance() && 
Y->isDistance()0
) {
450
0
    LLVM_DEBUG(dbgs() << "\t    intersect 2 distances\n");
451
0
    if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
452
0
      return false;
453
0
    if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
454
0
      X->setEmpty();
455
0
      ++DeltaSuccesses;
456
0
      return true;
457
0
    }
458
0
    // Hmmm, interesting situation.
459
0
    // I guess if either is constant, keep it and ignore the other.
460
0
    if (isa<SCEVConstant>(Y->getD())) {
461
0
      *X = *Y;
462
0
      return true;
463
0
    }
464
0
    return false;
465
0
  }
466
2
467
2
  // At this point, the pseudo-code in Figure 4 of the paper
468
2
  // checks if (X->isPoint() && Y->isPoint()).
469
2
  // This case can't occur in our implementation,
470
2
  // since a Point can only arise as the result of intersecting
471
2
  // two Line constraints, and the right-hand value, Y, is never
472
2
  // the result of an intersection.
473
2
  assert(!(X->isPoint() && Y->isPoint()) &&
474
2
         "We shouldn't ever see X->isPoint() && Y->isPoint()");
475
2
476
2
  if (X->isLine() && Y->isLine()) {
477
2
    LLVM_DEBUG(dbgs() << "\t    intersect 2 lines\n");
478
2
    const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
479
2
    const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
480
2
    if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
481
0
      // slopes are equal, so lines are parallel
482
0
      LLVM_DEBUG(dbgs() << "\t\tsame slope\n");
483
0
      Prod1 = SE->getMulExpr(X->getC(), Y->getB());
484
0
      Prod2 = SE->getMulExpr(X->getB(), Y->getC());
485
0
      if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
486
0
        return false;
487
0
      if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
488
0
        X->setEmpty();
489
0
        ++DeltaSuccesses;
490
0
        return true;
491
0
      }
492
0
      return false;
493
0
    }
494
2
    if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
495
2
      // slopes differ, so lines intersect
496
2
      LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n");
497
2
      const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
498
2
      const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
499
2
      const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
500
2
      const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
501
2
      const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
502
2
      const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
503
2
      const SCEVConstant *C1A2_C2A1 =
504
2
        dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
505
2
      const SCEVConstant *C1B2_C2B1 =
506
2
        dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
507
2
      const SCEVConstant *A1B2_A2B1 =
508
2
        dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
509
2
      const SCEVConstant *A2B1_A1B2 =
510
2
        dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
511
2
      if (!C1B2_C2B1 || !C1A2_C2A1 ||
512
2
          !A1B2_A2B1 || !A2B1_A1B2)
513
0
        return false;
514
2
      APInt Xtop = C1B2_C2B1->getAPInt();
515
2
      APInt Xbot = A1B2_A2B1->getAPInt();
516
2
      APInt Ytop = C1A2_C2A1->getAPInt();
517
2
      APInt Ybot = A2B1_A1B2->getAPInt();
518
2
      LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
519
2
      LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
520
2
      LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
521
2
      LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
522
2
      APInt Xq = Xtop; // these need to be initialized, even
523
2
      APInt Xr = Xtop; // though they're just going to be overwritten
524
2
      APInt::sdivrem(Xtop, Xbot, Xq, Xr);
525
2
      APInt Yq = Ytop;
526
2
      APInt Yr = Ytop;
527
2
      APInt::sdivrem(Ytop, Ybot, Yq, Yr);
528
2
      if (Xr != 0 || Yr != 0) {
529
0
        X->setEmpty();
530
0
        ++DeltaSuccesses;
531
0
        return true;
532
0
      }
533
2
      LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
534
2
      if (Xq.slt(0) || Yq.slt(0)) {
535
0
        X->setEmpty();
536
0
        ++DeltaSuccesses;
537
0
        return true;
538
0
      }
539
2
      if (const SCEVConstant *CUB =
540
0
          collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
541
0
        const APInt &UpperBound = CUB->getAPInt();
542
0
        LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
543
0
        if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
544
0
          X->setEmpty();
545
0
          ++DeltaSuccesses;
546
0
          return true;
547
0
        }
548
2
      }
549
2
      X->setPoint(SE->getConstant(Xq),
550
2
                  SE->getConstant(Yq),
551
2
                  X->getAssociatedLoop());
552
2
      ++DeltaSuccesses;
553
2
      return true;
554
2
    }
555
0
    return false;
556
0
  }
557
0
558
0
  // if (X->isLine() && Y->isPoint()) This case can't occur.
559
0
  assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
560
0
561
0
  if (X->isPoint() && Y->isLine()) {
562
0
    LLVM_DEBUG(dbgs() << "\t    intersect Point and Line\n");
563
0
    const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
564
0
    const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
565
0
    const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
566
0
    if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
567
0
      return false;
568
0
    if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
569
0
      X->setEmpty();
570
0
      ++DeltaSuccesses;
571
0
      return true;
572
0
    }
573
0
    return false;
574
0
  }
575
0
576
0
  llvm_unreachable("shouldn't reach the end of Constraint intersection");
577
0
  return false;
578
0
}
579
580
581
//===----------------------------------------------------------------------===//
582
// DependenceInfo methods
583
584
// For debugging purposes. Dumps a dependence to OS.
585
1.47k
void Dependence::dump(raw_ostream &OS) const {
586
1.47k
  bool Splitable = false;
587
1.47k
  if (isConfused())
588
845
    OS << "confused";
589
628
  else {
590
628
    if (isConsistent())
591
212
      OS << "consistent ";
592
628
    if (isFlow())
593
196
      OS << "flow";
594
432
    else if (isOutput())
595
214
      OS << "output";
596
218
    else if (isAnti())
597
55
      OS << "anti";
598
163
    else if (isInput())
599
163
      OS << "input";
600
628
    unsigned Levels = getLevels();
601
628
    OS << " [";
602
1.78k
    for (unsigned II = 1; II <= Levels; 
++II1.15k
) {
603
1.15k
      if (isSplitable(II))
604
6
        Splitable = true;
605
1.15k
      if (isPeelFirst(II))
606
14
        OS << 'p';
607
1.15k
      const SCEV *Distance = getDistance(II);
608
1.15k
      if (Distance)
609
86
        OS << *Distance;
610
1.07k
      else if (isScalar(II))
611
317
        OS << "S";
612
756
      else {
613
756
        unsigned Direction = getDirection(II);
614
756
        if (Direction == DVEntry::ALL)
615
554
          OS << "*";
616
202
        else {
617
202
          if (Direction & DVEntry::LT)
618
120
            OS << "<";
619
202
          if (Direction & DVEntry::EQ)
620
78
            OS << "=";
621
202
          if (Direction & DVEntry::GT)
622
100
            OS << ">";
623
202
        }
624
756
      }
625
1.15k
      if (isPeelLast(II))
626
4
        OS << 'p';
627
1.15k
      if (II < Levels)
628
554
        OS << " ";
629
1.15k
    }
630
628
    if (isLoopIndependent())
631
246
      OS << "|<";
632
628
    OS << "]";
633
628
    if (Splitable)
634
6
      OS << " splitable";
635
628
  }
636
1.47k
  OS << "!\n";
637
1.47k
}
638
639
// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
640
// underlaying objects. If LocA and LocB are known to not alias (for any reason:
641
// tbaa, non-overlapping regions etc), then it is known there is no dependecy.
642
// Otherwise the underlying objects are checked to see if they point to
643
// different identifiable objects.
644
static AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
645
                                          const DataLayout &DL,
646
                                          const MemoryLocation &LocA,
647
2.94k
                                          const MemoryLocation &LocB) {
648
2.94k
  // Check the original locations (minus size) for noalias, which can happen for
649
2.94k
  // tbaa, incompatible underlying object locations, etc.
650
2.94k
  MemoryLocation LocAS(LocA.Ptr, LocationSize::unknown(), LocA.AATags);
651
2.94k
  MemoryLocation LocBS(LocB.Ptr, LocationSize::unknown(), LocB.AATags);
652
2.94k
  if (AA->alias(LocAS, LocBS) == NoAlias)
653
411
    return NoAlias;
654
2.53k
655
2.53k
  // Check the underlying objects are the same
656
2.53k
  const Value *AObj = GetUnderlyingObject(LocA.Ptr, DL);
657
2.53k
  const Value *BObj = GetUnderlyingObject(LocB.Ptr, DL);
658
2.53k
659
2.53k
  // If the underlying objects are the same, they must alias
660
2.53k
  if (AObj == BObj)
661
1.68k
    return MustAlias;
662
851
663
851
  // We may have hit the recursion limit for underlying objects, or have
664
851
  // underlying objects where we don't know they will alias.
665
851
  if (!isIdentifiedObject(AObj) || 
!isIdentifiedObject(BObj)95
)
666
851
    return MayAlias;
667
0
668
0
  // Otherwise we know the objects are different and both identified objects so
669
0
  // must not alias.
670
0
  return NoAlias;
671
0
}
672
673
674
// Returns true if the load or store can be analyzed. Atomic and volatile
675
// operations have properties which this analysis does not understand.
676
static
677
5.89k
bool isLoadOrStore(const Instruction *I) {
678
5.89k
  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
679
2.19k
    return LI->isUnordered();
680
3.69k
  else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
681
3.69k
    return SI->isUnordered();
682
0
  return false;
683
0
}
684
685
686
// Examines the loop nesting of the Src and Dst
687
// instructions and establishes their shared loops. Sets the variables
688
// CommonLevels, SrcLevels, and MaxLevels.
689
// The source and destination instructions needn't be contained in the same
690
// loop. The routine establishNestingLevels finds the level of most deeply
691
// nested loop that contains them both, CommonLevels. An instruction that's
692
// not contained in a loop is at level = 0. MaxLevels is equal to the level
693
// of the source plus the level of the destination, minus CommonLevels.
694
// This lets us allocate vectors MaxLevels in length, with room for every
695
// distinct loop referenced in both the source and destination subscripts.
696
// The variable SrcLevels is the nesting depth of the source instruction.
697
// It's used to help calculate distinct loops referenced by the destination.
698
// Here's the map from loops to levels:
699
//            0 - unused
700
//            1 - outermost common loop
701
//          ... - other common loops
702
// CommonLevels - innermost common loop
703
//          ... - loops containing Src but not Dst
704
//    SrcLevels - innermost loop containing Src but not Dst
705
//          ... - loops containing Dst but not Src
706
//    MaxLevels - innermost loops containing Dst but not Src
707
// Consider the follow code fragment:
708
//   for (a = ...) {
709
//     for (b = ...) {
710
//       for (c = ...) {
711
//         for (d = ...) {
712
//           A[] = ...;
713
//         }
714
//       }
715
//       for (e = ...) {
716
//         for (f = ...) {
717
//           for (g = ...) {
718
//             ... = A[];
719
//           }
720
//         }
721
//       }
722
//     }
723
//   }
724
// If we're looking at the possibility of a dependence between the store
725
// to A (the Src) and the load from A (the Dst), we'll note that they
726
// have 2 loops in common, so CommonLevels will equal 2 and the direction
727
// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
728
// A map from loop names to loop numbers would look like
729
//     a - 1
730
//     b - 2 = CommonLevels
731
//     c - 3
732
//     d - 4 = SrcLevels
733
//     e - 5
734
//     f - 6
735
//     g - 7 = MaxLevels
736
void DependenceInfo::establishNestingLevels(const Instruction *Src,
737
1.69k
                                            const Instruction *Dst) {
738
1.69k
  const BasicBlock *SrcBlock = Src->getParent();
739
1.69k
  const BasicBlock *DstBlock = Dst->getParent();
740
1.69k
  unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
741
1.69k
  unsigned DstLevel = LI->getLoopDepth(DstBlock);
742
1.69k
  const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
743
1.69k
  const Loop *DstLoop = LI->getLoopFor(DstBlock);
744
1.69k
  SrcLevels = SrcLevel;
745
1.69k
  MaxLevels = SrcLevel + DstLevel;
746
1.71k
  while (SrcLevel > DstLevel) {
747
28
    SrcLoop = SrcLoop->getParentLoop();
748
28
    SrcLevel--;
749
28
  }
750
1.71k
  while (DstLevel > SrcLevel) {
751
27
    DstLoop = DstLoop->getParentLoop();
752
27
    DstLevel--;
753
27
  }
754
1.74k
  while (SrcLoop != DstLoop) {
755
56
    SrcLoop = SrcLoop->getParentLoop();
756
56
    DstLoop = DstLoop->getParentLoop();
757
56
    SrcLevel--;
758
56
  }
759
1.69k
  CommonLevels = SrcLevel;
760
1.69k
  MaxLevels -= CommonLevels;
761
1.69k
}
762
763
764
// Given one of the loops containing the source, return
765
// its level index in our numbering scheme.
766
3.94k
unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
767
3.94k
  return SrcLoop->getLoopDepth();
768
3.94k
}
769
770
771
// Given one of the loops containing the destination,
772
// return its level index in our numbering scheme.
773
2.96k
unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
774
2.96k
  unsigned D = DstLoop->getLoopDepth();
775
2.96k
  if (D > CommonLevels)
776
46
    return D - CommonLevels + SrcLevels;
777
2.91k
  else
778
2.91k
    return D;
779
2.96k
}
780
781
782
// Returns true if Expression is loop invariant in LoopNest.
783
bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
784
20.5k
                                     const Loop *LoopNest) const {
785
20.5k
  if (!LoopNest)
786
7.30k
    return true;
787
13.2k
  return SE->isLoopInvariant(Expression, LoopNest) &&
788
13.2k
    
isLoopInvariant(Expression, LoopNest->getParentLoop())13.1k
;
789
13.2k
}
790
791
792
793
// Finds the set of loops from the LoopNest that
794
// have a level <= CommonLevels and are referred to by the SCEV Expression.
795
void DependenceInfo::collectCommonLoops(const SCEV *Expression,
796
                                        const Loop *LoopNest,
797
228
                                        SmallBitVector &Loops) const {
798
643
  while (LoopNest) {
799
415
    unsigned Level = LoopNest->getLoopDepth();
800
415
    if (Level <= CommonLevels && 
!SE->isLoopInvariant(Expression, LoopNest)398
)
801
395
      Loops.set(Level);
802
415
    LoopNest = LoopNest->getParentLoop();
803
415
  }
804
228
}
805
806
182
void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) {
807
182
808
182
  unsigned widestWidthSeen = 0;
809
182
  Type *widestType;
810
182
811
182
  // Go through each pair and find the widest bit to which we need
812
182
  // to extend all of them.
813
184
  for (Subscript *Pair : Pairs) {
814
184
    const SCEV *Src = Pair->Src;
815
184
    const SCEV *Dst = Pair->Dst;
816
184
    IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
817
184
    IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
818
184
    if (SrcTy == nullptr || DstTy == nullptr) {
819
0
      assert(SrcTy == DstTy && "This function only unify integer types and "
820
0
             "expect Src and Dst share the same type "
821
0
             "otherwise.");
822
0
      continue;
823
0
    }
824
184
    if (SrcTy->getBitWidth() > widestWidthSeen) {
825
182
      widestWidthSeen = SrcTy->getBitWidth();
826
182
      widestType = SrcTy;
827
182
    }
828
184
    if (DstTy->getBitWidth() > widestWidthSeen) {
829
0
      widestWidthSeen = DstTy->getBitWidth();
830
0
      widestType = DstTy;
831
0
    }
832
184
  }
833
182
834
182
835
182
  assert(widestWidthSeen > 0);
836
182
837
182
  // Now extend each pair to the widest seen.
838
184
  for (Subscript *Pair : Pairs) {
839
184
    const SCEV *Src = Pair->Src;
840
184
    const SCEV *Dst = Pair->Dst;
841
184
    IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
842
184
    IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
843
184
    if (SrcTy == nullptr || DstTy == nullptr) {
844
0
      assert(SrcTy == DstTy && "This function only unify integer types and "
845
0
             "expect Src and Dst share the same type "
846
0
             "otherwise.");
847
0
      continue;
848
0
    }
849
184
    if (SrcTy->getBitWidth() < widestWidthSeen)
850
0
      // Sign-extend Src to widestType
851
0
      Pair->Src = SE->getSignExtendExpr(Src, widestType);
852
184
    if (DstTy->getBitWidth() < widestWidthSeen) {
853
0
      // Sign-extend Dst to widestType
854
0
      Pair->Dst = SE->getSignExtendExpr(Dst, widestType);
855
0
    }
856
184
  }
857
182
}
858
859
// removeMatchingExtensions - Examines a subscript pair.
860
// If the source and destination are identically sign (or zero)
861
// extended, it strips off the extension in an effect to simplify
862
// the actual analysis.
863
1.80k
void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {
864
1.80k
  const SCEV *Src = Pair->Src;
865
1.80k
  const SCEV *Dst = Pair->Dst;
866
1.80k
  if ((isa<SCEVZeroExtendExpr>(Src) && 
isa<SCEVZeroExtendExpr>(Dst)0
) ||
867
1.80k
      (isa<SCEVSignExtendExpr>(Src) && 
isa<SCEVSignExtendExpr>(Dst)0
)) {
868
0
    const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
869
0
    const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
870
0
    const SCEV *SrcCastOp = SrcCast->getOperand();
871
0
    const SCEV *DstCastOp = DstCast->getOperand();
872
0
    if (SrcCastOp->getType() == DstCastOp->getType()) {
873
0
      Pair->Src = SrcCastOp;
874
0
      Pair->Dst = DstCastOp;
875
0
    }
876
0
  }
877
1.80k
}
878
879
880
// Examine the scev and return true iff it's linear.
881
// Collect any loops mentioned in the set of "Loops".
882
bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
883
3.78k
                                       SmallBitVector &Loops) {
884
3.78k
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src);
885
3.78k
  if (!AddRec)
886
1.80k
    return isLoopInvariant(Src, LoopNest);
887
1.98k
  const SCEV *Start = AddRec->getStart();
888
1.98k
  const SCEV *Step = AddRec->getStepRecurrence(*SE);
889
1.98k
  const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
890
1.98k
  if (!isa<SCEVCouldNotCompute>(UB)) {
891
1.97k
    if (SE->getTypeSizeInBits(Start->getType()) <
892
1.97k
        SE->getTypeSizeInBits(UB->getType())) {
893
0
      if (!AddRec->getNoWrapFlags())
894
0
        return false;
895
1.98k
    }
896
1.97k
  }
897
1.98k
  if (!isLoopInvariant(Step, LoopNest))
898
0
    return false;
899
1.98k
  Loops.set(mapSrcLoop(AddRec->getLoop()));
900
1.98k
  return checkSrcSubscript(Start, LoopNest, Loops);
901
1.98k
}
902
903
904
905
// Examine the scev and return true iff it's linear.
906
// Collect any loops mentioned in the set of "Loops".
907
bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
908
3.63k
                                       SmallBitVector &Loops) {
909
3.63k
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst);
910
3.63k
  if (!AddRec)
911
1.69k
    return isLoopInvariant(Dst, LoopNest);
912
1.93k
  const SCEV *Start = AddRec->getStart();
913
1.93k
  const SCEV *Step = AddRec->getStepRecurrence(*SE);
914
1.93k
  const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
915
1.93k
  if (!isa<SCEVCouldNotCompute>(UB)) {
916
1.92k
    if (SE->getTypeSizeInBits(Start->getType()) <
917
1.92k
        SE->getTypeSizeInBits(UB->getType())) {
918
0
      if (!AddRec->getNoWrapFlags())
919
0
        return false;
920
1.93k
    }
921
1.92k
  }
922
1.93k
  if (!isLoopInvariant(Step, LoopNest))
923
0
    return false;
924
1.93k
  Loops.set(mapDstLoop(AddRec->getLoop()));
925
1.93k
  return checkDstSubscript(Start, LoopNest, Loops);
926
1.93k
}
927
928
929
// Examines the subscript pair (the Src and Dst SCEVs)
930
// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
931
// Collects the associated loops in a set.
932
DependenceInfo::Subscript::ClassificationKind
933
DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
934
                             const SCEV *Dst, const Loop *DstLoopNest,
935
1.80k
                             SmallBitVector &Loops) {
936
1.80k
  SmallBitVector SrcLoops(MaxLevels + 1);
937
1.80k
  SmallBitVector DstLoops(MaxLevels + 1);
938
1.80k
  if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
939
112
    return Subscript::NonLinear;
940
1.69k
  if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
941
2
    return Subscript::NonLinear;
942
1.69k
  Loops = SrcLoops;
943
1.69k
  Loops |= DstLoops;
944
1.69k
  unsigned N = Loops.count();
945
1.69k
  if (N == 0)
946
274
    return Subscript::ZIV;
947
1.41k
  if (N == 1)
948
907
    return Subscript::SIV;
949
511
  if (N == 2 && 
(425
SrcLoops.count() == 0425
||
950
425
                 DstLoops.count() == 0 ||
951
425
                 
(411
SrcLoops.count() == 1411
&&
DstLoops.count() == 136
)))
952
44
    return Subscript::RDIV;
953
467
  return Subscript::MIV;
954
467
}
955
956
957
// A wrapper around SCEV::isKnownPredicate.
958
// Looks for cases where we're interested in comparing for equality.
959
// If both X and Y have been identically sign or zero extended,
960
// it strips off the (confusing) extensions before invoking
961
// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
962
// will be similarly updated.
963
//
964
// If SCEV::isKnownPredicate can't prove the predicate,
965
// we try simple subtraction, which seems to help in some cases
966
// involving symbolics.
967
bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X,
968
14.1k
                                      const SCEV *Y) const {
969
14.1k
  if (Pred == CmpInst::ICMP_EQ ||
970
14.1k
      
Pred == CmpInst::ICMP_NE13.7k
) {
971
468
    if ((isa<SCEVSignExtendExpr>(X) &&
972
468
         
isa<SCEVSignExtendExpr>(Y)0
) ||
973
468
        (isa<SCEVZeroExtendExpr>(X) &&
974
468
         
isa<SCEVZeroExtendExpr>(Y)0
)) {
975
0
      const SCEVCastExpr *CX = cast<SCEVCastExpr>(X);
976
0
      const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y);
977
0
      const SCEV *Xop = CX->getOperand();
978
0
      const SCEV *Yop = CY->getOperand();
979
0
      if (Xop->getType() == Yop->getType()) {
980
0
        X = Xop;
981
0
        Y = Yop;
982
0
      }
983
0
    }
984
468
  }
985
14.1k
  if (SE->isKnownPredicate(Pred, X, Y))
986
2.50k
    return true;
987
11.6k
  // If SE->isKnownPredicate can't prove the condition,
988
11.6k
  // we try the brute-force approach of subtracting
989
11.6k
  // and testing the difference.
990
11.6k
  // By testing with SE->isKnownPredicate first, we avoid
991
11.6k
  // the possibility of overflow when the arguments are constants.
992
11.6k
  const SCEV *Delta = SE->getMinusSCEV(X, Y);
993
11.6k
  switch (Pred) {
994
11.6k
  case CmpInst::ICMP_EQ:
995
66
    return Delta->isZero();
996
11.6k
  case CmpInst::ICMP_NE:
997
8
    return SE->isKnownNonZero(Delta);
998
11.6k
  case CmpInst::ICMP_SGE:
999
2
    return SE->isKnownNonNegative(Delta);
1000
11.6k
  case CmpInst::ICMP_SLE:
1001
0
    return SE->isKnownNonPositive(Delta);
1002
11.6k
  case CmpInst::ICMP_SGT:
1003
10.8k
    return SE->isKnownPositive(Delta);
1004
11.6k
  case CmpInst::ICMP_SLT:
1005
785
    return SE->isKnownNegative(Delta);
1006
11.6k
  default:
1007
0
    llvm_unreachable("unexpected predicate in isKnownPredicate");
1008
11.6k
  }
1009
11.6k
}
1010
1011
/// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1))
1012
/// with some extra checking if S is an AddRec and we can prove less-than using
1013
/// the loop bounds.
1014
228
bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const {
1015
228
  // First unify to the same type
1016
228
  auto *SType = dyn_cast<IntegerType>(S->getType());
1017
228
  auto *SizeType = dyn_cast<IntegerType>(Size->getType());
1018
228
  if (!SType || !SizeType)
1019
0
    return false;
1020
228
  Type *MaxType =
1021
228
      (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : 
SizeType0
;
1022
228
  S = SE->getTruncateOrZeroExtend(S, MaxType);
1023
228
  Size = SE->getTruncateOrZeroExtend(Size, MaxType);
1024
228
1025
228
  // Special check for addrecs using BE taken count
1026
228
  const SCEV *Bound = SE->getMinusSCEV(S, Size);
1027
228
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) {
1028
204
    if (AddRec->isAffine()) {
1029
204
      const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());
1030
204
      if (!isa<SCEVCouldNotCompute>(BECount)) {
1031
204
        const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE);
1032
204
        if (SE->isKnownNegative(Limit))
1033
176
          return true;
1034
52
      }
1035
204
    }
1036
204
  }
1037
52
1038
52
  // Check using normal isKnownNegative
1039
52
  const SCEV *LimitedBound =
1040
52
      SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType())));
1041
52
  return SE->isKnownNegative(LimitedBound);
1042
52
}
1043
1044
250
bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const {
1045
250
  bool Inbounds = false;
1046
250
  if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr))
1047
250
    Inbounds = SrcGEP->isInBounds();
1048
250
  if (Inbounds) {
1049
250
    if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
1050
214
      if (AddRec->isAffine()) {
1051
214
        // We know S is for Ptr, the operand on a load/store, so doesn't wrap.
1052
214
        // If both parts are NonNegative, the end result will be NonNegative
1053
214
        if (SE->isKnownNonNegative(AddRec->getStart()) &&
1054
214
            
SE->isKnownNonNegative(AddRec->getOperand(1))204
)
1055
204
          return true;
1056
46
      }
1057
214
    }
1058
250
  }
1059
46
1060
46
  return SE->isKnownNonNegative(S);
1061
46
}
1062
1063
// All subscripts are all the same type.
1064
// Loop bound may be smaller (e.g., a char).
1065
// Should zero extend loop bound, since it's always >= 0.
1066
// This routine collects upper bound and extends or truncates if needed.
1067
// Truncating is safe when subscripts are known not to wrap. Cases without
1068
// nowrap flags should have been rejected earlier.
1069
// Return null if no bound available.
1070
4.62k
const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {
1071
4.62k
  if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
1072
4.59k
    const SCEV *UB = SE->getBackedgeTakenCount(L);
1073
4.59k
    return SE->getTruncateOrZeroExtend(UB, T);
1074
4.59k
  }
1075
28
  return nullptr;
1076
28
}
1077
1078
1079
// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
1080
// If the cast fails, returns NULL.
1081
const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L,
1082
98
                                                              Type *T) const {
1083
98
  if (const SCEV *UB = collectUpperBound(L, T))
1084
98
    return dyn_cast<SCEVConstant>(UB);
1085
0
  return nullptr;
1086
0
}
1087
1088
1089
// testZIV -
1090
// When we have a pair of subscripts of the form [c1] and [c2],
1091
// where c1 and c2 are both loop invariant, we attack it using
1092
// the ZIV test. Basically, we test by comparing the two values,
1093
// but there are actually three possible results:
1094
// 1) the values are equal, so there's a dependence
1095
// 2) the values are different, so there's no dependence
1096
// 3) the values might be equal, so we have to assume a dependence.
1097
//
1098
// Return true if dependence disproved.
1099
bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
1100
274
                             FullDependence &Result) const {
1101
274
  LLVM_DEBUG(dbgs() << "    src = " << *Src << "\n");
1102
274
  LLVM_DEBUG(dbgs() << "    dst = " << *Dst << "\n");
1103
274
  ++ZIVapplications;
1104
274
  if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
1105
260
    LLVM_DEBUG(dbgs() << "    provably dependent\n");
1106
260
    return false; // provably dependent
1107
260
  }
1108
14
  if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
1109
6
    LLVM_DEBUG(dbgs() << "    provably independent\n");
1110
6
    ++ZIVindependence;
1111
6
    return true; // provably independent
1112
6
  }
1113
8
  LLVM_DEBUG(dbgs() << "    possibly dependent\n");
1114
8
  Result.Consistent = false;
1115
8
  return false; // possibly dependent
1116
8
}
1117
1118
1119
// strongSIVtest -
1120
// From the paper, Practical Dependence Testing, Section 4.2.1
1121
//
1122
// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
1123
// where i is an induction variable, c1 and c2 are loop invariant,
1124
//  and a is a constant, we can solve it exactly using the Strong SIV test.
1125
//
1126
// Can prove independence. Failing that, can compute distance (and direction).
1127
// In the presence of symbolic terms, we can sometimes make progress.
1128
//
1129
// If there's a dependence,
1130
//
1131
//    c1 + a*i = c2 + a*i'
1132
//
1133
// The dependence distance is
1134
//
1135
//    d = i' - i = (c1 - c2)/a
1136
//
1137
// A dependence only exists if d is an integer and abs(d) <= U, where U is the
1138
// loop's upper bound. If a dependence exists, the dependence direction is
1139
// defined as
1140
//
1141
//                { < if d > 0
1142
//    direction = { = if d = 0
1143
//                { > if d < 0
1144
//
1145
// Return true if dependence disproved.
1146
bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
1147
                                   const SCEV *DstConst, const Loop *CurLoop,
1148
                                   unsigned Level, FullDependence &Result,
1149
773
                                   Constraint &NewConstraint) const {
1150
773
  LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
1151
773
  LLVM_DEBUG(dbgs() << "\t    Coeff = " << *Coeff);
1152
773
  LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
1153
773
  LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst);
1154
773
  LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
1155
773
  LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst);
1156
773
  LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
1157
773
  ++StrongSIVapplications;
1158
773
  assert(0 < Level && Level <= CommonLevels && "level out of range");
1159
773
  Level--;
1160
773
1161
773
  const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
1162
773
  LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta);
1163
773
  LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
1164
773
1165
773
  // check that |Delta| < iteration count
1166
773
  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1167
765
    LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound);
1168
765
    LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
1169
765
    const SCEV *AbsDelta =
1170
765
      SE->isKnownNonNegative(Delta) ? 
Delta733
:
SE->getNegativeSCEV(Delta)32
;
1171
765
    const SCEV *AbsCoeff =
1172
765
      SE->isKnownNonNegative(Coeff) ? 
Coeff661
:
SE->getNegativeSCEV(Coeff)104
;
1173
765
    const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
1174
765
    if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
1175
4
      // Distance greater than trip count - no dependence
1176
4
      ++StrongSIVindependence;
1177
4
      ++StrongSIVsuccesses;
1178
4
      return true;
1179
4
    }
1180
769
  }
1181
769
1182
769
  // Can we compute distance?
1183
769
  if (isa<SCEVConstant>(Delta) && 
isa<SCEVConstant>(Coeff)761
) {
1184
735
    APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt();
1185
735
    APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt();
1186
735
    APInt Distance  = ConstDelta; // these need to be initialized
1187
735
    APInt Remainder = ConstDelta;
1188
735
    APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
1189
735
    LLVM_DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
1190
735
    LLVM_DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
1191
735
    // Make sure Coeff divides Delta exactly
1192
735
    if (Remainder != 0) {
1193
4
      // Coeff doesn't divide Distance, no dependence
1194
4
      ++StrongSIVindependence;
1195
4
      ++StrongSIVsuccesses;
1196
4
      return true;
1197
4
    }
1198
731
    Result.DV[Level].Distance = SE->getConstant(Distance);
1199
731
    NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
1200
731
    if (Distance.sgt(0))
1201
37
      Result.DV[Level].Direction &= Dependence::DVEntry::LT;
1202
694
    else if (Distance.slt(0))
1203
20
      Result.DV[Level].Direction &= Dependence::DVEntry::GT;
1204
674
    else
1205
674
      Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1206
731
    ++StrongSIVsuccesses;
1207
731
  }
1208
34
  else if (Delta->isZero()) {
1209
26
    // since 0/X == 0
1210
26
    Result.DV[Level].Distance = Delta;
1211
26
    NewConstraint.setDistance(Delta, CurLoop);
1212
26
    Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1213
26
    ++StrongSIVsuccesses;
1214
26
  }
1215
8
  else {
1216
8
    if (Coeff->isOne()) {
1217
0
      LLVM_DEBUG(dbgs() << "\t    Distance = " << *Delta << "\n");
1218
0
      Result.DV[Level].Distance = Delta; // since X/1 == X
1219
0
      NewConstraint.setDistance(Delta, CurLoop);
1220
0
    }
1221
8
    else {
1222
8
      Result.Consistent = false;
1223
8
      NewConstraint.setLine(Coeff,
1224
8
                            SE->getNegativeSCEV(Coeff),
1225
8
                            SE->getNegativeSCEV(Delta), CurLoop);
1226
8
    }
1227
8
1228
8
    // maybe we can get a useful direction
1229
8
    bool DeltaMaybeZero     = !SE->isKnownNonZero(Delta);
1230
8
    bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
1231
8
    bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
1232
8
    bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
1233
8
    bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
1234
8
    // The double negatives above are confusing.
1235
8
    // It helps to read !SE->isKnownNonZero(Delta)
1236
8
    // as "Delta might be Zero"
1237
8
    unsigned NewDirection = Dependence::DVEntry::NONE;
1238
8
    if ((DeltaMaybePositive && CoeffMaybePositive) ||
1239
8
        
(0
DeltaMaybeNegative0
&&
CoeffMaybeNegative0
))
1240
8
      NewDirection = Dependence::DVEntry::LT;
1241
8
    if (DeltaMaybeZero)
1242
8
      NewDirection |= Dependence::DVEntry::EQ;
1243
8
    if ((DeltaMaybeNegative && CoeffMaybePositive) ||
1244
8
        
(0
DeltaMaybePositive0
&&
CoeffMaybeNegative0
))
1245
8
      NewDirection |= Dependence::DVEntry::GT;
1246
8
    if (NewDirection < Result.DV[Level].Direction)
1247
0
      ++StrongSIVsuccesses;
1248
8
    Result.DV[Level].Direction &= NewDirection;
1249
8
  }
1250
769
  
return false765
;
1251
769
}
1252
1253
1254
// weakCrossingSIVtest -
1255
// From the paper, Practical Dependence Testing, Section 4.2.2
1256
//
1257
// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
1258
// where i is an induction variable, c1 and c2 are loop invariant,
1259
// and a is a constant, we can solve it exactly using the
1260
// Weak-Crossing SIV test.
1261
//
1262
// Given c1 + a*i = c2 - a*i', we can look for the intersection of
1263
// the two lines, where i = i', yielding
1264
//
1265
//    c1 + a*i = c2 - a*i
1266
//    2a*i = c2 - c1
1267
//    i = (c2 - c1)/2a
1268
//
1269
// If i < 0, there is no dependence.
1270
// If i > upperbound, there is no dependence.
1271
// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1272
// If i = upperbound, there's a dependence with distance = 0.
1273
// If i is integral, there's a dependence (all directions).
1274
// If the non-integer part = 1/2, there's a dependence (<> directions).
1275
// Otherwise, there's no dependence.
1276
//
1277
// Can prove independence. Failing that,
1278
// can sometimes refine the directions.
1279
// Can determine iteration for splitting.
1280
//
1281
// Return true if dependence disproved.
1282
bool DependenceInfo::weakCrossingSIVtest(
1283
    const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,
1284
    const Loop *CurLoop, unsigned Level, FullDependence &Result,
1285
22
    Constraint &NewConstraint, const SCEV *&SplitIter) const {
1286
22
  LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1287
22
  LLVM_DEBUG(dbgs() << "\t    Coeff = " << *Coeff << "\n");
1288
22
  LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1289
22
  LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1290
22
  ++WeakCrossingSIVapplications;
1291
22
  assert(0 < Level && Level <= CommonLevels && "Level out of range");
1292
22
  Level--;
1293
22
  Result.Consistent = false;
1294
22
  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1295
22
  LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1296
22
  NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
1297
22
  if (Delta->isZero()) {
1298
2
    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
1299
2
    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
1300
2
    ++WeakCrossingSIVsuccesses;
1301
2
    if (!Result.DV[Level].Direction) {
1302
0
      ++WeakCrossingSIVindependence;
1303
0
      return true;
1304
0
    }
1305
2
    Result.DV[Level].Distance = Delta; // = 0
1306
2
    return false;
1307
2
  }
1308
20
  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
1309
20
  if (!ConstCoeff)
1310
0
    return false;
1311
20
1312
20
  Result.DV[Level].Splitable = true;
1313
20
  if (SE->isKnownNegative(ConstCoeff)) {
1314
0
    ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
1315
0
    assert(ConstCoeff &&
1316
0
           "dynamic cast of negative of ConstCoeff should yield constant");
1317
0
    Delta = SE->getNegativeSCEV(Delta);
1318
0
  }
1319
20
  assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
1320
20
1321
20
  // compute SplitIter for use by DependenceInfo::getSplitIteration()
1322
20
  SplitIter = SE->getUDivExpr(
1323
20
      SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta),
1324
20
      SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));
1325
20
  LLVM_DEBUG(dbgs() << "\t    Split iter = " << *SplitIter << "\n");
1326
20
1327
20
  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1328
20
  if (!ConstDelta)
1329
4
    return false;
1330
16
1331
16
  // We're certain that ConstCoeff > 0; therefore,
1332
16
  // if Delta < 0, then no dependence.
1333
16
  LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1334
16
  LLVM_DEBUG(dbgs() << "\t    ConstCoeff = " << *ConstCoeff << "\n");
1335
16
  if (SE->isKnownNegative(Delta)) {
1336
2
    // No dependence, Delta < 0
1337
2
    ++WeakCrossingSIVindependence;
1338
2
    ++WeakCrossingSIVsuccesses;
1339
2
    return true;
1340
2
  }
1341
14
1342
14
  // We're certain that Delta > 0 and ConstCoeff > 0.
1343
14
  // Check Delta/(2*ConstCoeff) against upper loop bound
1344
14
  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1345
14
    LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
1346
14
    const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
1347
14
    const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
1348
14
                                    ConstantTwo);
1349
14
    LLVM_DEBUG(dbgs() << "\t    ML = " << *ML << "\n");
1350
14
    if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
1351
2
      // Delta too big, no dependence
1352
2
      ++WeakCrossingSIVindependence;
1353
2
      ++WeakCrossingSIVsuccesses;
1354
2
      return true;
1355
2
    }
1356
12
    if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
1357
2
      // i = i' = UB
1358
2
      Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
1359
2
      Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
1360
2
      ++WeakCrossingSIVsuccesses;
1361
2
      if (!Result.DV[Level].Direction) {
1362
0
        ++WeakCrossingSIVindependence;
1363
0
        return true;
1364
0
      }
1365
2
      Result.DV[Level].Splitable = false;
1366
2
      Result.DV[Level].Distance = SE->getZero(Delta->getType());
1367
2
      return false;
1368
2
    }
1369
12
  }
1370
10
1371
10
  // check that Coeff divides Delta
1372
10
  APInt APDelta = ConstDelta->getAPInt();
1373
10
  APInt APCoeff = ConstCoeff->getAPInt();
1374
10
  APInt Distance = APDelta; // these need to be initialzed
1375
10
  APInt Remainder = APDelta;
1376
10
  APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
1377
10
  LLVM_DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
1378
10
  if (Remainder != 0) {
1379
2
    // Coeff doesn't divide Delta, no dependence
1380
2
    ++WeakCrossingSIVindependence;
1381
2
    ++WeakCrossingSIVsuccesses;
1382
2
    return true;
1383
2
  }
1384
8
  LLVM_DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
1385
8
1386
8
  // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
1387
8
  APInt Two = APInt(Distance.getBitWidth(), 2, true);
1388
8
  Remainder = Distance.srem(Two);
1389
8
  LLVM_DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
1390
8
  if (Remainder != 0) {
1391
8
    // Equal direction isn't possible
1392
8
    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
1393
8
    ++WeakCrossingSIVsuccesses;
1394
8
  }
1395
8
  return false;
1396
8
}
1397
1398
1399
// Kirch's algorithm, from
1400
//
1401
//        Optimizing Supercompilers for Supercomputers
1402
//        Michael Wolfe
1403
//        MIT Press, 1989
1404
//
1405
// Program 2.1, page 29.
1406
// Computes the GCD of AM and BM.
1407
// Also finds a solution to the equation ax - by = gcd(a, b).
1408
// Returns true if dependence disproved; i.e., gcd does not divide Delta.
1409
static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
1410
80
                    const APInt &Delta, APInt &G, APInt &X, APInt &Y) {
1411
80
  APInt A0(Bits, 1, true), A1(Bits, 0, true);
1412
80
  APInt B0(Bits, 0, true), B1(Bits, 1, true);
1413
80
  APInt G0 = AM.abs();
1414
80
  APInt G1 = BM.abs();
1415
80
  APInt Q = G0; // these need to be initialized
1416
80
  APInt R = G0;
1417
80
  APInt::sdivrem(G0, G1, Q, R);
1418
152
  while (R != 0) {
1419
72
    APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
1420
72
    APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
1421
72
    G0 = G1; G1 = R;
1422
72
    APInt::sdivrem(G0, G1, Q, R);
1423
72
  }
1424
80
  G = G1;
1425
80
  LLVM_DEBUG(dbgs() << "\t    GCD = " << G << "\n");
1426
80
  X = AM.slt(0) ? 
-A120
:
A160
;
1427
80
  Y = BM.slt(0) ? 
B120
:
-B160
;
1428
80
1429
80
  // make sure gcd divides Delta
1430
80
  R = Delta.srem(G);
1431
80
  if (R != 0)
1432
8
    return true; // gcd doesn't divide Delta, no dependence
1433
72
  Q = Delta.sdiv(G);
1434
72
  X *= Q;
1435
72
  Y *= Q;
1436
72
  return false;
1437
72
}
1438
1439
216
static APInt floorOfQuotient(const APInt &A, const APInt &B) {
1440
216
  APInt Q = A; // these need to be initialized
1441
216
  APInt R = A;
1442
216
  APInt::sdivrem(A, B, Q, R);
1443
216
  if (R == 0)
1444
104
    return Q;
1445
112
  if ((A.sgt(0) && 
B.sgt(0)66
) ||
1446
112
      
(78
A.slt(0)78
&&
B.slt(0)46
))
1447
58
    return Q;
1448
54
  else
1449
54
    return Q - 1;
1450
112
}
1451
1452
216
static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
1453
216
  APInt Q = A; // these need to be initialized
1454
216
  APInt R = A;
1455
216
  APInt::sdivrem(A, B, Q, R);
1456
216
  if (R == 0)
1457
98
    return Q;
1458
118
  if ((A.sgt(0) && 
B.sgt(0)70
) ||
1459
118
      
(60
A.slt(0)60
&&
B.slt(0)48
))
1460
58
    return Q + 1;
1461
60
  else
1462
60
    return Q;
1463
118
}
1464
1465
1466
static
1467
216
APInt maxAPInt(APInt A, APInt B) {
1468
216
  return A.sgt(B) ? 
A44
:
B172
;
1469
216
}
1470
1471
1472
static
1473
216
APInt minAPInt(APInt A, APInt B) {
1474
216
  return A.slt(B) ? 
A40
:
B176
;
1475
216
}
1476
1477
1478
// exactSIVtest -
1479
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1480
// where i is an induction variable, c1 and c2 are loop invariant, and a1
1481
// and a2 are constant, we can solve it exactly using an algorithm developed
1482
// by Banerjee and Wolfe. See Section 2.5.3 in
1483
//
1484
//        Optimizing Supercompilers for Supercomputers
1485
//        Michael Wolfe
1486
//        MIT Press, 1989
1487
//
1488
// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1489
// so use them if possible. They're also a bit better with symbolics and,
1490
// in the case of the strong SIV test, can compute Distances.
1491
//
1492
// Return true if dependence disproved.
1493
bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1494
                                  const SCEV *SrcConst, const SCEV *DstConst,
1495
                                  const Loop *CurLoop, unsigned Level,
1496
                                  FullDependence &Result,
1497
72
                                  Constraint &NewConstraint) const {
1498
72
  LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
1499
72
  LLVM_DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
1500
72
  LLVM_DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
1501
72
  LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1502
72
  LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1503
72
  ++ExactSIVapplications;
1504
72
  assert(0 < Level && Level <= CommonLevels && "Level out of range");
1505
72
  Level--;
1506
72
  Result.Consistent = false;
1507
72
  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1508
72
  LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1509
72
  NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
1510
72
                        Delta, CurLoop);
1511
72
  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1512
72
  const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1513
72
  const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1514
72
  if (!ConstDelta || 
!ConstSrcCoeff54
||
!ConstDstCoeff54
)
1515
18
    return false;
1516
54
1517
54
  // find gcd
1518
54
  APInt G, X, Y;
1519
54
  APInt AM = ConstSrcCoeff->getAPInt();
1520
54
  APInt BM = ConstDstCoeff->getAPInt();
1521
54
  unsigned Bits = AM.getBitWidth();
1522
54
  if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {
1523
6
    // gcd doesn't divide Delta, no dependence
1524
6
    ++ExactSIVindependence;
1525
6
    ++ExactSIVsuccesses;
1526
6
    return true;
1527
6
  }
1528
48
1529
48
  LLVM_DEBUG(dbgs() << "\t    X = " << X << ", Y = " << Y << "\n");
1530
48
1531
48
  // since SCEV construction normalizes, LM = 0
1532
48
  APInt UM(Bits, 1, true);
1533
48
  bool UMvalid = false;
1534
48
  // UM is perhaps unavailable, let's check
1535
48
  if (const SCEVConstant *CUB =
1536
48
      collectConstantUpperBound(CurLoop, Delta->getType())) {
1537
48
    UM = CUB->getAPInt();
1538
48
    LLVM_DEBUG(dbgs() << "\t    UM = " << UM << "\n");
1539
48
    UMvalid = true;
1540
48
  }
1541
48
1542
48
  APInt TU(APInt::getSignedMaxValue(Bits));
1543
48
  APInt TL(APInt::getSignedMinValue(Bits));
1544
48
1545
48
  // test(BM/G, LM-X) and test(-BM/G, X-UM)
1546
48
  APInt TMUL = BM.sdiv(G);
1547
48
  if (TMUL.sgt(0)) {
1548
36
    TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
1549
36
    LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1550
36
    if (UMvalid) {
1551
36
      TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
1552
36
      LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1553
36
    }
1554
36
  }
1555
12
  else {
1556
12
    TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
1557
12
    LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1558
12
    if (UMvalid) {
1559
12
      TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
1560
12
      LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1561
12
    }
1562
12
  }
1563
48
1564
48
  // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
1565
48
  TMUL = AM.sdiv(G);
1566
48
  if (TMUL.sgt(0)) {
1567
36
    TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
1568
36
    LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1569
36
    if (UMvalid) {
1570
36
      TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
1571
36
      LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1572
36
    }
1573
36
  }
1574
12
  else {
1575
12
    TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
1576
12
    LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1577
12
    if (UMvalid) {
1578
12
      TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
1579
12
      LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1580
12
    }
1581
12
  }
1582
48
  if (TL.sgt(TU)) {
1583
12
    ++ExactSIVindependence;
1584
12
    ++ExactSIVsuccesses;
1585
12
    return true;
1586
12
  }
1587
36
1588
36
  // explore directions
1589
36
  unsigned NewDirection = Dependence::DVEntry::NONE;
1590
36
1591
36
  // less than
1592
36
  APInt SaveTU(TU); // save these
1593
36
  APInt SaveTL(TL);
1594
36
  LLVM_DEBUG(dbgs() << "\t    exploring LT direction\n");
1595
36
  TMUL = AM - BM;
1596
36
  if (TMUL.sgt(0)) {
1597
24
    TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
1598
24
    LLVM_DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
1599
24
  }
1600
12
  else {
1601
12
    TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
1602
12
    LLVM_DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
1603
12
  }
1604
36
  if (TL.sle(TU)) {
1605
26
    NewDirection |= Dependence::DVEntry::LT;
1606
26
    ++ExactSIVsuccesses;
1607
26
  }
1608
36
1609
36
  // equal
1610
36
  TU = SaveTU; // restore
1611
36
  TL = SaveTL;
1612
36
  LLVM_DEBUG(dbgs() << "\t    exploring EQ direction\n");
1613
36
  if (TMUL.sgt(0)) {
1614
24
    TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
1615
24
    LLVM_DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
1616
24
  }
1617
12
  else {
1618
12
    TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
1619
12
    LLVM_DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
1620
12
  }
1621
36
  TMUL = BM - AM;
1622
36
  if (TMUL.sgt(0)) {
1623
12
    TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
1624
12
    LLVM_DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
1625
12
  }
1626
24
  else {
1627
24
    TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
1628
24
    LLVM_DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
1629
24
  }
1630
36
  if (TL.sle(TU)) {
1631
2
    NewDirection |= Dependence::DVEntry::EQ;
1632
2
    ++ExactSIVsuccesses;
1633
2
  }
1634
36
1635
36
  // greater than
1636
36
  TU = SaveTU; // restore
1637
36
  TL = SaveTL;
1638
36
  LLVM_DEBUG(dbgs() << "\t    exploring GT direction\n");
1639
36
  if (TMUL.sgt(0)) {
1640
12
    TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
1641
12
    LLVM_DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
1642
12
  }
1643
24
  else {
1644
24
    TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
1645
24
    LLVM_DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
1646
24
  }
1647
36
  if (TL.sle(TU)) {
1648
8
    NewDirection |= Dependence::DVEntry::GT;
1649
8
    ++ExactSIVsuccesses;
1650
8
  }
1651
36
1652
36
  // finished
1653
36
  Result.DV[Level].Direction &= NewDirection;
1654
36
  if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
1655
0
    ++ExactSIVindependence;
1656
36
  return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
1657
36
}
1658
1659
1660
1661
// Return true if the divisor evenly divides the dividend.
1662
static
1663
bool isRemainderZero(const SCEVConstant *Dividend,
1664
12
                     const SCEVConstant *Divisor) {
1665
12
  const APInt &ConstDividend = Dividend->getAPInt();
1666
12
  const APInt &ConstDivisor = Divisor->getAPInt();
1667
12
  return ConstDividend.srem(ConstDivisor) == 0;
1668
12
}
1669
1670
1671
// weakZeroSrcSIVtest -
1672
// From the paper, Practical Dependence Testing, Section 4.2.2
1673
//
1674
// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
1675
// where i is an induction variable, c1 and c2 are loop invariant,
1676
// and a is a constant, we can solve it exactly using the
1677
// Weak-Zero SIV test.
1678
//
1679
// Given
1680
//
1681
//    c1 = c2 + a*i
1682
//
1683
// we get
1684
//
1685
//    (c1 - c2)/a = i
1686
//
1687
// If i is not an integer, there's no dependence.
1688
// If i < 0 or > UB, there's no dependence.
1689
// If i = 0, the direction is >= and peeling the
1690
// 1st iteration will break the dependence.
1691
// If i = UB, the direction is <= and peeling the
1692
// last iteration will break the dependence.
1693
// Otherwise, the direction is *.
1694
//
1695
// Can prove independence. Failing that, we can sometimes refine
1696
// the directions. Can sometimes show that first or last
1697
// iteration carries all the dependences (so worth peeling).
1698
//
1699
// (see also weakZeroDstSIVtest)
1700
//
1701
// Return true if dependence disproved.
1702
bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
1703
                                        const SCEV *SrcConst,
1704
                                        const SCEV *DstConst,
1705
                                        const Loop *CurLoop, unsigned Level,
1706
                                        FullDependence &Result,
1707
20
                                        Constraint &NewConstraint) const {
1708
20
  // For the WeakSIV test, it's possible the loop isn't common to
1709
20
  // the Src and Dst loops. If it isn't, then there's no need to
1710
20
  // record a direction.
1711
20
  LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1712
20
  LLVM_DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << "\n");
1713
20
  LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1714
20
  LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1715
20
  ++WeakZeroSIVapplications;
1716
20
  assert(0 < Level && Level <= MaxLevels && "Level out of range");
1717
20
  Level--;
1718
20
  Result.Consistent = false;
1719
20
  const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
1720
20
  NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
1721
20
                        CurLoop);
1722
20
  LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1723
20
  if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
1724
10
    if (Level < CommonLevels) {
1725
8
      Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1726
8
      Result.DV[Level].PeelFirst = true;
1727
8
      ++WeakZeroSIVsuccesses;
1728
8
    }
1729
10
    return false; // dependences caused by first iteration
1730
10
  }
1731
10
  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1732
10
  if (!ConstCoeff)
1733
0
    return false;
1734
10
  const SCEV *AbsCoeff =
1735
10
    SE->isKnownNegative(ConstCoeff) ?
1736
10
    
SE->getNegativeSCEV(ConstCoeff)0
: ConstCoeff;
1737
10
  const SCEV *NewDelta =
1738
10
    SE->isKnownNegative(ConstCoeff) ? 
SE->getNegativeSCEV(Delta)0
: Delta;
1739
10
1740
10
  // check that Delta/SrcCoeff < iteration count
1741
10
  // really check NewDelta < count*AbsCoeff
1742
10
  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1743
10
    LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
1744
10
    const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
1745
10
    if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
1746
2
      ++WeakZeroSIVindependence;
1747
2
      ++WeakZeroSIVsuccesses;
1748
2
      return true;
1749
2
    }
1750
8
    if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
1751
2
      // dependences caused by last iteration
1752
2
      if (Level < CommonLevels) {
1753
2
        Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1754
2
        Result.DV[Level].PeelLast = true;
1755
2
        ++WeakZeroSIVsuccesses;
1756
2
      }
1757
2
      return false;
1758
2
    }
1759
6
  }
1760
6
1761
6
  // check that Delta/SrcCoeff >= 0
1762
6
  // really check that NewDelta >= 0
1763
6
  if (SE->isKnownNegative(NewDelta)) {
1764
2
    // No dependence, newDelta < 0
1765
2
    ++WeakZeroSIVindependence;
1766
2
    ++WeakZeroSIVsuccesses;
1767
2
    return true;
1768
2
  }
1769
4
1770
4
  // if SrcCoeff doesn't divide Delta, then no dependence
1771
4
  if (isa<SCEVConstant>(Delta) &&
1772
4
      !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
1773
2
    ++WeakZeroSIVindependence;
1774
2
    ++WeakZeroSIVsuccesses;
1775
2
    return true;
1776
2
  }
1777
2
  return false;
1778
2
}
1779
1780
1781
// weakZeroDstSIVtest -
1782
// From the paper, Practical Dependence Testing, Section 4.2.2
1783
//
1784
// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
1785
// where i is an induction variable, c1 and c2 are loop invariant,
1786
// and a is a constant, we can solve it exactly using the
1787
// Weak-Zero SIV test.
1788
//
1789
// Given
1790
//
1791
//    c1 + a*i = c2
1792
//
1793
// we get
1794
//
1795
//    i = (c2 - c1)/a
1796
//
1797
// If i is not an integer, there's no dependence.
1798
// If i < 0 or > UB, there's no dependence.
1799
// If i = 0, the direction is <= and peeling the
1800
// 1st iteration will break the dependence.
1801
// If i = UB, the direction is >= and peeling the
1802
// last iteration will break the dependence.
1803
// Otherwise, the direction is *.
1804
//
1805
// Can prove independence. Failing that, we can sometimes refine
1806
// the directions. Can sometimes show that first or last
1807
// iteration carries all the dependences (so worth peeling).
1808
//
1809
// (see also weakZeroSrcSIVtest)
1810
//
1811
// Return true if dependence disproved.
1812
bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
1813
                                        const SCEV *SrcConst,
1814
                                        const SCEV *DstConst,
1815
                                        const Loop *CurLoop, unsigned Level,
1816
                                        FullDependence &Result,
1817
20
                                        Constraint &NewConstraint) const {
1818
20
  // For the WeakSIV test, it's possible the loop isn't common to the
1819
20
  // Src and Dst loops. If it isn't, then there's no need to record a direction.
1820
20
  LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
1821
20
  LLVM_DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << "\n");
1822
20
  LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1823
20
  LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1824
20
  ++WeakZeroSIVapplications;
1825
20
  assert(0 < Level && Level <= SrcLevels && "Level out of range");
1826
20
  Level--;
1827
20
  Result.Consistent = false;
1828
20
  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1829
20
  NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
1830
20
                        CurLoop);
1831
20
  LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1832
20
  if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
1833
6
    if (Level < CommonLevels) {
1834
6
      Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1835
6
      Result.DV[Level].PeelFirst = true;
1836
6
      ++WeakZeroSIVsuccesses;
1837
6
    }
1838
6
    return false; // dependences caused by first iteration
1839
6
  }
1840
14
  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1841
14
  if (!ConstCoeff)
1842
0
    return false;
1843
14
  const SCEV *AbsCoeff =
1844
14
    SE->isKnownNegative(ConstCoeff) ?
1845
14
    
SE->getNegativeSCEV(ConstCoeff)0
: ConstCoeff;
1846
14
  const SCEV *NewDelta =
1847
14
    SE->isKnownNegative(ConstCoeff) ? 
SE->getNegativeSCEV(Delta)0
: Delta;
1848
14
1849
14
  // check that Delta/SrcCoeff < iteration count
1850
14
  // really check NewDelta < count*AbsCoeff
1851
14
  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1852
14
    LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
1853
14
    const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
1854
14
    if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
1855
2
      ++WeakZeroSIVindependence;
1856
2
      ++WeakZeroSIVsuccesses;
1857
2
      return true;
1858
2
    }
1859
12
    if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
1860
2
      // dependences caused by last iteration
1861
2
      if (Level < CommonLevels) {
1862
2
        Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1863
2
        Result.DV[Level].PeelLast = true;
1864
2
        ++WeakZeroSIVsuccesses;
1865
2
      }
1866
2
      return false;
1867
2
    }
1868
10
  }
1869
10
1870
10
  // check that Delta/SrcCoeff >= 0
1871
10
  // really check that NewDelta >= 0
1872
10
  if (SE->isKnownNegative(NewDelta)) {
1873
2
    // No dependence, newDelta < 0
1874
2
    ++WeakZeroSIVindependence;
1875
2
    ++WeakZeroSIVsuccesses;
1876
2
    return true;
1877
2
  }
1878
8
1879
8
  // if SrcCoeff doesn't divide Delta, then no dependence
1880
8
  if (isa<SCEVConstant>(Delta) &&
1881
8
      !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
1882
2
    ++WeakZeroSIVindependence;
1883
2
    ++WeakZeroSIVsuccesses;
1884
2
    return true;
1885
2
  }
1886
6
  return false;
1887
6
}
1888
1889
1890
// exactRDIVtest - Tests the RDIV subscript pair for dependence.
1891
// Things of the form [c1 + a*i] and [c2 + b*j],
1892
// where i and j are induction variable, c1 and c2 are loop invariant,
1893
// and a and b are constants.
1894
// Returns true if any possible dependence is disproved.
1895
// Marks the result as inconsistent.
1896
// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
1897
bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1898
                                   const SCEV *SrcConst, const SCEV *DstConst,
1899
                                   const Loop *SrcLoop, const Loop *DstLoop,
1900
44
                                   FullDependence &Result) const {
1901
44
  LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
1902
44
  LLVM_DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
1903
44
  LLVM_DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
1904
44
  LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1905
44
  LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1906
44
  ++ExactRDIVapplications;
1907
44
  Result.Consistent = false;
1908
44
  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1909
44
  LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1910
44
  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1911
44
  const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1912
44
  const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1913
44
  if (!ConstDelta || 
!ConstSrcCoeff26
||
!ConstDstCoeff26
)
1914
18
    return false;
1915
26
1916
26
  // find gcd
1917
26
  APInt G, X, Y;
1918
26
  APInt AM = ConstSrcCoeff->getAPInt();
1919
26
  APInt BM = ConstDstCoeff->getAPInt();
1920
26
  unsigned Bits = AM.getBitWidth();
1921
26
  if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {
1922
2
    // gcd doesn't divide Delta, no dependence
1923
2
    ++ExactRDIVindependence;
1924
2
    return true;
1925
2
  }
1926
24
1927
24
  LLVM_DEBUG(dbgs() << "\t    X = " << X << ", Y = " << Y << "\n");
1928
24
1929
24
  // since SCEV construction seems to normalize, LM = 0
1930
24
  APInt SrcUM(Bits, 1, true);
1931
24
  bool SrcUMvalid = false;
1932
24
  // SrcUM is perhaps unavailable, let's check
1933
24
  if (const SCEVConstant *UpperBound =
1934
24
      collectConstantUpperBound(SrcLoop, Delta->getType())) {
1935
24
    SrcUM = UpperBound->getAPInt();
1936
24
    LLVM_DEBUG(dbgs() << "\t    SrcUM = " << SrcUM << "\n");
1937
24
    SrcUMvalid = true;
1938
24
  }
1939
24
1940
24
  APInt DstUM(Bits, 1, true);
1941
24
  bool DstUMvalid = false;
1942
24
  // UM is perhaps unavailable, let's check
1943
24
  if (const SCEVConstant *UpperBound =
1944
24
      collectConstantUpperBound(DstLoop, Delta->getType())) {
1945
24
    DstUM = UpperBound->getAPInt();
1946
24
    LLVM_DEBUG(dbgs() << "\t    DstUM = " << DstUM << "\n");
1947
24
    DstUMvalid = true;
1948
24
  }
1949
24
1950
24
  APInt TU(APInt::getSignedMaxValue(Bits));
1951
24
  APInt TL(APInt::getSignedMinValue(Bits));
1952
24
1953
24
  // test(BM/G, LM-X) and test(-BM/G, X-UM)
1954
24
  APInt TMUL = BM.sdiv(G);
1955
24
  if (TMUL.sgt(0)) {
1956
16
    TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
1957
16
    LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1958
16
    if (SrcUMvalid) {
1959
16
      TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL));
1960
16
      LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1961
16
    }
1962
16
  }
1963
8
  else {
1964
8
    TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
1965
8
    LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1966
8
    if (SrcUMvalid) {
1967
8
      TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL));
1968
8
      LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1969
8
    }
1970
8
  }
1971
24
1972
24
  // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
1973
24
  TMUL = AM.sdiv(G);
1974
24
  if (TMUL.sgt(0)) {
1975
16
    TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
1976
16
    LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1977
16
    if (DstUMvalid) {
1978
16
      TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL));
1979
16
      LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1980
16
    }
1981
16
  }
1982
8
  else {
1983
8
    TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
1984
8
    LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1985
8
    if (DstUMvalid) {
1986
8
      TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL));
1987
8
      LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1988
8
    }
1989
8
  }
1990
24
  if (TL.sgt(TU))
1991
18
    ++ExactRDIVindependence;
1992
24
  return TL.sgt(TU);
1993
24
}
1994
1995
1996
// symbolicRDIVtest -
1997
// In Section 4.5 of the Practical Dependence Testing paper,the authors
1998
// introduce a special case of Banerjee's Inequalities (also called the
1999
// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
2000
// particularly cases with symbolics. Since it's only able to disprove
2001
// dependence (not compute distances or directions), we'll use it as a
2002
// fall back for the other tests.
2003
//
2004
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2005
// where i and j are induction variables and c1 and c2 are loop invariants,
2006
// we can use the symbolic tests to disprove some dependences, serving as a
2007
// backup for the RDIV test. Note that i and j can be the same variable,
2008
// letting this test serve as a backup for the various SIV tests.
2009
//
2010
// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
2011
//  0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
2012
// loop bounds for the i and j loops, respectively. So, ...
2013
//
2014
// c1 + a1*i = c2 + a2*j
2015
// a1*i - a2*j = c2 - c1
2016
//
2017
// To test for a dependence, we compute c2 - c1 and make sure it's in the
2018
// range of the maximum and minimum possible values of a1*i - a2*j.
2019
// Considering the signs of a1 and a2, we have 4 possible cases:
2020
//
2021
// 1) If a1 >= 0 and a2 >= 0, then
2022
//        a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
2023
//              -a2*N2 <= c2 - c1 <= a1*N1
2024
//
2025
// 2) If a1 >= 0 and a2 <= 0, then
2026
//        a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
2027
//                  0 <= c2 - c1 <= a1*N1 - a2*N2
2028
//
2029
// 3) If a1 <= 0 and a2 >= 0, then
2030
//        a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
2031
//        a1*N1 - a2*N2 <= c2 - c1 <= 0
2032
//
2033
// 4) If a1 <= 0 and a2 <= 0, then
2034
//        a1*N1 - a2*0  <= c2 - c1 <= a1*0 - a2*N2
2035
//        a1*N1         <= c2 - c1 <=       -a2*N2
2036
//
2037
// return true if dependence disproved
2038
bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
2039
                                      const SCEV *C1, const SCEV *C2,
2040
                                      const Loop *Loop1,
2041
855
                                      const Loop *Loop2) const {
2042
855
  ++SymbolicRDIVapplications;
2043
855
  LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
2044
855
  LLVM_DEBUG(dbgs() << "\t    A1 = " << *A1);
2045
855
  LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
2046
855
  LLVM_DEBUG(dbgs() << "\t    A2 = " << *A2 << "\n");
2047
855
  LLVM_DEBUG(dbgs() << "\t    C1 = " << *C1 << "\n");
2048
855
  LLVM_DEBUG(dbgs() << "\t    C2 = " << *C2 << "\n");
2049
855
  const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
2050
855
  const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
2051
855
  LLVM_DEBUG(if (N1) dbgs() << "\t    N1 = " << *N1 << "\n");
2052
855
  LLVM_DEBUG(if (N2) dbgs() << "\t    N2 = " << *N2 << "\n");
2053
855
  const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
2054
855
  const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
2055
855
  LLVM_DEBUG(dbgs() << "\t    C2 - C1 = " << *C2_C1 << "\n");
2056
855
  LLVM_DEBUG(dbgs() << "\t    C1 - C2 = " << *C1_C2 << "\n");
2057
855
  if (SE->isKnownNonNegative(A1)) {
2058
719
    if (SE->isKnownNonNegative(A2)) {
2059
697
      // A1 >= 0 && A2 >= 0
2060
697
      if (N1) {
2061
693
        // make sure that c2 - c1 <= a1*N1
2062
693
        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2063
693
        LLVM_DEBUG(dbgs() << "\t    A1*N1 = " << *A1N1 << "\n");
2064
693
        if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
2065
4
          ++SymbolicRDIVindependence;
2066
4
          return true;
2067
4
        }
2068
693
      }
2069
693
      if (N2) {
2070
689
        // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
2071
689
        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2072
689
        LLVM_DEBUG(dbgs() << "\t    A2*N2 = " << *A2N2 << "\n");
2073
689
        if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
2074
4
          ++SymbolicRDIVindependence;
2075
4
          return true;
2076
4
        }
2077
22
      }
2078
22
    }
2079
22
    else if (SE->isKnownNonPositive(A2)) {
2080
22
      // a1 >= 0 && a2 <= 0
2081
22
      if (N1 && N2) {
2082
22
        // make sure that c2 - c1 <= a1*N1 - a2*N2
2083
22
        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2084
22
        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2085
22
        const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
2086
22
        LLVM_DEBUG(dbgs() << "\t    A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2087
22
        if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
2088
4
          ++SymbolicRDIVindependence;
2089
4
          return true;
2090
4
        }
2091
18
      }
2092
18
      // make sure that 0 <= c2 - c1
2093
18
      if (SE->isKnownNegative(C2_C1)) {
2094
0
        ++SymbolicRDIVindependence;
2095
0
        return true;
2096
0
      }
2097
136
    }
2098
136
  }
2099
136
  else if (SE->isKnownNonPositive(A1)) {
2100
104
    if (SE->isKnownNonNegative(A2)) {
2101
4
      // a1 <= 0 && a2 >= 0
2102
4
      if (N1 && N2) {
2103
4
        // make sure that a1*N1 - a2*N2 <= c2 - c1
2104
4
        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2105
4
        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2106
4
        const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
2107
4
        LLVM_DEBUG(dbgs() << "\t    A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2108
4
        if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
2109
4
          ++SymbolicRDIVindependence;
2110
4
          return true;
2111
4
        }
2112
0
      }
2113
0
      // make sure that c2 - c1 <= 0
2114
0
      if (SE->isKnownPositive(C2_C1)) {
2115
0
        ++SymbolicRDIVindependence;
2116
0
        return true;
2117
0
      }
2118
100
    }
2119
100
    else if (SE->isKnownNonPositive(A2)) {
2120
100
      // a1 <= 0 && a2 <= 0
2121
100
      if (N1) {
2122
100
        // make sure that a1*N1 <= c2 - c1
2123
100
        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2124
100
        LLVM_DEBUG(dbgs() << "\t    A1*N1 = " << *A1N1 << "\n");
2125
100
        if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
2126
4
          ++SymbolicRDIVindependence;
2127
4
          return true;
2128
4
        }
2129
96
      }
2130
96
      if (N2) {
2131
96
        // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
2132
96
        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2133
96
        LLVM_DEBUG(dbgs() << "\t    A2*N2 = " << *A2N2 << "\n");
2134
96
        if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
2135
6
          ++SymbolicRDIVindependence;
2136
6
          return true;
2137
6
        }
2138
829
      }
2139
96
    }
2140
104
  }
2141
829
  return false;
2142
829
}
2143
2144
2145
// testSIV -
2146
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
2147
// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
2148
// a2 are constant, we attack it with an SIV test. While they can all be
2149
// solved with the Exact SIV test, it's worthwhile to use simpler tests when
2150
// they apply; they're cheaper and sometimes more precise.
2151
//
2152
// Return true if dependence disproved.
2153
bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
2154
                             FullDependence &Result, Constraint &NewConstraint,
2155
907
                             const SCEV *&SplitIter) const {
2156
907
  LLVM_DEBUG(dbgs() << "    src = " << *Src << "\n");
2157
907
  LLVM_DEBUG(dbgs() << "    dst = " << *Dst << "\n");
2158
907
  const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
2159
907
  const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
2160
907
  if (SrcAddRec && 
DstAddRec887
) {
2161
867
    const SCEV *SrcConst = SrcAddRec->getStart();
2162
867
    const SCEV *DstConst = DstAddRec->getStart();
2163
867
    const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2164
867
    const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
2165
867
    const Loop *CurLoop = SrcAddRec->getLoop();
2166
867
    assert(CurLoop == DstAddRec->getLoop() &&
2167
867
           "both loops in SIV should be same");
2168
867
    Level = mapSrcLoop(CurLoop);
2169
867
    bool disproven;
2170
867
    if (SrcCoeff == DstCoeff)
2171
773
      disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2172
773
                                Level, Result, NewConstraint);
2173
94
    else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
2174
22
      disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2175
22
                                      Level, Result, NewConstraint, SplitIter);
2176
72
    else
2177
72
      disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
2178
72
                               Level, Result, NewConstraint);
2179
867
    return disproven ||
2180
867
      
gcdMIVtest(Src, Dst, Result)835
||
2181
867
      
symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop)831
;
2182
867
  }
2183
40
  if (SrcAddRec) {
2184
20
    const SCEV *SrcConst = SrcAddRec->getStart();
2185
20
    const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2186
20
    const SCEV *DstConst = Dst;
2187
20
    const Loop *CurLoop = SrcAddRec->getLoop();
2188
20
    Level = mapSrcLoop(CurLoop);
2189
20
    return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2190
20
                              Level, Result, NewConstraint) ||
2191
20
      
gcdMIVtest(Src, Dst, Result)14
;
2192
20
  }
2193
20
  if (DstAddRec) {
2194
20
    const SCEV *DstConst = DstAddRec->getStart();
2195
20
    const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
2196
20
    const SCEV *SrcConst = Src;
2197
20
    const Loop *CurLoop = DstAddRec->getLoop();
2198
20
    Level = mapDstLoop(CurLoop);
2199
20
    return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
2200
20
                              CurLoop, Level, Result, NewConstraint) ||
2201
20
      
gcdMIVtest(Src, Dst, Result)14
;
2202
20
  }
2203
0
  llvm_unreachable("SIV test expected at least one AddRec");
2204
0
  return false;
2205
0
}
2206
2207
2208
// testRDIV -
2209
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2210
// where i and j are induction variables, c1 and c2 are loop invariant,
2211
// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
2212
// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
2213
// It doesn't make sense to talk about distance or direction in this case,
2214
// so there's no point in making special versions of the Strong SIV test or
2215
// the Weak-crossing SIV test.
2216
//
2217
// With minor algebra, this test can also be used for things like
2218
// [c1 + a1*i + a2*j][c2].
2219
//
2220
// Return true if dependence disproved.
2221
bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
2222
44
                              FullDependence &Result) const {
2223
44
  // we have 3 possible situations here:
2224
44
  //   1) [a*i + b] and [c*j + d]
2225
44
  //   2) [a*i + c*j + b] and [d]
2226
44
  //   3) [b] and [a*i + c*j + d]
2227
44
  // We need to find what we've got and get organized
2228
44
2229
44
  const SCEV *SrcConst, *DstConst;
2230
44
  const SCEV *SrcCoeff, *DstCoeff;
2231
44
  const Loop *SrcLoop, *DstLoop;
2232
44
2233
44
  LLVM_DEBUG(dbgs() << "    src = " << *Src << "\n");
2234
44
  LLVM_DEBUG(dbgs() << "    dst = " << *Dst << "\n");
2235
44
  const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
2236
44
  const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
2237
44
  if (SrcAddRec && DstAddRec) {
2238
30
    SrcConst = SrcAddRec->getStart();
2239
30
    SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2240
30
    SrcLoop = SrcAddRec->getLoop();
2241
30
    DstConst = DstAddRec->getStart();
2242
30
    DstCoeff = DstAddRec->getStepRecurrence(*SE);
2243
30
    DstLoop = DstAddRec->getLoop();
2244
30
  }
2245
14
  else if (SrcAddRec) {
2246
14
    if (const SCEVAddRecExpr *tmpAddRec =
2247
14
        dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
2248
14
      SrcConst = tmpAddRec->getStart();
2249
14
      SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
2250
14
      SrcLoop = tmpAddRec->getLoop();
2251
14
      DstConst = Dst;
2252
14
      DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
2253
14
      DstLoop = SrcAddRec->getLoop();
2254
14
    }
2255
14
    else
2256
14
      
llvm_unreachable0
("RDIV reached by surprising SCEVs");
2257
14
  }
2258
0
  else if (DstAddRec) {
2259
0
    if (const SCEVAddRecExpr *tmpAddRec =
2260
0
        dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
2261
0
      DstConst = tmpAddRec->getStart();
2262
0
      DstCoeff = tmpAddRec->getStepRecurrence(*SE);
2263
0
      DstLoop = tmpAddRec->getLoop();
2264
0
      SrcConst = Src;
2265
0
      SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
2266
0
      SrcLoop = DstAddRec->getLoop();
2267
0
    }
2268
0
    else
2269
0
      llvm_unreachable("RDIV reached by surprising SCEVs");
2270
0
  }
2271
0
  else
2272
0
    llvm_unreachable("RDIV expected at least one AddRec");
2273
44
  return exactRDIVtest(SrcCoeff, DstCoeff,
2274
44
                       SrcConst, DstConst,
2275
44
                       SrcLoop, DstLoop,
2276
44
                       Result) ||
2277
44
    
gcdMIVtest(Src, Dst, Result)24
||
2278
44
    symbolicRDIVtest(SrcCoeff, DstCoeff,
2279
24
                     SrcConst, DstConst,
2280
24
                     SrcLoop, DstLoop);
2281
44
}
2282
2283
2284
// Tests the single-subscript MIV pair (Src and Dst) for dependence.
2285
// Return true if dependence disproved.
2286
// Can sometimes refine direction vectors.
2287
bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
2288
                             const SmallBitVector &Loops,
2289
467
                             FullDependence &Result) const {
2290
467
  LLVM_DEBUG(dbgs() << "    src = " << *Src << "\n");
2291
467
  LLVM_DEBUG(dbgs() << "    dst = " << *Dst << "\n");
2292
467
  Result.Consistent = false;
2293
467
  return gcdMIVtest(Src, Dst, Result) ||
2294
467
    
banerjeeMIVtest(Src, Dst, Loops, Result)459
;
2295
467
}
2296
2297
2298
// Given a product, e.g., 10*X*Y, returns the first constant operand,
2299
// in this case 10. If there is no constant part, returns NULL.
2300
static
2301
4.80k
const SCEVConstant *getConstantPart(const SCEV *Expr) {
2302
4.80k
  if (const auto *Constant = dyn_cast<SCEVConstant>(Expr))
2303
4.55k
    return Constant;
2304
248
  else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr))
2305
248
    if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0)))
2306
246
      return Constant;
2307
2
  return nullptr;
2308
2
}
2309
2310
2311
//===----------------------------------------------------------------------===//
2312
// gcdMIVtest -
2313
// Tests an MIV subscript pair for dependence.
2314
// Returns true if any possible dependence is disproved.
2315
// Marks the result as inconsistent.
2316
// Can sometimes disprove the equal direction for 1 or more loops,
2317
// as discussed in Michael Wolfe's book,
2318
// High Performance Compilers for Parallel Computing, page 235.
2319
//
2320
// We spend some effort (code!) to handle cases like
2321
// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
2322
// but M and N are just loop-invariant variables.
2323
// This should help us handle linearized subscripts;
2324
// also makes this test a useful backup to the various SIV tests.
2325
//
2326
// It occurs to me that the presence of loop-invariant variables
2327
// changes the nature of the test from "greatest common divisor"
2328
// to "a common divisor".
2329
bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
2330
1.35k
                                FullDependence &Result) const {
2331
1.35k
  LLVM_DEBUG(dbgs() << "starting gcd\n");
2332
1.35k
  ++GCDapplications;
2333
1.35k
  unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
2334
1.35k
  APInt RunningGCD = APInt::getNullValue(BitWidth);
2335
1.35k
2336
1.35k
  // Examine Src coefficients.
2337
1.35k
  // Compute running GCD and record source constant.
2338
1.35k
  // Because we're looking for the constant at the end of the chain,
2339
1.35k
  // we can't quit the loop just because the GCD == 1.
2340
1.35k
  const SCEV *Coefficients = Src;
2341
3.25k
  while (const SCEVAddRecExpr *AddRec =
2342
1.89k
         dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2343
1.89k
    const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2344
1.89k
    // If the coefficient is the product of a constant and other stuff,
2345
1.89k
    // we can use the constant in the GCD computation.
2346
1.89k
    const auto *Constant = getConstantPart(Coeff);
2347
1.89k
    if (!Constant)
2348
0
      return false;
2349
1.89k
    APInt ConstCoeff = Constant->getAPInt();
2350
1.89k
    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2351
1.89k
    Coefficients = AddRec->getStart();
2352
1.89k
  }
2353
1.35k
  const SCEV *SrcConst = Coefficients;
2354
1.35k
2355
1.35k
  // Examine Dst coefficients.
2356
1.35k
  // Compute running GCD and record destination constant.
2357
1.35k
  // Because we're looking for the constant at the end of the chain,
2358
1.35k
  // we can't quit the loop just because the GCD == 1.
2359
1.35k
  Coefficients = Dst;
2360
3.23k
  while (const SCEVAddRecExpr *AddRec =
2361
1.88k
         dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2362
1.88k
    const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2363
1.88k
    // If the coefficient is the product of a constant and other stuff,
2364
1.88k
    // we can use the constant in the GCD computation.
2365
1.88k
    const auto *Constant = getConstantPart(Coeff);
2366
1.88k
    if (!Constant)
2367
0
      return false;
2368
1.88k
    APInt ConstCoeff = Constant->getAPInt();
2369
1.88k
    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2370
1.88k
    Coefficients = AddRec->getStart();
2371
1.88k
  }
2372
1.35k
  const SCEV *DstConst = Coefficients;
2373
1.35k
2374
1.35k
  APInt ExtraGCD = APInt::getNullValue(BitWidth);
2375
1.35k
  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
2376
1.35k
  LLVM_DEBUG(dbgs() << "    Delta = " << *Delta << "\n");
2377
1.35k
  const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
2378
1.35k
  if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
2379
30
    // If Delta is a sum of products, we may be able to make further progress.
2380
92
    for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; 
Op++62
) {
2381
62
      const SCEV *Operand = Sum->getOperand(Op);
2382
62
      if (isa<SCEVConstant>(Operand)) {
2383
28
        assert(!Constant && "Surprised to find multiple constants");
2384
28
        Constant = cast<SCEVConstant>(Operand);
2385
28
      }
2386
34
      else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
2387
34
        // Search for constant operand to participate in GCD;
2388
34
        // If none found; return false.
2389
34
        const SCEVConstant *ConstOp = getConstantPart(Product);
2390
34
        if (!ConstOp)
2391
0
          return false;
2392
34
        APInt ConstOpValue = ConstOp->getAPInt();
2393
34
        ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
2394
34
                                                   ConstOpValue.abs());
2395
34
      }
2396
0
      else
2397
0
        return false;
2398
62
    }
2399
30
  }
2400
1.35k
  if (!Constant)
2401
32
    return false;
2402
1.32k
  APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();
2403
1.32k
  LLVM_DEBUG(dbgs() << "    ConstDelta = " << ConstDelta << "\n");
2404
1.32k
  if (ConstDelta == 0)
2405
1.06k
    return false;
2406
254
  RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
2407
254
  LLVM_DEBUG(dbgs() << "    RunningGCD = " << RunningGCD << "\n");
2408
254
  APInt Remainder = ConstDelta.srem(RunningGCD);
2409
254
  if (Remainder != 0) {
2410
12
    ++GCDindependence;
2411
12
    return true;
2412
12
  }
2413
242
2414
242
  // Try to disprove equal directions.
2415
242
  // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
2416
242
  // the code above can't disprove the dependence because the GCD = 1.
2417
242
  // So we consider what happen if i = i' and what happens if j = j'.
2418
242
  // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
2419
242
  // which is infeasible, so we can disallow the = direction for the i level.
2420
242
  // Setting j = j' doesn't help matters, so we end up with a direction vector
2421
242
  // of [<>, *]
2422
242
  //
2423
242
  // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
2424
242
  // we need to remember that the constant part is 5 and the RunningGCD should
2425
242
  // be initialized to ExtraGCD = 30.
2426
242
  LLVM_DEBUG(dbgs() << "    ExtraGCD = " << ExtraGCD << '\n');
2427
242
2428
242
  bool Improved = false;
2429
242
  Coefficients = Src;
2430
611
  while (const SCEVAddRecExpr *AddRec =
2431
369
         dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2432
369
    Coefficients = AddRec->getStart();
2433
369
    const Loop *CurLoop = AddRec->getLoop();
2434
369
    RunningGCD = ExtraGCD;
2435
369
    const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
2436
369
    const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
2437
369
    const SCEV *Inner = Src;
2438
1.04k
    while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
2439
679
      AddRec = cast<SCEVAddRecExpr>(Inner);
2440
679
      const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2441
679
      if (CurLoop == AddRec->getLoop())
2442
369
        ; // SrcCoeff == Coeff
2443
310
      else {
2444
310
        // If the coefficient is the product of a constant and other stuff,
2445
310
        // we can use the constant in the GCD computation.
2446
310
        Constant = getConstantPart(Coeff);
2447
310
        if (!Constant)
2448
0
          return false;
2449
310
        APInt ConstCoeff = Constant->getAPInt();
2450
310
        RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2451
310
      }
2452
679
      Inner = AddRec->getStart();
2453
679
    }
2454
369
    Inner = Dst;
2455
1.00k
    while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
2456
637
      AddRec = cast<SCEVAddRecExpr>(Inner);
2457
637
      const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2458
637
      if (CurLoop == AddRec->getLoop())
2459
327
        DstCoeff = Coeff;
2460
310
      else {
2461
310
        // If the coefficient is the product of a constant and other stuff,
2462
310
        // we can use the constant in the GCD computation.
2463
310
        Constant = getConstantPart(Coeff);
2464
310
        if (!Constant)
2465
0
          return false;
2466
310
        APInt ConstCoeff = Constant->getAPInt();
2467
310
        RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2468
310
      }
2469
637
      Inner = AddRec->getStart();
2470
637
    }
2471
369
    Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
2472
369
    // If the coefficient is the product of a constant and other stuff,
2473
369
    // we can use the constant in the GCD computation.
2474
369
    Constant = getConstantPart(Delta);
2475
369
    if (!Constant)
2476
2
      // The difference of the two coefficients might not be a product
2477
2
      // or constant, in which case we give up on this direction.
2478
2
      continue;
2479
367
    APInt ConstCoeff = Constant->getAPInt();
2480
367
    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2481
367
    LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
2482
367
    if (RunningGCD != 0) {
2483
310
      Remainder = ConstDelta.srem(RunningGCD);
2484
310
      LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
2485
310
      if (Remainder != 0) {
2486
74
        unsigned Level = mapSrcLoop(CurLoop);
2487
74
        Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
2488
74
        Improved = true;
2489
74
      }
2490
310
    }
2491
367
  }
2492
242
  if (Improved)
2493
74
    ++GCDsuccesses;
2494
242
  LLVM_DEBUG(dbgs() << "all done\n");
2495
242
  return false;
2496
242
}
2497
2498
2499
//===----------------------------------------------------------------------===//
2500
// banerjeeMIVtest -
2501
// Use Banerjee's Inequalities to test an MIV subscript pair.
2502
// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
2503
// Generally follows the discussion in Section 2.5.2 of
2504
//
2505
//    Optimizing Supercompilers for Supercomputers
2506
//    Michael Wolfe
2507
//
2508
// The inequalities given on page 25 are simplified in that loops are
2509
// normalized so that the lower bound is always 0 and the stride is always 1.
2510
// For example, Wolfe gives
2511
//
2512
//     LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2513
//
2514
// where A_k is the coefficient of the kth index in the source subscript,
2515
// B_k is the coefficient of the kth index in the destination subscript,
2516
// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
2517
// index, and N_k is the stride of the kth index. Since all loops are normalized
2518
// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
2519
// equation to
2520
//
2521
//     LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
2522
//            = (A^-_k - B_k)^- (U_k - 1)  - B_k
2523
//
2524
// Similar simplifications are possible for the other equations.
2525
//
2526
// When we can't determine the number of iterations for a loop,
2527
// we use NULL as an indicator for the worst case, infinity.
2528
// When computing the upper bound, NULL denotes +inf;
2529
// for the lower bound, NULL denotes -inf.
2530
//
2531
// Return true if dependence disproved.
2532
bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
2533
                                     const SmallBitVector &Loops,
2534
459
                                     FullDependence &Result) const {
2535
459
  LLVM_DEBUG(dbgs() << "starting Banerjee\n");
2536
459
  ++BanerjeeApplications;
2537
459
  LLVM_DEBUG(dbgs() << "    Src = " << *Src << '\n');
2538
459
  const SCEV *A0;
2539
459
  CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
2540
459
  LLVM_DEBUG(dbgs() << "    Dst = " << *Dst << '\n');
2541
459
  const SCEV *B0;
2542
459
  CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
2543
459
  BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
2544
459
  const SCEV *Delta = SE->getMinusSCEV(B0, A0);
2545
459
  LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
2546
459
2547
459
  // Compute bounds for all the * directions.
2548
459
  LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
2549
1.53k
  for (unsigned K = 1; K <= MaxLevels; 
++K1.07k
) {
2550
1.07k
    Bound[K].Iterations = A[K].Iterations ? 
A[K].Iterations1.00k
:
B[K].Iterations75
;
2551
1.07k
    Bound[K].Direction = Dependence::DVEntry::ALL;
2552
1.07k
    Bound[K].DirSet = Dependence::DVEntry::NONE;
2553
1.07k
    findBoundsALL(A, B, Bound, K);
2554
#ifndef NDEBUG
2555
    LLVM_DEBUG(dbgs() << "\t    " << K << '\t');
2556
    if (Bound[K].Lower[Dependence::DVEntry::ALL])
2557
      LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
2558
    else
2559
      LLVM_DEBUG(dbgs() << "-inf\t");
2560
    if (Bound[K].Upper[Dependence::DVEntry::ALL])
2561
      LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
2562
    else
2563
      LLVM_DEBUG(dbgs() << "+inf\n");
2564
#endif
2565
  }
2566
459
2567
459
  // Test the *, *, *, ... case.
2568
459
  bool Disproved = false;
2569
459
  if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
2570
451
    // Explore the direction vector hierarchy.
2571
451
    unsigned DepthExpanded = 0;
2572
451
    unsigned NewDeps = exploreDirections(1, A, B, Bound,
2573
451
                                         Loops, DepthExpanded, Delta);
2574
451
    if (NewDeps > 0) {
2575
451
      bool Improved = false;
2576
1.49k
      for (unsigned K = 1; K <= CommonLevels; 
++K1.03k
) {
2577
1.03k
        if (Loops[K]) {
2578
988
          unsigned Old = Result.DV[K - 1].Direction;
2579
988
          Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
2580
988
          Improved |= Old != Result.DV[K - 1].Direction;
2581
988
          if (!Result.DV[K - 1].Direction) {
2582
0
            Improved = false;
2583
0
            Disproved = true;
2584
0
            break;
2585
0
          }
2586
988
        }
2587
1.03k
      }
2588
451
      if (Improved)
2589
310
        ++BanerjeeSuccesses;
2590
451
    }
2591
0
    else {
2592
0
      ++BanerjeeIndependence;
2593
0
      Disproved = true;
2594
0
    }
2595
451
  }
2596
8
  else {
2597
8
    ++BanerjeeIndependence;
2598
8
    Disproved = true;
2599
8
  }
2600
459
  delete [] Bound;
2601
459
  delete [] A;
2602
459
  delete [] B;
2603
459
  return Disproved;
2604
459
}
2605
2606
2607
// Hierarchically expands the direction vector
2608
// search space, combining the directions of discovered dependences
2609
// in the DirSet field of Bound. Returns the number of distinct
2610
// dependences discovered. If the dependence is disproved,
2611
// it will return 0.
2612
unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
2613
                                           CoefficientInfo *B, BoundInfo *Bound,
2614
                                           const SmallBitVector &Loops,
2615
                                           unsigned &DepthExpanded,
2616
4.16k
                                           const SCEV *Delta) const {
2617
4.16k
  if (Level > CommonLevels) {
2618
2.17k
    // record result
2619
2.17k
    LLVM_DEBUG(dbgs() << "\t[");
2620
8.86k
    for (unsigned K = 1; K <= CommonLevels; 
++K6.68k
) {
2621
6.68k
      if (Loops[K]) {
2622
6.38k
        Bound[K].DirSet |= Bound[K].Direction;
2623
#ifndef NDEBUG
2624
        switch (Bound[K].Direction) {
2625
        case Dependence::DVEntry::LT:
2626
          LLVM_DEBUG(dbgs() << " <");
2627
          break;
2628
        case Dependence::DVEntry::EQ:
2629
          LLVM_DEBUG(dbgs() << " =");
2630
          break;
2631
        case Dependence::DVEntry::GT:
2632
          LLVM_DEBUG(dbgs() << " >");
2633
          break;
2634
        case Dependence::DVEntry::ALL:
2635
          LLVM_DEBUG(dbgs() << " *");
2636
          break;
2637
        default:
2638
          llvm_unreachable("unexpected Bound[K].Direction");
2639
        }
2640
#endif
2641
      }
2642
6.68k
    }
2643
2.17k
    LLVM_DEBUG(dbgs() << " ]\n");
2644
2.17k
    return 1;
2645
2.17k
  }
2646
1.98k
  if (Loops[Level]) {
2647
1.91k
    if (Level > DepthExpanded) {
2648
988
      DepthExpanded = Level;
2649
988
      // compute bounds for <, =, > at current level
2650
988
      findBoundsLT(A, B, Bound, Level);
2651
988
      findBoundsGT(A, B, Bound, Level);
2652
988
      findBoundsEQ(A, B, Bound, Level);
2653
#ifndef NDEBUG
2654
      LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
2655
      LLVM_DEBUG(dbgs() << "\t    <\t");
2656
      if (Bound[Level].Lower[Dependence::DVEntry::LT])
2657
        LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
2658
                          << '\t');
2659
      else
2660
        LLVM_DEBUG(dbgs() << "-inf\t");
2661
      if (Bound[Level].Upper[Dependence::DVEntry::LT])
2662
        LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
2663
                          << '\n');
2664
      else
2665
        LLVM_DEBUG(dbgs() << "+inf\n");
2666
      LLVM_DEBUG(dbgs() << "\t    =\t");
2667
      if (Bound[Level].Lower[Dependence::DVEntry::EQ])
2668
        LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
2669
                          << '\t');
2670
      else
2671
        LLVM_DEBUG(dbgs() << "-inf\t");
2672
      if (Bound[Level].Upper[Dependence::DVEntry::EQ])
2673
        LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
2674
                          << '\n');
2675
      else
2676
        LLVM_DEBUG(dbgs() << "+inf\n");
2677
      LLVM_DEBUG(dbgs() << "\t    >\t");
2678
      if (Bound[Level].Lower[Dependence::DVEntry::GT])
2679
        LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
2680
                          << '\t');
2681
      else
2682
        LLVM_DEBUG(dbgs() << "-inf\t");
2683
      if (Bound[Level].Upper[Dependence::DVEntry::GT])
2684
        LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
2685
                          << '\n');
2686
      else
2687
        LLVM_DEBUG(dbgs() << "+inf\n");
2688
#endif
2689
    }
2690
1.91k
2691
1.91k
    unsigned NewDeps = 0;
2692
1.91k
2693
1.91k
    // test bounds for <, *, *, ...
2694
1.91k
    if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
2695
1.05k
      NewDeps += exploreDirections(Level + 1, A, B, Bound,
2696
1.05k
                                   Loops, DepthExpanded, Delta);
2697
1.91k
2698
1.91k
    // Test bounds for =, *, *, ...
2699
1.91k
    if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
2700
1.51k
      NewDeps += exploreDirections(Level + 1, A, B, Bound,
2701
1.51k
                                   Loops, DepthExpanded, Delta);
2702
1.91k
2703
1.91k
    // test bounds for >, *, *, ...
2704
1.91k
    if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
2705
1.06k
      NewDeps += exploreDirections(Level + 1, A, B, Bound,
2706
1.06k
                                   Loops, DepthExpanded, Delta);
2707
1.91k
2708
1.91k
    Bound[Level].Direction = Dependence::DVEntry::ALL;
2709
1.91k
    return NewDeps;
2710
1.91k
  }
2711
79
  else
2712
79
    return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
2713
1.98k
}
2714
2715
2716
// Returns true iff the current bounds are plausible.
2717
bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
2718
6.18k
                                BoundInfo *Bound, const SCEV *Delta) const {
2719
6.18k
  Bound[Level].Direction = DirKind;
2720
6.18k
  if (const SCEV *LowerBound = getLowerBound(Bound))
2721
6.18k
    if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
2722
1.05k
      return false;
2723
5.13k
  if (const SCEV *UpperBound = getUpperBound(Bound))
2724
5.13k
    if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
2725
1.04k
      return false;
2726
4.08k
  return true;
2727
4.08k
}
2728
2729
2730
// Computes the upper and lower bounds for level K
2731
// using the * direction. Records them in Bound.
2732
// Wolfe gives the equations
2733
//
2734
//    LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
2735
//    UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
2736
//
2737
// Since we normalize loops, we can simplify these equations to
2738
//
2739
//    LB^*_k = (A^-_k - B^+_k)U_k
2740
//    UB^*_k = (A^+_k - B^-_k)U_k
2741
//
2742
// We must be careful to handle the case where the upper bound is unknown.
2743
// Note that the lower bound is always <= 0
2744
// and the upper bound is always >= 0.
2745
void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B,
2746
1.07k
                                   BoundInfo *Bound, unsigned K) const {
2747
1.07k
  Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity.
2748
1.07k
  Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity.
2749
1.07k
  if (Bound[K].Iterations) {
2750
1.02k
    Bound[K].Lower[Dependence::DVEntry::ALL] =
2751
1.02k
      SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
2752
1.02k
                     Bound[K].Iterations);
2753
1.02k
    Bound[K].Upper[Dependence::DVEntry::ALL] =
2754
1.02k
      SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
2755
1.02k
                     Bound[K].Iterations);
2756
1.02k
  }
2757
51
  else {
2758
51
    // If the difference is 0, we won't need to know the number of iterations.
2759
51
    if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
2760
51
      Bound[K].Lower[Dependence::DVEntry::ALL] =
2761
51
          SE->getZero(A[K].Coeff->getType());
2762
51
    if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
2763
51
      Bound[K].Upper[Dependence::DVEntry::ALL] =
2764
51
          SE->getZero(A[K].Coeff->getType());
2765
51
  }
2766
1.07k
}
2767
2768
2769
// Computes the upper and lower bounds for level K
2770
// using the = direction. Records them in Bound.
2771
// Wolfe gives the equations
2772
//
2773
//    LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
2774
//    UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
2775
//
2776
// Since we normalize loops, we can simplify these equations to
2777
//
2778
//    LB^=_k = (A_k - B_k)^- U_k
2779
//    UB^=_k = (A_k - B_k)^+ U_k
2780
//
2781
// We must be careful to handle the case where the upper bound is unknown.
2782
// Note that the lower bound is always <= 0
2783
// and the upper bound is always >= 0.
2784
void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B,
2785
988
                                  BoundInfo *Bound, unsigned K) const {
2786
988
  Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity.
2787
988
  Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity.
2788
988
  if (Bound[K].Iterations) {
2789
988
    const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
2790
988
    const SCEV *NegativePart = getNegativePart(Delta);
2791
988
    Bound[K].Lower[Dependence::DVEntry::EQ] =
2792
988
      SE->getMulExpr(NegativePart, Bound[K].Iterations);
2793
988
    const SCEV *PositivePart = getPositivePart(Delta);
2794
988
    Bound[K].Upper[Dependence::DVEntry::EQ] =
2795
988
      SE->getMulExpr(PositivePart, Bound[K].Iterations);
2796
988
  }
2797
0
  else {
2798
0
    // If the positive/negative part of the difference is 0,
2799
0
    // we won't need to know the number of iterations.
2800
0
    const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
2801
0
    const SCEV *NegativePart = getNegativePart(Delta);
2802
0
    if (NegativePart->isZero())
2803
0
      Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
2804
0
    const SCEV *PositivePart = getPositivePart(Delta);
2805
0
    if (PositivePart->isZero())
2806
0
      Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
2807
0
  }
2808
988
}
2809
2810
2811
// Computes the upper and lower bounds for level K
2812
// using the < direction. Records them in Bound.
2813
// Wolfe gives the equations
2814
//
2815
//    LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2816
//    UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2817
//
2818
// Since we normalize loops, we can simplify these equations to
2819
//
2820
//    LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
2821
//    UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
2822
//
2823
// We must be careful to handle the case where the upper bound is unknown.
2824
void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B,
2825
988
                                  BoundInfo *Bound, unsigned K) const {
2826
988
  Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.
2827
988
  Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.
2828
988
  if (Bound[K].Iterations) {
2829
988
    const SCEV *Iter_1 = SE->getMinusSCEV(
2830
988
        Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
2831
988
    const SCEV *NegPart =
2832
988
      getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
2833
988
    Bound[K].Lower[Dependence::DVEntry::LT] =
2834
988
      SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
2835
988
    const SCEV *PosPart =
2836
988
      getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
2837
988
    Bound[K].Upper[Dependence::DVEntry::LT] =
2838
988
      SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
2839
988
  }
2840
0
  else {
2841
0
    // If the positive/negative part of the difference is 0,
2842
0
    // we won't need to know the number of iterations.
2843
0
    const SCEV *NegPart =
2844
0
      getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
2845
0
    if (NegPart->isZero())
2846
0
      Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
2847
0
    const SCEV *PosPart =
2848
0
      getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
2849
0
    if (PosPart->isZero())
2850
0
      Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
2851
0
  }
2852
988
}
2853
2854
2855
// Computes the upper and lower bounds for level K
2856
// using the > direction. Records them in Bound.
2857
// Wolfe gives the equations
2858
//
2859
//    LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2860
//    UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2861
//
2862
// Since we normalize loops, we can simplify these equations to
2863
//
2864
//    LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
2865
//    UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
2866
//
2867
// We must be careful to handle the case where the upper bound is unknown.
2868
void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B,
2869
988
                                  BoundInfo *Bound, unsigned K) const {
2870
988
  Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.
2871
988
  Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.
2872
988
  if (Bound[K].Iterations) {
2873
988
    const SCEV *Iter_1 = SE->getMinusSCEV(
2874
988
        Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
2875
988
    const SCEV *NegPart =
2876
988
      getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
2877
988
    Bound[K].Lower[Dependence::DVEntry::GT] =
2878
988
      SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
2879
988
    const SCEV *PosPart =
2880
988
      getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
2881
988
    Bound[K].Upper[Dependence::DVEntry::GT] =
2882
988
      SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
2883
988
  }
2884
0
  else {
2885
0
    // If the positive/negative part of the difference is 0,
2886
0
    // we won't need to know the number of iterations.
2887
0
    const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
2888
0
    if (NegPart->isZero())
2889
0
      Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
2890
0
    const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
2891
0
    if (PosPart->isZero())
2892
0
      Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
2893
0
  }
2894
988
}
2895
2896
2897
// X^+ = max(X, 0)
2898
4.97k
const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {
2899
4.97k
  return SE->getSMaxExpr(X, SE->getZero(X->getType()));
2900
4.97k
}
2901
2902
2903
// X^- = min(X, 0)
2904
4.97k
const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {
2905
4.97k
  return SE->getSMinExpr(X, SE->getZero(X->getType()));
2906
4.97k
}
2907
2908
2909
// Walks through the subscript,
2910
// collecting each coefficient, the associated loop bounds,
2911
// and recording its positive and negative parts for later use.
2912
DependenceInfo::CoefficientInfo *
2913
DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,
2914
918
                                 const SCEV *&Constant) const {
2915
918
  const SCEV *Zero = SE->getZero(Subscript->getType());
2916
918
  CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
2917
3.07k
  for (unsigned K = 1; K <= MaxLevels; 
++K2.15k
) {
2918
2.15k
    CI[K].Coeff = Zero;
2919
2.15k
    CI[K].PosPart = Zero;
2920
2.15k
    CI[K].NegPart = Zero;
2921
2.15k
    CI[K].Iterations = nullptr;
2922
2.15k
  }
2923
2.92k
  while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
2924
2.00k
    const Loop *L = AddRec->getLoop();
2925
2.00k
    unsigned K = SrcFlag ? 
mapSrcLoop(L)1.00k
:
mapDstLoop(L)1.00k
;
2926
2.00k
    CI[K].Coeff = AddRec->getStepRecurrence(*SE);
2927
2.00k
    CI[K].PosPart = getPositivePart(CI[K].Coeff);
2928
2.00k
    CI[K].NegPart = getNegativePart(CI[K].Coeff);
2929
2.00k
    CI[K].Iterations = collectUpperBound(L, Subscript->getType());
2930
2.00k
    Subscript = AddRec->getStart();
2931
2.00k
  }
2932
918
  Constant = Subscript;
2933
#ifndef NDEBUG
2934
  LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
2935
  for (unsigned K = 1; K <= MaxLevels; ++K) {
2936
    LLVM_DEBUG(dbgs() << "\t    " << K << "\t" << *CI[K].Coeff);
2937
    LLVM_DEBUG(dbgs() << "\tPos Part = ");
2938
    LLVM_DEBUG(dbgs() << *CI[K].PosPart);
2939
    LLVM_DEBUG(dbgs() << "\tNeg Part = ");
2940
    LLVM_DEBUG(dbgs() << *CI[K].NegPart);
2941
    LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
2942
    if (CI[K].Iterations)
2943
      LLVM_DEBUG(dbgs() << *CI[K].Iterations);
2944
    else
2945
      LLVM_DEBUG(dbgs() << "+inf");
2946
    LLVM_DEBUG(dbgs() << '\n');
2947
  }
2948
  LLVM_DEBUG(dbgs() << "\t    Constant = " << *Subscript << '\n');
2949
#endif
2950
  return CI;
2951
918
}
2952
2953
2954
// Looks through all the bounds info and
2955
// computes the lower bound given the current direction settings
2956
// at each level. If the lower bound for any level is -inf,
2957
// the result is -inf.
2958
6.18k
const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const {
2959
6.18k
  const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
2960
17.4k
  for (unsigned K = 2; Sum && K <= MaxLevels; 
++K11.2k
) {
2961
11.2k
    if (Bound[K].Lower[Bound[K].Direction])
2962
11.2k
      Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
2963
0
    else
2964
0
      Sum = nullptr;
2965
11.2k
  }
2966
6.18k
  return Sum;
2967
6.18k
}
2968
2969
2970
// Looks through all the bounds info and
2971
// computes the upper bound given the current direction settings
2972
// at each level. If the upper bound at any level is +inf,
2973
// the result is +inf.
2974
5.13k
const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const {
2975
5.13k
  const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
2976
14.8k
  for (unsigned K = 2; Sum && K <= MaxLevels; 
++K9.74k
) {
2977
9.74k
    if (Bound[K].Upper[Bound[K].Direction])
2978
9.74k
      Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
2979
0
    else
2980
0
      Sum = nullptr;
2981
9.74k
  }
2982
5.13k
  return Sum;
2983
5.13k
}
2984
2985
2986
//===----------------------------------------------------------------------===//
2987
// Constraint manipulation for Delta test.
2988
2989
// Given a linear SCEV,
2990
// return the coefficient (the step)
2991
// corresponding to the specified loop.
2992
// If there isn't one, return 0.
2993
// For example, given a*i + b*j + c*k, finding the coefficient
2994
// corresponding to the j loop would yield b.
2995
const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr,
2996
0
                                            const Loop *TargetLoop) const {
2997
0
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
2998
0
  if (!AddRec)
2999
0
    return SE->getZero(Expr->getType());
3000
0
  if (AddRec->getLoop() == TargetLoop)
3001
0
    return AddRec->getStepRecurrence(*SE);
3002
0
  return findCoefficient(AddRec->getStart(), TargetLoop);
3003
0
}
3004
3005
3006
// Given a linear SCEV,
3007
// return the SCEV given by zeroing out the coefficient
3008
// corresponding to the specified loop.
3009
// For example, given a*i + b*j + c*k, zeroing the coefficient
3010
// corresponding to the j loop would yield a*i + c*k.
3011
const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr,
3012
0
                                            const Loop *TargetLoop) const {
3013
0
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
3014
0
  if (!AddRec)
3015
0
    return Expr; // ignore
3016
0
  if (AddRec->getLoop() == TargetLoop)
3017
0
    return AddRec->getStart();
3018
0
  return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
3019
0
                           AddRec->getStepRecurrence(*SE),
3020
0
                           AddRec->getLoop(),
3021
0
                           AddRec->getNoWrapFlags());
3022
0
}
3023
3024
3025
// Given a linear SCEV Expr,
3026
// return the SCEV given by adding some Value to the
3027
// coefficient corresponding to the specified TargetLoop.
3028
// For example, given a*i + b*j + c*k, adding 1 to the coefficient
3029
// corresponding to the j loop would yield a*i + (b+1)*j + c*k.
3030
const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr,
3031
                                             const Loop *TargetLoop,
3032
0
                                             const SCEV *Value) const {
3033
0
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
3034
0
  if (!AddRec) // create a new addRec
3035
0
    return SE->getAddRecExpr(Expr,
3036
0
                             Value,
3037
0
                             TargetLoop,
3038
0
                             SCEV::FlagAnyWrap); // Worst case, with no info.
3039
0
  if (AddRec->getLoop() == TargetLoop) {
3040
0
    const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
3041
0
    if (Sum->isZero())
3042
0
      return AddRec->getStart();
3043
0
    return SE->getAddRecExpr(AddRec->getStart(),
3044
0
                             Sum,
3045
0
                             AddRec->getLoop(),
3046
0
                             AddRec->getNoWrapFlags());
3047
0
  }
3048
0
  if (SE->isLoopInvariant(AddRec, TargetLoop))
3049
0
    return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap);
3050
0
  return SE->getAddRecExpr(
3051
0
      addToCoefficient(AddRec->getStart(), TargetLoop, Value),
3052
0
      AddRec->getStepRecurrence(*SE), AddRec->getLoop(),
3053
0
      AddRec->getNoWrapFlags());
3054
0
}
3055
3056
3057
// Review the constraints, looking for opportunities
3058
// to simplify a subscript pair (Src and Dst).
3059
// Return true if some simplification occurs.
3060
// If the simplification isn't exact (that is, if it is conservative
3061
// in terms of dependence), set consistent to false.
3062
// Corresponds to Figure 5 from the paper
3063
//
3064
//            Practical Dependence Testing
3065
//            Goff, Kennedy, Tseng
3066
//            PLDI 1991
3067
bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
3068
                               SmallBitVector &Loops,
3069
                               SmallVectorImpl<Constraint> &Constraints,
3070
0
                               bool &Consistent) {
3071
0
  bool Result = false;
3072
0
  for (unsigned LI : Loops.set_bits()) {
3073
0
    LLVM_DEBUG(dbgs() << "\t    Constraint[" << LI << "] is");
3074
0
    LLVM_DEBUG(Constraints[LI].dump(dbgs()));
3075
0
    if (Constraints[LI].isDistance())
3076
0
      Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
3077
0
    else if (Constraints[LI].isLine())
3078
0
      Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
3079
0
    else if (Constraints[LI].isPoint())
3080
0
      Result |= propagatePoint(Src, Dst, Constraints[LI]);
3081
0
  }
3082
0
  return Result;
3083
0
}
3084
3085
3086
// Attempt to propagate a distance
3087
// constraint into a subscript pair (Src and Dst).
3088
// Return true if some simplification occurs.
3089
// If the simplification isn't exact (that is, if it is conservative
3090
// in terms of dependence), set consistent to false.
3091
bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
3092
                                       Constraint &CurConstraint,
3093
0
                                       bool &Consistent) {
3094
0
  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3095
0
  LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3096
0
  const SCEV *A_K = findCoefficient(Src, CurLoop);
3097
0
  if (A_K->isZero())
3098
0
    return false;
3099
0
  const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
3100
0
  Src = SE->getMinusSCEV(Src, DA_K);
3101
0
  Src = zeroCoefficient(Src, CurLoop);
3102
0
  LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3103
0
  LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3104
0
  Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
3105
0
  LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3106
0
  if (!findCoefficient(Dst, CurLoop)->isZero())
3107
0
    Consistent = false;
3108
0
  return true;
3109
0
}
3110
3111
3112
// Attempt to propagate a line
3113
// constraint into a subscript pair (Src and Dst).
3114
// Return true if some simplification occurs.
3115
// If the simplification isn't exact (that is, if it is conservative
3116
// in terms of dependence), set consistent to false.
3117
bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
3118
                                   Constraint &CurConstraint,
3119
0
                                   bool &Consistent) {
3120
0
  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3121
0
  const SCEV *A = CurConstraint.getA();
3122
0
  const SCEV *B = CurConstraint.getB();
3123
0
  const SCEV *C = CurConstraint.getC();
3124
0
  LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C
3125
0
                    << "\n");
3126
0
  LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
3127
0
  LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
3128
0
  if (A->isZero()) {
3129
0
    const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
3130
0
    const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3131
0
    if (!Bconst || !Cconst) return false;
3132
0
    APInt Beta = Bconst->getAPInt();
3133
0
    APInt Charlie = Cconst->getAPInt();
3134
0
    APInt CdivB = Charlie.sdiv(Beta);
3135
0
    assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
3136
0
    const SCEV *AP_K = findCoefficient(Dst, CurLoop);
3137
0
    //    Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3138
0
    Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3139
0
    Dst = zeroCoefficient(Dst, CurLoop);
3140
0
    if (!findCoefficient(Src, CurLoop)->isZero())
3141
0
      Consistent = false;
3142
0
  }
3143
0
  else if (B->isZero()) {
3144
0
    const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
3145
0
    const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3146
0
    if (!Aconst || !Cconst) return false;
3147
0
    APInt Alpha = Aconst->getAPInt();
3148
0
    APInt Charlie = Cconst->getAPInt();
3149
0
    APInt CdivA = Charlie.sdiv(Alpha);
3150
0
    assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
3151
0
    const SCEV *A_K = findCoefficient(Src, CurLoop);
3152
0
    Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
3153
0
    Src = zeroCoefficient(Src, CurLoop);
3154
0
    if (!findCoefficient(Dst, CurLoop)->isZero())
3155
0
      Consistent = false;
3156
0
  }
3157
0
  else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
3158
0
    const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
3159
0
    const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3160
0
    if (!Aconst || !Cconst) return false;
3161
0
    APInt Alpha = Aconst->getAPInt();
3162
0
    APInt Charlie = Cconst->getAPInt();
3163
0
    APInt CdivA = Charlie.sdiv(Alpha);
3164
0
    assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
3165
0
    const SCEV *A_K = findCoefficient(Src, CurLoop);
3166
0
    Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
3167
0
    Src = zeroCoefficient(Src, CurLoop);
3168
0
    Dst = addToCoefficient(Dst, CurLoop, A_K);
3169
0
    if (!findCoefficient(Dst, CurLoop)->isZero())
3170
0
      Consistent = false;
3171
0
  }
3172
0
  else {
3173
0
    // paper is incorrect here, or perhaps just misleading
3174
0
    const SCEV *A_K = findCoefficient(Src, CurLoop);
3175
0
    Src = SE->getMulExpr(Src, A);
3176
0
    Dst = SE->getMulExpr(Dst, A);
3177
0
    Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
3178
0
    Src = zeroCoefficient(Src, CurLoop);
3179
0
    Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
3180
0
    if (!findCoefficient(Dst, CurLoop)->isZero())
3181
0
      Consistent = false;
3182
0
  }
3183
0
  LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
3184
0
  LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
3185
0
  return true;
3186
0
}
3187
3188
3189
// Attempt to propagate a point
3190
// constraint into a subscript pair (Src and Dst).
3191
// Return true if some simplification occurs.
3192
bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,
3193
0
                                    Constraint &CurConstraint) {
3194
0
  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3195
0
  const SCEV *A_K = findCoefficient(Src, CurLoop);
3196
0
  const SCEV *AP_K = findCoefficient(Dst, CurLoop);
3197
0
  const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
3198
0
  const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
3199
0
  LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3200
0
  Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
3201
0
  Src = zeroCoefficient(Src, CurLoop);
3202
0
  LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3203
0
  LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3204
0
  Dst = zeroCoefficient(Dst, CurLoop);
3205
0
  LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3206
0
  return true;
3207
0
}
3208
3209
3210
// Update direction vector entry based on the current constraint.
3211
void DependenceInfo::updateDirection(Dependence::DVEntry &Level,
3212
2
                                     const Constraint &CurConstraint) const {
3213
2
  LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint =");
3214
2
  LLVM_DEBUG(CurConstraint.dump(dbgs()));
3215
2
  if (CurConstraint.isAny())
3216
0
    ; // use defaults
3217
2
  else if (CurConstraint.isDistance()) {
3218
0
    // this one is consistent, the others aren't
3219
0
    Level.Scalar = false;
3220
0
    Level.Distance = CurConstraint.getD();
3221
0
    unsigned NewDirection = Dependence::DVEntry::NONE;
3222
0
    if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
3223
0
      NewDirection = Dependence::DVEntry::EQ;
3224
0
    if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
3225
0
      NewDirection |= Dependence::DVEntry::LT;
3226
0
    if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
3227
0
      NewDirection |= Dependence::DVEntry::GT;
3228
0
    Level.Direction &= NewDirection;
3229
0
  }
3230
2
  else if (CurConstraint.isLine()) {
3231
0
    Level.Scalar = false;
3232
0
    Level.Distance = nullptr;
3233
0
    // direction should be accurate
3234
0
  }
3235
2
  else if (CurConstraint.isPoint()) {
3236
2
    Level.Scalar = false;
3237
2
    Level.Distance = nullptr;
3238
2
    unsigned NewDirection = Dependence::DVEntry::NONE;
3239
2
    if (!isKnownPredicate(CmpInst::ICMP_NE,
3240
2
                          CurConstraint.getY(),
3241
2
                          CurConstraint.getX()))
3242
0
      // if X may be = Y
3243
0
      NewDirection |= Dependence::DVEntry::EQ;
3244
2
    if (!isKnownPredicate(CmpInst::ICMP_SLE,
3245
2
                          CurConstraint.getY(),
3246
2
                          CurConstraint.getX()))
3247
0
      // if Y may be > X
3248
0
      NewDirection |= Dependence::DVEntry::LT;
3249
2
    if (!isKnownPredicate(CmpInst::ICMP_SGE,
3250
2
                          CurConstraint.getY(),
3251
2
                          CurConstraint.getX()))
3252
2
      // if Y may be < X
3253
2
      NewDirection |= Dependence::DVEntry::GT;
3254
2
    Level.Direction &= NewDirection;
3255
2
  }
3256
2
  else
3257
2
    
llvm_unreachable0
("constraint has unexpected kind");
3258
2
}
3259
3260
/// Check if we can delinearize the subscripts. If the SCEVs representing the
3261
/// source and destination array references are recurrences on a nested loop,
3262
/// this function flattens the nested recurrences into separate recurrences
3263
/// for each loop level.
3264
bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
3265
1.58k
                                    SmallVectorImpl<Subscript> &Pair) {
3266
1.58k
  assert(isLoadOrStore(Src) && "instruction is not load or store");
3267
1.58k
  assert(isLoadOrStore(Dst) && "instruction is not load or store");
3268
1.58k
  Value *SrcPtr = getLoadStorePointerOperand(Src);
3269
1.58k
  Value *DstPtr = getLoadStorePointerOperand(Dst);
3270
1.58k
3271
1.58k
  Loop *SrcLoop = LI->getLoopFor(Src->getParent());
3272
1.58k
  Loop *DstLoop = LI->getLoopFor(Dst->getParent());
3273
1.58k
3274
1.58k
  // Below code mimics the code in Delinearization.cpp
3275
1.58k
  const SCEV *SrcAccessFn =
3276
1.58k
    SE->getSCEVAtScope(SrcPtr, SrcLoop);
3277
1.58k
  const SCEV *DstAccessFn =
3278
1.58k
    SE->getSCEVAtScope(DstPtr, DstLoop);
3279
1.58k
3280
1.58k
  const SCEVUnknown *SrcBase =
3281
1.58k
      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3282
1.58k
  const SCEVUnknown *DstBase =
3283
1.58k
      dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3284
1.58k
3285
1.58k
  if (!SrcBase || !DstBase || SrcBase != DstBase)
3286
0
    return false;
3287
1.58k
3288
1.58k
  const SCEV *ElementSize = SE->getElementSize(Src);
3289
1.58k
  if (ElementSize != SE->getElementSize(Dst))
3290
0
    return false;
3291
1.58k
3292
1.58k
  const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase);
3293
1.58k
  const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);
3294
1.58k
3295
1.58k
  const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
3296
1.58k
  const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
3297
1.58k
  if (!SrcAR || 
!DstAR1.20k
||
!SrcAR->isAffine()1.17k
||
!DstAR->isAffine()1.17k
)
3298
414
    return false;
3299
1.17k
3300
1.17k
  // First step: collect parametric terms in both array references.
3301
1.17k
  SmallVector<const SCEV *, 4> Terms;
3302
1.17k
  SE->collectParametricTerms(SrcAR, Terms);
3303
1.17k
  SE->collectParametricTerms(DstAR, Terms);
3304
1.17k
3305
1.17k
  // Second step: find subscript sizes.
3306
1.17k
  SmallVector<const SCEV *, 4> Sizes;
3307
1.17k
  SE->findArrayDimensions(Terms, Sizes, ElementSize);
3308
1.17k
3309
1.17k
  // Third step: compute the access functions for each subscript.
3310
1.17k
  SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
3311
1.17k
  SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
3312
1.17k
  SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
3313
1.17k
3314
1.17k
  // Fail when there is only a subscript: that's a linearized access function.
3315
1.17k
  if (SrcSubscripts.size() < 2 || 
DstSubscripts.size() < 2130
||
3316
1.17k
      
SrcSubscripts.size() != DstSubscripts.size()130
)
3317
1.04k
    return false;
3318
130
3319
130
  int size = SrcSubscripts.size();
3320
130
3321
130
  // Statically check that the array bounds are in-range. The first subscript we
3322
130
  // don't have a size for and it cannot overflow into another subscript, so is
3323
130
  // always safe. The others need to be 0 <= subscript[i] < bound, for both src
3324
130
  // and dst.
3325
130
  // FIXME: It may be better to record these sizes and add them as constraints
3326
130
  // to the dependency checks.
3327
130
  if (!DisableDelinearizationChecks)
3328
200
    
for (int i = 1; 112
i < size;
++i88
) {
3329
154
      if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
3330
18
        return false;
3331
136
3332
136
      if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
3333
40
        return false;
3334
96
3335
96
      if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
3336
4
        return false;
3337
92
3338
92
      if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
3339
4
        return false;
3340
92
    }
3341
130
3342
130
  
LLVM_DEBUG64
({
3343
64
    dbgs() << "\nSrcSubscripts: ";
3344
64
    for (int i = 0; i < size; i++)
3345
64
      dbgs() << *SrcSubscripts[i];
3346
64
    dbgs() << "\nDstSubscripts: ";
3347
64
    for (int i = 0; i < size; i++)
3348
64
      dbgs() << *DstSubscripts[i];
3349
64
  });
3350
64
3351
64
  // The delinearization transforms a single-subscript MIV dependence test into
3352
64
  // a multi-subscript SIV dependence test that is easier to compute. So we
3353
64
  // resize Pair to contain as many pairs of subscripts as the delinearization
3354
64
  // has found, and then initialize the pairs following the delinearization.
3355
64
  Pair.resize(size);
3356
244
  for (int i = 0; i < size; 
++i180
) {
3357
180
    Pair[i].Src = SrcSubscripts[i];
3358
180
    Pair[i].Dst = DstSubscripts[i];
3359
180
    unifySubscriptType(&Pair[i]);
3360
180
  }
3361
64
3362
64
  return true;
3363
130
}
3364
3365
//===----------------------------------------------------------------------===//
3366
3367
#ifndef NDEBUG
3368
// For debugging purposes, dump a small bit vector to dbgs().
3369
static void dumpSmallBitVector(SmallBitVector &BV) {
3370
  dbgs() << "{";
3371
  for (unsigned VI : BV.set_bits()) {
3372
    dbgs() << VI;
3373
    if (BV.find_next(VI) >= 0)
3374
      dbgs() << ' ';
3375
  }
3376
  dbgs() << "}\n";
3377
}
3378
#endif
3379
3380
bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
3381
2
                                FunctionAnalysisManager::Invalidator &Inv) {
3382
2
  // Check if the analysis itself has been invalidated.
3383
2
  auto PAC = PA.getChecker<DependenceAnalysis>();
3384
2
  if (!PAC.preserved() && 
!PAC.preservedSet<AllAnalysesOn<Function>>()0
)
3385
0
    return true;
3386
2
3387
2
  // Check transitive dependencies.
3388
2
  return Inv.invalidate<AAManager>(F, PA) ||
3389
2
         Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
3390
2
         
Inv.invalidate<LoopAnalysis>(F, PA)0
;
3391
2
}
3392
3393
// depends -
3394
// Returns NULL if there is no dependence.
3395
// Otherwise, return a Dependence with as many details as possible.
3396
// Corresponds to Section 3.1 in the paper
3397
//
3398
//            Practical Dependence Testing
3399
//            Goff, Kennedy, Tseng
3400
//            PLDI 1991
3401
//
3402
// Care is required to keep the routine below, getSplitIteration(),
3403
// up to date with respect to this routine.
3404
std::unique_ptr<Dependence>
3405
DependenceInfo::depends(Instruction *Src, Instruction *Dst,
3406
2.94k
                        bool PossiblyLoopIndependent) {
3407
2.94k
  if (Src == Dst)
3408
1.13k
    PossiblyLoopIndependent = false;
3409
2.94k
3410
2.94k
  if ((!Src->mayReadFromMemory() && 
!Src->mayWriteToMemory()1.73k
) ||
3411
2.94k
      (!Dst->mayReadFromMemory() && 
!Dst->mayWriteToMemory()1.95k
))
3412
0
    // if both instructions don't reference memory, there's no dependence
3413
0
    return nullptr;
3414
2.94k
3415
2.94k
  if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
3416
0
    // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
3417
0
    LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
3418
0
    return make_unique<Dependence>(Src, Dst);
3419
0
  }
3420
2.94k
3421
2.94k
  assert(isLoadOrStore(Src) && "instruction is not load or store");
3422
2.94k
  assert(isLoadOrStore(Dst) && "instruction is not load or store");
3423
2.94k
  Value *SrcPtr = getLoadStorePointerOperand(Src);
3424
2.94k
  Value *DstPtr = getLoadStorePointerOperand(Dst);
3425
2.94k
3426
2.94k
  switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(),
3427
2.94k
                                 MemoryLocation::get(Dst),
3428
2.94k
                                 MemoryLocation::get(Src))) {
3429
2.94k
  case MayAlias:
3430
851
  case PartialAlias:
3431
851
    // cannot analyse objects if we don't understand their aliasing.
3432
851
    LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
3433
851
    return make_unique<Dependence>(Src, Dst);
3434
851
  case NoAlias:
3435
411
    // If the objects noalias, they are distinct, accesses are independent.
3436
411
    LLVM_DEBUG(dbgs() << "no alias\n");
3437
411
    return nullptr;
3438
1.68k
  case MustAlias:
3439
1.68k
    break; // The underlying objects alias; test accesses for dependence.
3440
1.68k
  }
3441
1.68k
3442
1.68k
  // establish loop nesting levels
3443
1.68k
  establishNestingLevels(Src, Dst);
3444
1.68k
  LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
3445
1.68k
  LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
3446
1.68k
3447
1.68k
  FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
3448
1.68k
  ++TotalArrayPairs;
3449
1.68k
3450
1.68k
  unsigned Pairs = 1;
3451
1.68k
  SmallVector<Subscript, 2> Pair(Pairs);
3452
1.68k
  const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
3453
1.68k
  const SCEV *DstSCEV = SE->getSCEV(DstPtr);
3454
1.68k
  LLVM_DEBUG(dbgs() << "    SrcSCEV = " << *SrcSCEV << "\n");
3455
1.68k
  LLVM_DEBUG(dbgs() << "    DstSCEV = " << *DstSCEV << "\n");
3456
1.68k
  Pair[0].Src = SrcSCEV;
3457
1.68k
  Pair[0].Dst = DstSCEV;
3458
1.68k
3459
1.68k
  if (Delinearize) {
3460
1.58k
    if (tryDelinearize(Src, Dst, Pair)) {
3461
64
      LLVM_DEBUG(dbgs() << "    delinearized\n");
3462
64
      Pairs = Pair.size();
3463
64
    }
3464
1.58k
  }
3465
1.68k
3466
3.48k
  for (unsigned P = 0; P < Pairs; 
++P1.80k
) {
3467
1.80k
    Pair[P].Loops.resize(MaxLevels + 1);
3468
1.80k
    Pair[P].GroupLoops.resize(MaxLevels + 1);
3469
1.80k
    Pair[P].Group.resize(Pairs);
3470
1.80k
    removeMatchingExtensions(&Pair[P]);
3471
1.80k
    Pair[P].Classification =
3472
1.80k
      classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
3473
1.80k
                   Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
3474
1.80k
                   Pair[P].Loops);
3475
1.80k
    Pair[P].GroupLoops = Pair[P].Loops;
3476
1.80k
    Pair[P].Group.set(P);
3477
1.80k
    LLVM_DEBUG(dbgs() << "    subscript " << P << "\n");
3478
1.80k
    LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
3479
1.80k
    LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
3480
1.80k
    LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
3481
1.80k
    LLVM_DEBUG(dbgs() << "\tloops = ");
3482
1.80k
    LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops));
3483
1.80k
  }
3484
1.68k
3485
1.68k
  SmallBitVector Separable(Pairs);
3486
1.68k
  SmallBitVector Coupled(Pairs);
3487
1.68k
3488
1.68k
  // Partition subscripts into separable and minimally-coupled groups
3489
1.68k
  // Algorithm in paper is algorithmically better;
3490
1.68k
  // this may be faster in practice. Check someday.
3491
1.68k
  //
3492
1.68k
  // Here's an example of how it works. Consider this code:
3493
1.68k
  //
3494
1.68k
  //   for (i = ...) {
3495
1.68k
  //     for (j = ...) {
3496
1.68k
  //       for (k = ...) {
3497
1.68k
  //         for (l = ...) {
3498
1.68k
  //           for (m = ...) {
3499
1.68k
  //             A[i][j][k][m] = ...;
3500
1.68k
  //             ... = A[0][j][l][i + j];
3501
1.68k
  //           }
3502
1.68k
  //         }
3503
1.68k
  //       }
3504
1.68k
  //     }
3505
1.68k
  //   }
3506
1.68k
  //
3507
1.68k
  // There are 4 subscripts here:
3508
1.68k
  //    0 [i] and [0]
3509
1.68k
  //    1 [j] and [j]
3510
1.68k
  //    2 [k] and [l]
3511
1.68k
  //    3 [m] and [i + j]
3512
1.68k
  //
3513
1.68k
  // We've already classified each subscript pair as ZIV, SIV, etc.,
3514
1.68k
  // and collected all the loops mentioned by pair P in Pair[P].Loops.
3515
1.68k
  // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
3516
1.68k
  // and set Pair[P].Group = {P}.
3517
1.68k
  //
3518
1.68k
  //      Src Dst    Classification Loops  GroupLoops Group
3519
1.68k
  //    0 [i] [0]         SIV       {1}      {1}        {0}
3520
1.68k
  //    1 [j] [j]         SIV       {2}      {2}        {1}
3521
1.68k
  //    2 [k] [l]         RDIV      {3,4}    {3,4}      {2}
3522
1.68k
  //    3 [m] [i + j]     MIV       {1,2,5}  {1,2,5}    {3}
3523
1.68k
  //
3524
1.68k
  // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
3525
1.68k
  // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
3526
1.68k
  //
3527
1.68k
  // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
3528
1.68k
  // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
3529
1.68k
  // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,
3530
1.68k
  // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
3531
1.68k
  // to either Separable or Coupled).
3532
1.68k
  //
3533
1.68k
  // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
3534
1.68k
  // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
3535
1.68k
  // so Pair[3].Group = {0, 1, 3} and Done = false.
3536
1.68k
  //
3537
1.68k
  // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
3538
1.68k
  // Since Done remains true, we add 2 to the set of Separable pairs.
3539
1.68k
  //
3540
1.68k
  // Finally, we consider 3. There's nothing to compare it with,
3541
1.68k
  // so Done remains true and we add it to the Coupled set.
3542
1.68k
  // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
3543
1.68k
  //
3544
1.68k
  // In the end, we've got 1 separable subscript and 1 coupled group.
3545
3.48k
  for (unsigned SI = 0; SI < Pairs; 
++SI1.80k
) {
3546
1.80k
    if (Pair[SI].Classification == Subscript::NonLinear) {
3547
114
      // ignore these, but collect loops for later
3548
114
      ++NonlinearSubscriptPairs;
3549
114
      collectCommonLoops(Pair[SI].Src,
3550
114
                         LI->getLoopFor(Src->getParent()),
3551
114
                         Pair[SI].Loops);
3552
114
      collectCommonLoops(Pair[SI].Dst,
3553
114
                         LI->getLoopFor(Dst->getParent()),
3554
114
                         Pair[SI].Loops);
3555
114
      Result.Consistent = false;
3556
1.68k
    } else if (Pair[SI].Classification == Subscript::ZIV) {
3557
274
      // always separable
3558
274
      Separable.set(SI);
3559
274
    }
3560
1.41k
    else {
3561
1.41k
      // SIV, RDIV, or MIV, so check for coupled group
3562
1.41k
      bool Done = true;
3563
1.59k
      for (unsigned SJ = SI + 1; SJ < Pairs; 
++SJ186
) {
3564
186
        SmallBitVector Intersection = Pair[SI].GroupLoops;
3565
186
        Intersection &= Pair[SJ].GroupLoops;
3566
186
        if (Intersection.any()) {
3567
2
          // accumulate set of all the loops in group
3568
2
          Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
3569
2
          // accumulate set of all subscripts in group
3570
2
          Pair[SJ].Group |= Pair[SI].Group;
3571
2
          Done = false;
3572
2
        }
3573
186
      }
3574
1.41k
      if (Done) {
3575
1.41k
        if (Pair[SI].Group.count() == 1) {
3576
1.40k
          Separable.set(SI);
3577
1.40k
          ++SeparableSubscriptPairs;
3578
1.40k
        }
3579
2
        else {
3580
2
          Coupled.set(SI);
3581
2
          ++CoupledSubscriptPairs;
3582
2
        }
3583
1.41k
      }
3584
1.41k
    }
3585
1.80k
  }
3586
1.68k
3587
1.68k
  LLVM_DEBUG(dbgs() << "    Separable = ");
3588
1.68k
  LLVM_DEBUG(dumpSmallBitVector(Separable));
3589
1.68k
  LLVM_DEBUG(dbgs() << "    Coupled = ");
3590
1.68k
  LLVM_DEBUG(dumpSmallBitVector(Coupled));
3591
1.68k
3592
1.68k
  Constraint NewConstraint;
3593
1.68k
  NewConstraint.setAny(SE);
3594
1.68k
3595
1.68k
  // test separable subscripts
3596
1.68k
  for (unsigned SI : Separable.set_bits()) {
3597
1.68k
    LLVM_DEBUG(dbgs() << "testing subscript " << SI);
3598
1.68k
    switch (Pair[SI].Classification) {
3599
1.68k
    case Subscript::ZIV:
3600
274
      LLVM_DEBUG(dbgs() << ", ZIV\n");
3601
274
      if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
3602
6
        return nullptr;
3603
268
      break;
3604
897
    case Subscript::SIV: {
3605
897
      LLVM_DEBUG(dbgs() << ", SIV\n");
3606
897
      unsigned Level;
3607
897
      const SCEV *SplitIter = nullptr;
3608
897
      if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
3609
897
                  SplitIter))
3610
60
        return nullptr;
3611
837
      break;
3612
837
    }
3613
837
    case Subscript::RDIV:
3614
44
      LLVM_DEBUG(dbgs() << ", RDIV\n");
3615
44
      if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
3616
34
        return nullptr;
3617
10
      break;
3618
467
    case Subscript::MIV:
3619
467
      LLVM_DEBUG(dbgs() << ", MIV\n");
3620
467
      if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
3621
16
        return nullptr;
3622
451
      break;
3623
451
    default:
3624
0
      llvm_unreachable("subscript has unexpected classification");
3625
1.68k
    }
3626
1.68k
  }
3627
1.68k
3628
1.68k
  
if (1.56k
Coupled.count()1.56k
) {
3629
2
    // test coupled subscript groups
3630
2
    LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n");
3631
2
    LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
3632
2
    SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
3633
8
    for (unsigned II = 0; II <= MaxLevels; 
++II6
)
3634
6
      Constraints[II].setAny(SE);
3635
2
    for (unsigned SI : Coupled.set_bits()) {
3636
2
      LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { ");
3637
2
      SmallBitVector Group(Pair[SI].Group);
3638
2
      SmallBitVector Sivs(Pairs);
3639
2
      SmallBitVector Mivs(Pairs);
3640
2
      SmallBitVector ConstrainedLevels(MaxLevels + 1);
3641
2
      SmallVector<Subscript *, 4> PairsInGroup;
3642
4
      for (unsigned SJ : Group.set_bits()) {
3643
4
        LLVM_DEBUG(dbgs() << SJ << " ");
3644
4
        if (Pair[SJ].Classification == Subscript::SIV)
3645
4
          Sivs.set(SJ);
3646
0
        else
3647
0
          Mivs.set(SJ);
3648
4
        PairsInGroup.push_back(&Pair[SJ]);
3649
4
      }
3650
2
      unifySubscriptType(PairsInGroup);
3651
2
      LLVM_DEBUG(dbgs() << "}\n");
3652
4
      while (Sivs.any()) {
3653
2
        bool Changed = false;
3654
4
        for (unsigned SJ : Sivs.set_bits()) {
3655
4
          LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
3656
4
          // SJ is an SIV subscript that's part of the current coupled group
3657
4
          unsigned Level;
3658
4
          const SCEV *SplitIter = nullptr;
3659
4
          LLVM_DEBUG(dbgs() << "SIV\n");
3660
4
          if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
3661
4
                      SplitIter))
3662
0
            return nullptr;
3663
4
          ConstrainedLevels.set(Level);
3664
4
          if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
3665
4
            if (Constraints[Level].isEmpty()) {
3666
0
              ++DeltaIndependence;
3667
0
              return nullptr;
3668
0
            }
3669
4
            Changed = true;
3670
4
          }
3671
4
          Sivs.reset(SJ);
3672
4
        }
3673
2
        if (Changed) {
3674
2
          // propagate, possibly creating new SIVs and ZIVs
3675
2
          LLVM_DEBUG(dbgs() << "    propagating\n");
3676
2
          LLVM_DEBUG(dbgs() << "\tMivs = ");
3677
2
          LLVM_DEBUG(dumpSmallBitVector(Mivs));
3678
2
          for (unsigned SJ : Mivs.set_bits()) {
3679
0
            // SJ is an MIV subscript that's part of the current coupled group
3680
0
            LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
3681
0
            if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
3682
0
                          Constraints, Result.Consistent)) {
3683
0
              LLVM_DEBUG(dbgs() << "\t    Changed\n");
3684
0
              ++DeltaPropagations;
3685
0
              Pair[SJ].Classification =
3686
0
                classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
3687
0
                             Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
3688
0
                             Pair[SJ].Loops);
3689
0
              switch (Pair[SJ].Classification) {
3690
0
              case Subscript::ZIV:
3691
0
                LLVM_DEBUG(dbgs() << "ZIV\n");
3692
0
                if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
3693
0
                  return nullptr;
3694
0
                Mivs.reset(SJ);
3695
0
                break;
3696
0
              case Subscript::SIV:
3697
0
                Sivs.set(SJ);
3698
0
                Mivs.reset(SJ);
3699
0
                break;
3700
0
              case Subscript::RDIV:
3701
0
              case Subscript::MIV:
3702
0
                break;
3703
0
              default:
3704
0
                llvm_unreachable("bad subscript classification");
3705
0
              }
3706
0
            }
3707
0
          }
3708
2
        }
3709
2
      }
3710
2
3711
2
      // test & propagate remaining RDIVs
3712
2
      for (unsigned SJ : Mivs.set_bits()) {
3713
0
        if (Pair[SJ].Classification == Subscript::RDIV) {
3714
0
          LLVM_DEBUG(dbgs() << "RDIV test\n");
3715
0
          if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
3716
0
            return nullptr;
3717
0
          // I don't yet understand how to propagate RDIV results
3718
0
          Mivs.reset(SJ);
3719
0
        }
3720
0
      }
3721
2
3722
2
      // test remaining MIVs
3723
2
      // This code is temporary.
3724
2
      // Better to somehow test all remaining subscripts simultaneously.
3725
2
      for (unsigned SJ : Mivs.set_bits()) {
3726
0
        if (Pair[SJ].Classification == Subscript::MIV) {
3727
0
          LLVM_DEBUG(dbgs() << "MIV test\n");
3728
0
          if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
3729
0
            return nullptr;
3730
0
        }
3731
0
        else
3732
0
          llvm_unreachable("expected only MIV subscripts at this point");
3733
0
      }
3734
2
3735
2
      // update Result.DV from constraint vector
3736
2
      LLVM_DEBUG(dbgs() << "    updating\n");
3737
2
      for (unsigned SJ : ConstrainedLevels.set_bits()) {
3738
2
        if (SJ > CommonLevels)
3739
0
          break;
3740
2
        updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
3741
2
        if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
3742
0
          return nullptr;
3743
2
      }
3744
2
    }
3745
2
  }
3746
1.56k
3747
1.56k
  // Make sure the Scalar flags are set correctly.
3748
1.56k
  SmallBitVector CompleteLoops(MaxLevels + 1);
3749
3.25k
  for (unsigned SI = 0; SI < Pairs; 
++SI1.68k
)
3750
1.68k
    CompleteLoops |= Pair[SI].Loops;
3751
3.92k
  for (unsigned II = 1; II <= CommonLevels; 
++II2.35k
)
3752
2.35k
    if (CompleteLoops[II])
3753
2.02k
      Result.DV[II - 1].Scalar = false;
3754
1.56k
3755
1.56k
  if (PossiblyLoopIndependent) {
3756
432
    // Make sure the LoopIndependent flag is set correctly.
3757
432
    // All directions must include equal, otherwise no
3758
432
    // loop-independent dependence is possible.
3759
944
    for (unsigned II = 1; II <= CommonLevels; 
++II512
) {
3760
668
      if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
3761
156
        Result.LoopIndependent = false;
3762
156
        break;
3763
156
      }
3764
668
    }
3765
432
  }
3766
1.13k
  else {
3767
1.13k
    // On the other hand, if all directions are equal and there's no
3768
1.13k
    // loop-independent dependence possible, then no dependence exists.
3769
1.13k
    bool AllEqual = true;
3770
2.27k
    for (unsigned II = 1; II <= CommonLevels; 
++II1.14k
) {
3771
1.38k
      if (Result.getDirection(II) != Dependence::DVEntry::EQ) {
3772
244
        AllEqual = false;
3773
244
        break;
3774
244
      }
3775
1.38k
    }
3776
1.13k
    if (AllEqual)
3777
892
      return nullptr;
3778
676
  }
3779
676
3780
676
  return make_unique<FullDependence>(std::move(Result));
3781
676
}
3782
3783
3784
3785
//===----------------------------------------------------------------------===//
3786
// getSplitIteration -
3787
// Rather than spend rarely-used space recording the splitting iteration
3788
// during the Weak-Crossing SIV test, we re-compute it on demand.
3789
// The re-computation is basically a repeat of the entire dependence test,
3790
// though simplified since we know that the dependence exists.
3791
// It's tedious, since we must go through all propagations, etc.
3792
//
3793
// Care is required to keep this code up to date with respect to the routine
3794
// above, depends().
3795
//
3796
// Generally, the dependence analyzer will be used to build
3797
// a dependence graph for a function (basically a map from instructions
3798
// to dependences). Looking for cycles in the graph shows us loops
3799
// that cannot be trivially vectorized/parallelized.
3800
//
3801
// We can try to improve the situation by examining all the dependences
3802
// that make up the cycle, looking for ones we can break.
3803
// Sometimes, peeling the first or last iteration of a loop will break
3804
// dependences, and we've got flags for those possibilities.
3805
// Sometimes, splitting a loop at some other iteration will do the trick,
3806
// and we've got a flag for that case. Rather than waste the space to
3807
// record the exact iteration (since we rarely know), we provide
3808
// a method that calculates the iteration. It's a drag that it must work
3809
// from scratch, but wonderful in that it's possible.
3810
//
3811
// Here's an example:
3812
//
3813
//    for (i = 0; i < 10; i++)
3814
//        A[i] = ...
3815
//        ... = A[11 - i]
3816
//
3817
// There's a loop-carried flow dependence from the store to the load,
3818
// found by the weak-crossing SIV test. The dependence will have a flag,
3819
// indicating that the dependence can be broken by splitting the loop.
3820
// Calling getSplitIteration will return 5.
3821
// Splitting the loop breaks the dependence, like so:
3822
//
3823
//    for (i = 0; i <= 5; i++)
3824
//        A[i] = ...
3825
//        ... = A[11 - i]
3826
//    for (i = 6; i < 10; i++)
3827
//        A[i] = ...
3828
//        ... = A[11 - i]
3829
//
3830
// breaks the dependence and allows us to vectorize/parallelize
3831
// both loops.
3832
const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
3833
6
                                              unsigned SplitLevel) {
3834
6
  assert(Dep.isSplitable(SplitLevel) &&
3835
6
         "Dep should be splitable at SplitLevel");
3836
6
  Instruction *Src = Dep.getSrc();
3837
6
  Instruction *Dst = Dep.getDst();
3838
6
  assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
3839
6
  assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
3840
6
  assert(isLoadOrStore(Src));
3841
6
  assert(isLoadOrStore(Dst));
3842
6
  Value *SrcPtr = getLoadStorePointerOperand(Src);
3843
6
  Value *DstPtr = getLoadStorePointerOperand(Dst);
3844
6
  assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(),
3845
6
                                MemoryLocation::get(Dst),
3846
6
                                MemoryLocation::get(Src)) == MustAlias);
3847
6
3848
6
  // establish loop nesting levels
3849
6
  establishNestingLevels(Src, Dst);
3850
6
3851
6
  FullDependence Result(Src, Dst, false, CommonLevels);
3852
6
3853
6
  unsigned Pairs = 1;
3854
6
  SmallVector<Subscript, 2> Pair(Pairs);
3855
6
  const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
3856
6
  const SCEV *DstSCEV = SE->getSCEV(DstPtr);
3857
6
  Pair[0].Src = SrcSCEV;
3858
6
  Pair[0].Dst = DstSCEV;
3859
6
3860
6
  if (Delinearize) {
3861
6
    if (tryDelinearize(Src, Dst, Pair)) {
3862
0
      LLVM_DEBUG(dbgs() << "    delinearized\n");
3863
0
      Pairs = Pair.size();
3864
0
    }
3865
6
  }
3866
6
3867
12
  for (unsigned P = 0; P < Pairs; 
++P6
) {
3868
6
    Pair[P].Loops.resize(MaxLevels + 1);
3869
6
    Pair[P].GroupLoops.resize(MaxLevels + 1);
3870
6
    Pair[P].Group.resize(Pairs);
3871
6
    removeMatchingExtensions(&Pair[P]);
3872
6
    Pair[P].Classification =
3873
6
      classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
3874
6
                   Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
3875
6
                   Pair[P].Loops);
3876
6
    Pair[P].GroupLoops = Pair[P].Loops;
3877
6
    Pair[P].Group.set(P);
3878
6
  }
3879
6
3880
6
  SmallBitVector Separable(Pairs);
3881
6
  SmallBitVector Coupled(Pairs);
3882
6
3883
6
  // partition subscripts into separable and minimally-coupled groups
3884
12
  for (unsigned SI = 0; SI < Pairs; 
++SI6
) {
3885
6
    if (Pair[SI].Classification == Subscript::NonLinear) {
3886
0
      // ignore these, but collect loops for later
3887
0
      collectCommonLoops(Pair[SI].Src,
3888
0
                         LI->getLoopFor(Src->getParent()),
3889
0
                         Pair[SI].Loops);
3890
0
      collectCommonLoops(Pair[SI].Dst,
3891
0
                         LI->getLoopFor(Dst->getParent()),
3892
0
                         Pair[SI].Loops);
3893
0
      Result.Consistent = false;
3894
0
    }
3895
6
    else if (Pair[SI].Classification == Subscript::ZIV)
3896
0
      Separable.set(SI);
3897
6
    else {
3898
6
      // SIV, RDIV, or MIV, so check for coupled group
3899
6
      bool Done = true;
3900
6
      for (unsigned SJ = SI + 1; SJ < Pairs; 
++SJ0
) {
3901
0
        SmallBitVector Intersection = Pair[SI].GroupLoops;
3902
0
        Intersection &= Pair[SJ].GroupLoops;
3903
0
        if (Intersection.any()) {
3904
0
          // accumulate set of all the loops in group
3905
0
          Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
3906
0
          // accumulate set of all subscripts in group
3907
0
          Pair[SJ].Group |= Pair[SI].Group;
3908
0
          Done = false;
3909
0
        }
3910
0
      }
3911
6
      if (Done) {
3912
6
        if (Pair[SI].Group.count() == 1)
3913
6
          Separable.set(SI);
3914
0
        else
3915
0
          Coupled.set(SI);
3916
6
      }
3917
6
    }
3918
6
  }
3919
6
3920
6
  Constraint NewConstraint;
3921
6
  NewConstraint.setAny(SE);
3922
6
3923
6
  // test separable subscripts
3924
6
  for (unsigned SI : Separable.set_bits()) {
3925
6
    switch (Pair[SI].Classification) {
3926
6
    case Subscript::SIV: {
3927
6
      unsigned Level;
3928
6
      const SCEV *SplitIter = nullptr;
3929
6
      (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
3930
6
                     Result, NewConstraint, SplitIter);
3931
6
      if (Level == SplitLevel) {
3932
6
        assert(SplitIter != nullptr);
3933
6
        return SplitIter;
3934
6
      }
3935
0
      break;
3936
0
    }
3937
0
    case Subscript::ZIV:
3938
0
    case Subscript::RDIV:
3939
0
    case Subscript::MIV:
3940
0
      break;
3941
0
    default:
3942
0
      llvm_unreachable("subscript has unexpected classification");
3943
6
    }
3944
6
  }
3945
6
3946
6
  
if (0
Coupled.count()0
) {
3947
0
    // test coupled subscript groups
3948
0
    SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
3949
0
    for (unsigned II = 0; II <= MaxLevels; ++II)
3950
0
      Constraints[II].setAny(SE);
3951
0
    for (unsigned SI : Coupled.set_bits()) {
3952
0
      SmallBitVector Group(Pair[SI].Group);
3953
0
      SmallBitVector Sivs(Pairs);
3954
0
      SmallBitVector Mivs(Pairs);
3955
0
      SmallBitVector ConstrainedLevels(MaxLevels + 1);
3956
0
      for (unsigned SJ : Group.set_bits()) {
3957
0
        if (Pair[SJ].Classification == Subscript::SIV)
3958
0
          Sivs.set(SJ);
3959
0
        else
3960
0
          Mivs.set(SJ);
3961
0
      }
3962
0
      while (Sivs.any()) {
3963
0
        bool Changed = false;
3964
0
        for (unsigned SJ : Sivs.set_bits()) {
3965
0
          // SJ is an SIV subscript that's part of the current coupled group
3966
0
          unsigned Level;
3967
0
          const SCEV *SplitIter = nullptr;
3968
0
          (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
3969
0
                         Result, NewConstraint, SplitIter);
3970
0
          if (Level == SplitLevel && SplitIter)
3971
0
            return SplitIter;
3972
0
          ConstrainedLevels.set(Level);
3973
0
          if (intersectConstraints(&Constraints[Level], &NewConstraint))
3974
0
            Changed = true;
3975
0
          Sivs.reset(SJ);
3976
0
        }
3977
0
        if (Changed) {
3978
0
          // propagate, possibly creating new SIVs and ZIVs
3979
0
          for (unsigned SJ : Mivs.set_bits()) {
3980
0
            // SJ is an MIV subscript that's part of the current coupled group
3981
0
            if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
3982
0
                          Pair[SJ].Loops, Constraints, Result.Consistent)) {
3983
0
              Pair[SJ].Classification =
3984
0
                classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
3985
0
                             Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
3986
0
                             Pair[SJ].Loops);
3987
0
              switch (Pair[SJ].Classification) {
3988
0
              case Subscript::ZIV:
3989
0
                Mivs.reset(SJ);
3990
0
                break;
3991
0
              case Subscript::SIV:
3992
0
                Sivs.set(SJ);
3993
0
                Mivs.reset(SJ);
3994
0
                break;
3995
0
              case Subscript::RDIV:
3996
0
              case Subscript::MIV:
3997
0
                break;
3998
0
              default:
3999
0
                llvm_unreachable("bad subscript classification");
4000
0
              }
4001
0
            }
4002
0
          }
4003
0
        }
4004
0
      }
4005
0
    }
4006
0
  }
4007
0
  llvm_unreachable("somehow reached end of routine");
4008
0
  return nullptr;
4009
0
}