Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Analysis/DependenceAnalysis.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// DependenceAnalysis is an LLVM pass that analyses dependences between memory
11
// accesses. Currently, it is an (incomplete) implementation of the approach
12
// described in
13
//
14
//            Practical Dependence Testing
15
//            Goff, Kennedy, Tseng
16
//            PLDI 1991
17
//
18
// There's a single entry point that analyzes the dependence between a pair
19
// of memory references in a function, returning either NULL, for no dependence,
20
// or a more-or-less detailed description of the dependence between them.
21
//
22
// Currently, the implementation cannot propagate constraints between
23
// coupled RDIV subscripts and lacks a multi-subscript MIV test.
24
// Both of these are conservative weaknesses;
25
// that is, not a source of correctness problems.
26
//
27
// The implementation depends on the GEP instruction to differentiate
28
// subscripts. Since Clang linearizes some array subscripts, the dependence
29
// analysis is using SCEV->delinearize to recover the representation of multiple
30
// subscripts, and thus avoid the more expensive and less precise MIV tests. The
31
// delinearization is controlled by the flag -da-delinearize.
32
//
33
// We should pay some careful attention to the possibility of integer overflow
34
// in the implementation of the various tests. This could happen with Add,
35
// Subtract, or Multiply, with both APInt's and SCEV's.
36
//
37
// Some non-linear subscript pairs can be handled by the GCD test
38
// (and perhaps other tests).
39
// Should explore how often these things occur.
40
//
41
// Finally, it seems like certain test cases expose weaknesses in the SCEV
42
// simplification, especially in the handling of sign and zero extensions.
43
// It could be useful to spend time exploring these.
44
//
45
// Please note that this is work in progress and the interface is subject to
46
// change.
47
//
48
//===----------------------------------------------------------------------===//
49
//                                                                            //
50
//                   In memory of Ken Kennedy, 1945 - 2007                    //
51
//                                                                            //
52
//===----------------------------------------------------------------------===//
53
54
#include "llvm/Analysis/DependenceAnalysis.h"
55
#include "llvm/ADT/STLExtras.h"
56
#include "llvm/ADT/Statistic.h"
57
#include "llvm/Analysis/AliasAnalysis.h"
58
#include "llvm/Analysis/LoopInfo.h"
59
#include "llvm/Analysis/ScalarEvolution.h"
60
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
61
#include "llvm/Analysis/ValueTracking.h"
62
#include "llvm/IR/InstIterator.h"
63
#include "llvm/IR/Module.h"
64
#include "llvm/IR/Operator.h"
65
#include "llvm/Support/CommandLine.h"
66
#include "llvm/Support/Debug.h"
67
#include "llvm/Support/ErrorHandling.h"
68
#include "llvm/Support/raw_ostream.h"
69
70
using namespace llvm;
71
72
#define DEBUG_TYPE "da"
73
74
//===----------------------------------------------------------------------===//
75
// statistics
76
77
STATISTIC(TotalArrayPairs, "Array pairs tested");
78
STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
79
STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
80
STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
81
STATISTIC(ZIVapplications, "ZIV applications");
82
STATISTIC(ZIVindependence, "ZIV independence");
83
STATISTIC(StrongSIVapplications, "Strong SIV applications");
84
STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
85
STATISTIC(StrongSIVindependence, "Strong SIV independence");
86
STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
87
STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
88
STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
89
STATISTIC(ExactSIVapplications, "Exact SIV applications");
90
STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
91
STATISTIC(ExactSIVindependence, "Exact SIV independence");
92
STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
93
STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
94
STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
95
STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
96
STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
97
STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
98
STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
99
STATISTIC(DeltaApplications, "Delta applications");
100
STATISTIC(DeltaSuccesses, "Delta successes");
101
STATISTIC(DeltaIndependence, "Delta independence");
102
STATISTIC(DeltaPropagations, "Delta propagations");
103
STATISTIC(GCDapplications, "GCD applications");
104
STATISTIC(GCDsuccesses, "GCD successes");
105
STATISTIC(GCDindependence, "GCD independence");
106
STATISTIC(BanerjeeApplications, "Banerjee applications");
107
STATISTIC(BanerjeeIndependence, "Banerjee independence");
108
STATISTIC(BanerjeeSuccesses, "Banerjee successes");
109
110
static cl::opt<bool>
111
Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
112
            cl::desc("Try to delinearize array references."));
113
114
//===----------------------------------------------------------------------===//
115
// basics
116
117
DependenceAnalysis::Result
118
0
DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
119
0
  auto &AA = FAM.getResult<AAManager>(F);
120
0
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
121
0
  auto &LI = FAM.getResult<LoopAnalysis>(F);
122
0
  return DependenceInfo(&F, &AA, &SE, &LI);
123
0
}
124
125
AnalysisKey DependenceAnalysis::Key;
126
127
24.6k
INITIALIZE_PASS_BEGIN24.6k
(DependenceAnalysisWrapperPass, "da",
128
24.6k
                      "Dependence Analysis", true, true)
129
24.6k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
130
24.6k
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
131
24.6k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
132
24.6k
INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",
133
                    true, true)
134
135
char DependenceAnalysisWrapperPass::ID = 0;
136
137
0
FunctionPass *llvm::createDependenceAnalysisWrapperPass() {
138
0
  return new DependenceAnalysisWrapperPass();
139
0
}
140
141
208
bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) {
142
208
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
143
208
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
144
208
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
145
208
  info.reset(new DependenceInfo(&F, &AA, &SE, &LI));
146
208
  return false;
147
208
}
148
149
25
DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; }
150
151
208
void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); }
152
153
43
void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
154
43
  AU.setPreservesAll();
155
43
  AU.addRequiredTransitive<AAResultsWrapperPass>();
156
43
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
157
43
  AU.addRequiredTransitive<LoopInfoWrapperPass>();
158
43
}
159
160
161
// Used to test the dependence analyzer.
162
// Looks through the function, noting loads and stores.
163
// Calls depends() on every possible pair and prints out the result.
164
// Ignores all other instructions.
165
183
static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) {
166
183
  auto *F = DA->getFunction();
167
4.49k
  for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
168
4.31k
       
++SrcI4.31k
) {
169
4.31k
    if (
isa<StoreInst>(*SrcI) || 4.31k
isa<LoadInst>(*SrcI)3.95k
) {
170
571
      for (inst_iterator DstI = SrcI, DstE = inst_end(F);
171
8.02k
           
DstI != DstE8.02k
;
++DstI7.45k
) {
172
7.45k
        if (
isa<StoreInst>(*DstI) || 7.45k
isa<LoadInst>(*DstI)6.36k
) {
173
1.66k
          OS << "da analyze - ";
174
1.66k
          if (auto 
D1.66k
= DA->depends(&*SrcI, &*DstI, true)) {
175
922
            D->dump(OS);
176
1.63k
            for (unsigned Level = 1; 
Level <= D->getLevels()1.63k
;
Level++714
) {
177
714
              if (
D->isSplitable(Level)714
) {
178
10
                OS << "da analyze - split level = " << Level;
179
10
                OS << ", iteration = " << *DA->getSplitIteration(*D, Level);
180
10
                OS << "!\n";
181
10
              }
182
714
            }
183
922
          }
184
1.66k
          else
185
743
            OS << "none!\n";
186
1.66k
        }
187
7.45k
      }
188
571
    }
189
4.31k
  }
190
183
}
191
192
void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
193
183
                                          const Module *) const {
194
183
  dumpExampleDependence(OS, info.get());
195
183
}
196
197
//===----------------------------------------------------------------------===//
198
// Dependence methods
199
200
// Returns true if this is an input dependence.
201
105
bool Dependence::isInput() const {
202
105
  return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
203
105
}
204
205
206
// Returns true if this is an output dependence.
207
277
bool Dependence::isOutput() const {
208
150
  return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
209
277
}
210
211
212
// Returns true if this is an flow (aka true)  dependence.
213
376
bool Dependence::isFlow() const {
214
249
  return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
215
376
}
216
217
218
// Returns true if this is an anti dependence.
219
127
bool Dependence::isAnti() const {
220
127
  return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
221
127
}
222
223
224
// Returns true if a particular level is scalar; that is,
225
// if no subscript in the source or destination mention the induction
226
// variable associated with the loop at this level.
227
// Leave this out of line, so it will serve as a virtual method anchor
228
0
bool Dependence::isScalar(unsigned level) const {
229
0
  return false;
230
0
}
231
232
233
//===----------------------------------------------------------------------===//
234
// FullDependence methods
235
236
FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
237
                               bool PossiblyLoopIndependent,
238
                               unsigned CommonLevels)
239
    : Dependence(Source, Destination), Levels(CommonLevels),
240
915
      LoopIndependent(PossiblyLoopIndependent) {
241
915
  Consistent = true;
242
915
  if (CommonLevels)
243
851
    DV = make_unique<DVEntry[]>(CommonLevels);
244
915
}
245
246
// The rest are simple getters that hide the implementation.
247
248
// getDirection - Returns the direction associated with a particular level.
249
1.48k
unsigned FullDependence::getDirection(unsigned Level) const {
250
1.48k
  assert(0 < Level && Level <= Levels && "Level out of range");
251
1.48k
  return DV[Level - 1].Direction;
252
1.48k
}
253
254
255
// Returns the distance (or NULL) associated with a particular level.
256
771
const SCEV *FullDependence::getDistance(unsigned Level) const {
257
771
  assert(0 < Level && Level <= Levels && "Level out of range");
258
771
  return DV[Level - 1].Distance;
259
771
}
260
261
262
// Returns true if a particular level is scalar; that is,
263
// if no subscript in the source or destination mention the induction
264
// variable associated with the loop at this level.
265
633
bool FullDependence::isScalar(unsigned Level) const {
266
633
  assert(0 < Level && Level <= Levels && "Level out of range");
267
633
  return DV[Level - 1].Scalar;
268
633
}
269
270
271
// Returns true if peeling the first iteration from this loop
272
// will break this dependence.
273
714
bool FullDependence::isPeelFirst(unsigned Level) const {
274
714
  assert(0 < Level && Level <= Levels && "Level out of range");
275
714
  return DV[Level - 1].PeelFirst;
276
714
}
277
278
279
// Returns true if peeling the last iteration from this loop
280
// will break this dependence.
281
714
bool FullDependence::isPeelLast(unsigned Level) const {
282
714
  assert(0 < Level && Level <= Levels && "Level out of range");
283
714
  return DV[Level - 1].PeelLast;
284
714
}
285
286
287
// Returns true if splitting this loop will break the dependence.
288
1.42k
bool FullDependence::isSplitable(unsigned Level) const {
289
1.42k
  assert(0 < Level && Level <= Levels && "Level out of range");
290
1.42k
  return DV[Level - 1].Splitable;
291
1.42k
}
292
293
294
//===----------------------------------------------------------------------===//
295
// DependenceInfo::Constraint methods
296
297
// If constraint is a point <X, Y>, returns X.
298
// Otherwise assert.
299
41
const SCEV *DependenceInfo::Constraint::getX() const {
300
41
  assert(Kind == Point && "Kind should be Point");
301
41
  return A;
302
41
}
303
304
305
// If constraint is a point <X, Y>, returns Y.
306
// Otherwise assert.
307
41
const SCEV *DependenceInfo::Constraint::getY() const {
308
41
  assert(Kind == Point && "Kind should be Point");
309
41
  return B;
310
41
}
311
312
313
// If constraint is a line AX + BY = C, returns A.
314
// Otherwise assert.
315
112
const SCEV *DependenceInfo::Constraint::getA() const {
316
112
  assert((Kind == Line || Kind == Distance) &&
317
112
         "Kind should be Line (or Distance)");
318
112
  return A;
319
112
}
320
321
322
// If constraint is a line AX + BY = C, returns B.
323
// Otherwise assert.
324
120
const SCEV *DependenceInfo::Constraint::getB() const {
325
120
  assert((Kind == Line || Kind == Distance) &&
326
120
         "Kind should be Line (or Distance)");
327
120
  return B;
328
120
}
329
330
331
// If constraint is a line AX + BY = C, returns C.
332
// Otherwise assert.
333
81
const SCEV *DependenceInfo::Constraint::getC() const {
334
81
  assert((Kind == Line || Kind == Distance) &&
335
81
         "Kind should be Line (or Distance)");
336
81
  return C;
337
81
}
338
339
340
// If constraint is a distance, returns D.
341
// Otherwise assert.
342
279
const SCEV *DependenceInfo::Constraint::getD() const {
343
279
  assert(Kind == Distance && "Kind should be Distance");
344
279
  return SE->getNegativeSCEV(C);
345
279
}
346
347
348
// Returns the loop associated with this constraint.
349
94
const Loop *DependenceInfo::Constraint::getAssociatedLoop() const {
350
94
  assert((Kind == Distance || Kind == Line || Kind == Point) &&
351
94
         "Kind should be Distance, Line, or Point");
352
94
  return AssociatedLoop;
353
94
}
354
355
void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y,
356
13
                                          const Loop *CurLoop) {
357
13
  Kind = Point;
358
13
  A = X;
359
13
  B = Y;
360
13
  AssociatedLoop = CurLoop;
361
13
}
362
363
void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB,
364
118
                                         const SCEV *CC, const Loop *CurLoop) {
365
118
  Kind = Line;
366
118
  A = AA;
367
118
  B = BB;
368
118
  C = CC;
369
118
  AssociatedLoop = CurLoop;
370
118
}
371
372
void DependenceInfo::Constraint::setDistance(const SCEV *D,
373
529
                                             const Loop *CurLoop) {
374
529
  Kind = Distance;
375
529
  A = SE->getOne(D->getType());
376
529
  B = SE->getNegativeSCEV(A);
377
529
  C = SE->getNegativeSCEV(D);
378
529
  AssociatedLoop = CurLoop;
379
529
}
380
381
7
void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }
382
383
1.26k
void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) {
384
1.26k
  SE = NewSE;
385
1.26k
  Kind = Any;
386
1.26k
}
387
388
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
389
// For debugging purposes. Dumps the constraint out to OS.
390
LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
391
  if (isEmpty())
392
    OS << " Empty\n";
393
  else if (isAny())
394
    OS << " Any\n";
395
  else if (isPoint())
396
    OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
397
  else if (isDistance())
398
    OS << " Distance is " << *getD() <<
399
      " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
400
  else if (isLine())
401
    OS << " Line is " << *getA() << "*X + " <<
402
      *getB() << "*Y = " << *getC() << "\n";
403
  else
404
    llvm_unreachable("unknown constraint type in Constraint::dump");
405
}
406
#endif
407
408
409
// Updates X with the intersection
410
// of the Constraints X and Y. Returns true if X has changed.
411
// Corresponds to Figure 4 from the paper
412
//
413
//            Practical Dependence Testing
414
//            Goff, Kennedy, Tseng
415
//            PLDI 1991
416
236
bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
417
236
  ++DeltaApplications;
418
236
  DEBUG(dbgs() << "\tintersect constraints\n");
419
236
  DEBUG(dbgs() << "\t    X ="; X->dump(dbgs()));
420
236
  DEBUG(dbgs() << "\t    Y ="; Y->dump(dbgs()));
421
236
  assert(!Y->isPoint() && "Y must not be a Point");
422
236
  if (
X->isAny()236
) {
423
165
    if (Y->isAny())
424
0
      return false;
425
165
    *X = *Y;
426
165
    return true;
427
165
  }
428
71
  
if (71
X->isEmpty()71
)
429
0
    return false;
430
71
  
if (71
Y->isEmpty()71
) {
431
0
    X->setEmpty();
432
0
    return true;
433
0
  }
434
71
435
71
  
if (71
X->isDistance() && 71
Y->isDistance()53
) {
436
49
    DEBUG(dbgs() << "\t    intersect 2 distances\n");
437
49
    if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
438
48
      return false;
439
1
    
if (1
isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())1
) {
440
1
      X->setEmpty();
441
1
      ++DeltaSuccesses;
442
1
      return true;
443
1
    }
444
0
    // Hmmm, interesting situation.
445
0
    // I guess if either is constant, keep it and ignore the other.
446
0
    
if (0
isa<SCEVConstant>(Y->getD())0
) {
447
0
      *X = *Y;
448
0
      return true;
449
0
    }
450
0
    return false;
451
0
  }
452
22
453
22
  // At this point, the pseudo-code in Figure 4 of the paper
454
22
  // checks if (X->isPoint() && Y->isPoint()).
455
22
  // This case can't occur in our implementation,
456
22
  // since a Point can only arise as the result of intersecting
457
22
  // two Line constraints, and the right-hand value, Y, is never
458
22
  // the result of an intersection.
459
71
  assert(!(X->isPoint() && Y->isPoint()) &&
460
22
         "We shouldn't ever see X->isPoint() && Y->isPoint()");
461
22
462
22
  if (
X->isLine() && 22
Y->isLine()20
) {
463
20
    DEBUG(dbgs() << "\t    intersect 2 lines\n");
464
20
    const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
465
20
    const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
466
20
    if (
isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)20
) {
467
4
      // slopes are equal, so lines are parallel
468
4
      DEBUG(dbgs() << "\t\tsame slope\n");
469
4
      Prod1 = SE->getMulExpr(X->getC(), Y->getB());
470
4
      Prod2 = SE->getMulExpr(X->getB(), Y->getC());
471
4
      if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
472
1
        return false;
473
3
      
if (3
isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)3
) {
474
2
        X->setEmpty();
475
2
        ++DeltaSuccesses;
476
2
        return true;
477
2
      }
478
1
      return false;
479
1
    }
480
16
    
if (16
isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)16
) {
481
16
      // slopes differ, so lines intersect
482
16
      DEBUG(dbgs() << "\t\tdifferent slopes\n");
483
16
      const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
484
16
      const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
485
16
      const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
486
16
      const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
487
16
      const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
488
16
      const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
489
16
      const SCEVConstant *C1A2_C2A1 =
490
16
        dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
491
16
      const SCEVConstant *C1B2_C2B1 =
492
16
        dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
493
16
      const SCEVConstant *A1B2_A2B1 =
494
16
        dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
495
16
      const SCEVConstant *A2B1_A1B2 =
496
16
        dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
497
16
      if (
!C1B2_C2B1 || 16
!C1A2_C2A116
||
498
16
          
!A1B2_A2B116
||
!A2B1_A1B216
)
499
0
        return false;
500
16
      APInt Xtop = C1B2_C2B1->getAPInt();
501
16
      APInt Xbot = A1B2_A2B1->getAPInt();
502
16
      APInt Ytop = C1A2_C2A1->getAPInt();
503
16
      APInt Ybot = A2B1_A1B2->getAPInt();
504
16
      DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
505
16
      DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
506
16
      DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
507
16
      DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
508
16
      APInt Xq = Xtop; // these need to be initialized, even
509
16
      APInt Xr = Xtop; // though they're just going to be overwritten
510
16
      APInt::sdivrem(Xtop, Xbot, Xq, Xr);
511
16
      APInt Yq = Ytop;
512
16
      APInt Yr = Ytop;
513
16
      APInt::sdivrem(Ytop, Ybot, Yq, Yr);
514
16
      if (
Xr != 0 || 16
Yr != 015
) {
515
1
        X->setEmpty();
516
1
        ++DeltaSuccesses;
517
1
        return true;
518
1
      }
519
15
      
DEBUG15
(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
520
15
      if (
Xq.slt(0) || 15
Yq.slt(0)15
) {
521
1
        X->setEmpty();
522
1
        ++DeltaSuccesses;
523
1
        return true;
524
1
      }
525
14
      
if (const SCEVConstant *14
CUB14
=
526
13
          collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
527
13
        const APInt &UpperBound = CUB->getAPInt();
528
13
        DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
529
13
        if (
Xq.sgt(UpperBound) || 13
Yq.sgt(UpperBound)13
) {
530
1
          X->setEmpty();
531
1
          ++DeltaSuccesses;
532
1
          return true;
533
1
        }
534
13
      }
535
13
      X->setPoint(SE->getConstant(Xq),
536
13
                  SE->getConstant(Yq),
537
13
                  X->getAssociatedLoop());
538
13
      ++DeltaSuccesses;
539
13
      return true;
540
13
    }
541
0
    return false;
542
0
  }
543
2
544
2
  // if (X->isLine() && Y->isPoint()) This case can't occur.
545
22
  assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
546
2
547
2
  if (
X->isPoint() && 2
Y->isLine()2
) {
548
2
    DEBUG(dbgs() << "\t    intersect Point and Line\n");
549
2
    const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
550
2
    const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
551
2
    const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
552
2
    if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
553
1
      return false;
554
1
    
if (1
isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())1
) {
555
1
      X->setEmpty();
556
1
      ++DeltaSuccesses;
557
1
      return true;
558
1
    }
559
0
    return false;
560
0
  }
561
0
562
0
  
llvm_unreachable0
("shouldn't reach the end of Constraint intersection");
563
0
  return false;
564
236
}
565
566
567
//===----------------------------------------------------------------------===//
568
// DependenceInfo methods
569
570
// For debugging purposes. Dumps a dependence to OS.
571
922
void Dependence::dump(raw_ostream &OS) const {
572
922
  bool Splitable = false;
573
922
  if (isConfused())
574
546
    OS << "confused";
575
376
  else {
576
376
    if (isConsistent())
577
160
      OS << "consistent ";
578
376
    if (isFlow())
579
99
      OS << "flow";
580
277
    else 
if (277
isOutput()277
)
581
150
      OS << "output";
582
127
    else 
if (127
isAnti()127
)
583
22
      OS << "anti";
584
105
    else 
if (105
isInput()105
)
585
105
      OS << "input";
586
376
    unsigned Levels = getLevels();
587
376
    OS << " [";
588
1.09k
    for (unsigned II = 1; 
II <= Levels1.09k
;
++II714
) {
589
714
      if (isSplitable(II))
590
10
        Splitable = true;
591
714
      if (isPeelFirst(II))
592
7
        OS << 'p';
593
714
      const SCEV *Distance = getDistance(II);
594
714
      if (Distance)
595
94
        OS << *Distance;
596
620
      else 
if (620
isScalar(II)620
)
597
245
        OS << "S";
598
375
      else {
599
375
        unsigned Direction = getDirection(II);
600
375
        if (Direction == DVEntry::ALL)
601
291
          OS << "*";
602
84
        else {
603
84
          if (Direction & DVEntry::LT)
604
42
            OS << "<";
605
84
          if (Direction & DVEntry::EQ)
606
39
            OS << "=";
607
84
          if (Direction & DVEntry::GT)
608
50
            OS << ">";
609
84
        }
610
620
      }
611
714
      if (isPeelLast(II))
612
2
        OS << 'p';
613
714
      if (II < Levels)
614
352
        OS << " ";
615
714
    }
616
376
    if (isLoopIndependent())
617
174
      OS << "|<";
618
376
    OS << "]";
619
376
    if (Splitable)
620
10
      OS << " splitable";
621
376
  }
622
922
  OS << "!\n";
623
922
}
624
625
static AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
626
                                          const DataLayout &DL, const Value *A,
627
1.72k
                                          const Value *B) {
628
1.72k
  const Value *AObj = GetUnderlyingObject(A, DL);
629
1.72k
  const Value *BObj = GetUnderlyingObject(B, DL);
630
1.72k
  return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()),
631
1.72k
                   BObj, DL.getTypeStoreSize(BObj->getType()));
632
1.72k
}
633
634
635
// Returns true if the load or store can be analyzed. Atomic and volatile
636
// operations have properties which this analysis does not understand.
637
static
638
3.45k
bool isLoadOrStore(const Instruction *I) {
639
3.45k
  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
640
1.33k
    return LI->isUnordered();
641
2.11k
  else 
if (const StoreInst *2.11k
SI2.11k
= dyn_cast<StoreInst>(I))
642
2.11k
    return SI->isUnordered();
643
0
  return false;
644
0
}
645
646
647
static
648
3.67k
Value *getPointerOperand(Instruction *I) {
649
3.67k
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
650
1.42k
    return LI->getPointerOperand();
651
2.25k
  
if (StoreInst *2.25k
SI2.25k
= dyn_cast<StoreInst>(I))
652
2.25k
    return SI->getPointerOperand();
653
0
  
llvm_unreachable0
("Value is not load or store instruction");
654
0
  return nullptr;
655
3.67k
}
656
657
658
// Examines the loop nesting of the Src and Dst
659
// instructions and establishes their shared loops. Sets the variables
660
// CommonLevels, SrcLevels, and MaxLevels.
661
// The source and destination instructions needn't be contained in the same
662
// loop. The routine establishNestingLevels finds the level of most deeply
663
// nested loop that contains them both, CommonLevels. An instruction that's
664
// not contained in a loop is at level = 0. MaxLevels is equal to the level
665
// of the source plus the level of the destination, minus CommonLevels.
666
// This lets us allocate vectors MaxLevels in length, with room for every
667
// distinct loop referenced in both the source and destination subscripts.
668
// The variable SrcLevels is the nesting depth of the source instruction.
669
// It's used to help calculate distinct loops referenced by the destination.
670
// Here's the map from loops to levels:
671
//            0 - unused
672
//            1 - outermost common loop
673
//          ... - other common loops
674
// CommonLevels - innermost common loop
675
//          ... - loops containing Src but not Dst
676
//    SrcLevels - innermost loop containing Src but not Dst
677
//          ... - loops containing Dst but not Src
678
//    MaxLevels - innermost loops containing Dst but not Src
679
// Consider the follow code fragment:
680
//   for (a = ...) {
681
//     for (b = ...) {
682
//       for (c = ...) {
683
//         for (d = ...) {
684
//           A[] = ...;
685
//         }
686
//       }
687
//       for (e = ...) {
688
//         for (f = ...) {
689
//           for (g = ...) {
690
//             ... = A[];
691
//           }
692
//         }
693
//       }
694
//     }
695
//   }
696
// If we're looking at the possibility of a dependence between the store
697
// to A (the Src) and the load from A (the Dst), we'll note that they
698
// have 2 loops in common, so CommonLevels will equal 2 and the direction
699
// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
700
// A map from loop names to loop numbers would look like
701
//     a - 1
702
//     b - 2 = CommonLevels
703
//     c - 3
704
//     d - 4 = SrcLevels
705
//     e - 5
706
//     f - 6
707
//     g - 7 = MaxLevels
708
void DependenceInfo::establishNestingLevels(const Instruction *Src,
709
915
                                            const Instruction *Dst) {
710
915
  const BasicBlock *SrcBlock = Src->getParent();
711
915
  const BasicBlock *DstBlock = Dst->getParent();
712
915
  unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
713
915
  unsigned DstLevel = LI->getLoopDepth(DstBlock);
714
915
  const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
715
915
  const Loop *DstLoop = LI->getLoopFor(DstBlock);
716
915
  SrcLevels = SrcLevel;
717
915
  MaxLevels = SrcLevel + DstLevel;
718
927
  while (
SrcLevel > DstLevel927
) {
719
12
    SrcLoop = SrcLoop->getParentLoop();
720
12
    SrcLevel--;
721
12
  }
722
935
  while (
DstLevel > SrcLevel935
) {
723
20
    DstLoop = DstLoop->getParentLoop();
724
20
    DstLevel--;
725
20
  }
726
955
  while (
SrcLoop != DstLoop955
) {
727
40
    SrcLoop = SrcLoop->getParentLoop();
728
40
    DstLoop = DstLoop->getParentLoop();
729
40
    SrcLevel--;
730
40
  }
731
915
  CommonLevels = SrcLevel;
732
915
  MaxLevels -= CommonLevels;
733
915
}
734
735
736
// Given one of the loops containing the source, return
737
// its level index in our numbering scheme.
738
2.31k
unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
739
2.31k
  return SrcLoop->getLoopDepth();
740
2.31k
}
741
742
743
// Given one of the loops containing the destination,
744
// return its level index in our numbering scheme.
745
1.58k
unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
746
1.58k
  unsigned D = DstLoop->getLoopDepth();
747
1.58k
  if (D > CommonLevels)
748
22
    return D - CommonLevels + SrcLevels;
749
1.58k
  else
750
1.56k
    return D;
751
0
}
752
753
754
// Returns true if Expression is loop invariant in LoopNest.
755
bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
756
18.4k
                                     const Loop *LoopNest) const {
757
18.4k
  if (!LoopNest)
758
6.10k
    return true;
759
12.3k
  return SE->isLoopInvariant(Expression, LoopNest) &&
760
12.2k
    isLoopInvariant(Expression, LoopNest->getParentLoop());
761
18.4k
}
762
763
764
765
// Finds the set of loops from the LoopNest that
766
// have a level <= CommonLevels and are referred to by the SCEV Expression.
767
void DependenceInfo::collectCommonLoops(const SCEV *Expression,
768
                                        const Loop *LoopNest,
769
162
                                        SmallBitVector &Loops) const {
770
492
  while (
LoopNest492
) {
771
330
    unsigned Level = LoopNest->getLoopDepth();
772
330
    if (
Level <= CommonLevels && 330
!SE->isLoopInvariant(Expression, LoopNest)314
)
773
314
      Loops.set(Level);
774
330
    LoopNest = LoopNest->getParentLoop();
775
330
  }
776
162
}
777
778
1.02k
void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) {
779
1.02k
780
1.02k
  unsigned widestWidthSeen = 0;
781
1.02k
  Type *widestType;
782
1.02k
783
1.02k
  // Go through each pair and find the widest bit to which we need
784
1.02k
  // to extend all of them.
785
1.16k
  for (Subscript *Pair : Pairs) {
786
1.16k
    const SCEV *Src = Pair->Src;
787
1.16k
    const SCEV *Dst = Pair->Dst;
788
1.16k
    IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
789
1.16k
    IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
790
1.16k
    if (
SrcTy == nullptr || 1.16k
DstTy == nullptr1.16k
) {
791
0
      assert(SrcTy == DstTy && "This function only unify integer types and "
792
0
             "expect Src and Dst share the same type "
793
0
             "otherwise.");
794
0
      continue;
795
0
    }
796
1.16k
    
if (1.16k
SrcTy->getBitWidth() > widestWidthSeen1.16k
) {
797
1.02k
      widestWidthSeen = SrcTy->getBitWidth();
798
1.02k
      widestType = SrcTy;
799
1.02k
    }
800
1.16k
    if (
DstTy->getBitWidth() > widestWidthSeen1.16k
) {
801
0
      widestWidthSeen = DstTy->getBitWidth();
802
0
      widestType = DstTy;
803
0
    }
804
1.16k
  }
805
1.02k
806
1.02k
807
1.02k
  assert(widestWidthSeen > 0);
808
1.02k
809
1.02k
  // Now extend each pair to the widest seen.
810
1.16k
  for (Subscript *Pair : Pairs) {
811
1.16k
    const SCEV *Src = Pair->Src;
812
1.16k
    const SCEV *Dst = Pair->Dst;
813
1.16k
    IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
814
1.16k
    IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
815
1.16k
    if (
SrcTy == nullptr || 1.16k
DstTy == nullptr1.16k
) {
816
0
      assert(SrcTy == DstTy && "This function only unify integer types and "
817
0
             "expect Src and Dst share the same type "
818
0
             "otherwise.");
819
0
      continue;
820
0
    }
821
1.16k
    
if (1.16k
SrcTy->getBitWidth() < widestWidthSeen1.16k
)
822
1.16k
      // Sign-extend Src to widestType
823
2
      Pair->Src = SE->getSignExtendExpr(Src, widestType);
824
1.16k
    if (
DstTy->getBitWidth() < widestWidthSeen1.16k
) {
825
4
      // Sign-extend Dst to widestType
826
4
      Pair->Dst = SE->getSignExtendExpr(Dst, widestType);
827
4
    }
828
1.16k
  }
829
1.02k
}
830
831
// removeMatchingExtensions - Examines a subscript pair.
832
// If the source and destination are identically sign (or zero)
833
// extended, it strips off the extension in an effect to simplify
834
// the actual analysis.
835
1.21k
void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {
836
1.21k
  const SCEV *Src = Pair->Src;
837
1.21k
  const SCEV *Dst = Pair->Dst;
838
1.21k
  if (
(isa<SCEVZeroExtendExpr>(Src) && 1.21k
isa<SCEVZeroExtendExpr>(Dst)8
) ||
839
1.21k
      
(isa<SCEVSignExtendExpr>(Src) && 1.21k
isa<SCEVSignExtendExpr>(Dst)19
)) {
840
22
    const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
841
22
    const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
842
22
    const SCEV *SrcCastOp = SrcCast->getOperand();
843
22
    const SCEV *DstCastOp = DstCast->getOperand();
844
22
    if (
SrcCastOp->getType() == DstCastOp->getType()22
) {
845
22
      Pair->Src = SrcCastOp;
846
22
      Pair->Dst = DstCastOp;
847
22
    }
848
22
  }
849
1.21k
}
850
851
852
// Examine the scev and return true iff it's linear.
853
// Collect any loops mentioned in the set of "Loops".
854
bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
855
2.54k
                                       SmallBitVector &Loops) {
856
2.54k
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src);
857
2.54k
  if (!AddRec)
858
1.27k
    return isLoopInvariant(Src, LoopNest);
859
1.27k
  const SCEV *Start = AddRec->getStart();
860
1.27k
  const SCEV *Step = AddRec->getStepRecurrence(*SE);
861
1.27k
  const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
862
1.27k
  if (
!isa<SCEVCouldNotCompute>(UB)1.27k
) {
863
1.26k
    if (SE->getTypeSizeInBits(Start->getType()) <
864
1.26k
        SE->getTypeSizeInBits(UB->getType())) {
865
8
      if (!AddRec->getNoWrapFlags())
866
4
        return false;
867
1.26k
    }
868
1.26k
  }
869
1.26k
  
if (1.26k
!isLoopInvariant(Step, LoopNest)1.26k
)
870
0
    return false;
871
1.26k
  Loops.set(mapSrcLoop(AddRec->getLoop()));
872
1.26k
  return checkSrcSubscript(Start, LoopNest, Loops);
873
1.26k
}
874
875
876
877
// Examine the scev and return true iff it's linear.
878
// Collect any loops mentioned in the set of "Loops".
879
bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
880
2.38k
                                       SmallBitVector &Loops) {
881
2.38k
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst);
882
2.38k
  if (!AddRec)
883
1.20k
    return isLoopInvariant(Dst, LoopNest);
884
1.18k
  const SCEV *Start = AddRec->getStart();
885
1.18k
  const SCEV *Step = AddRec->getStepRecurrence(*SE);
886
1.18k
  const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
887
1.18k
  if (
!isa<SCEVCouldNotCompute>(UB)1.18k
) {
888
1.18k
    if (SE->getTypeSizeInBits(Start->getType()) <
889
1.18k
        SE->getTypeSizeInBits(UB->getType())) {
890
3
      if (!AddRec->getNoWrapFlags())
891
0
        return false;
892
1.18k
    }
893
1.18k
  }
894
1.18k
  
if (1.18k
!isLoopInvariant(Step, LoopNest)1.18k
)
895
0
    return false;
896
1.18k
  Loops.set(mapDstLoop(AddRec->getLoop()));
897
1.18k
  return checkDstSubscript(Start, LoopNest, Loops);
898
1.18k
}
899
900
901
// Examines the subscript pair (the Src and Dst SCEVs)
902
// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
903
// Collects the associated loops in a set.
904
DependenceInfo::Subscript::ClassificationKind
905
DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
906
                             const SCEV *Dst, const Loop *DstLoopNest,
907
1.28k
                             SmallBitVector &Loops) {
908
1.28k
  SmallBitVector SrcLoops(MaxLevels + 1);
909
1.28k
  SmallBitVector DstLoops(MaxLevels + 1);
910
1.28k
  if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
911
81
    return Subscript::NonLinear;
912
1.20k
  
if (1.20k
!checkDstSubscript(Dst, DstLoopNest, DstLoops)1.20k
)
913
0
    return Subscript::NonLinear;
914
1.20k
  Loops = SrcLoops;
915
1.20k
  Loops |= DstLoops;
916
1.20k
  unsigned N = Loops.count();
917
1.20k
  if (N == 0)
918
250
    return Subscript::ZIV;
919
950
  
if (950
N == 1950
)
920
652
    return Subscript::SIV;
921
298
  
if (298
N == 2 && 298
(SrcLoops.count() == 0 ||
922
281
                 DstLoops.count() == 0 ||
923
270
                 
(SrcLoops.count() == 1 && 270
DstLoops.count() == 131
)))
924
35
    return Subscript::RDIV;
925
263
  return Subscript::MIV;
926
263
}
927
928
929
// A wrapper around SCEV::isKnownPredicate.
930
// Looks for cases where we're interested in comparing for equality.
931
// If both X and Y have been identically sign or zero extended,
932
// it strips off the (confusing) extensions before invoking
933
// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
934
// will be similarly updated.
935
//
936
// If SCEV::isKnownPredicate can't prove the predicate,
937
// we try simple subtraction, which seems to help in some cases
938
// involving symbolics.
939
bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X,
940
6.30k
                                      const SCEV *Y) const {
941
6.30k
  if (Pred == CmpInst::ICMP_EQ ||
942
6.30k
      
Pred == CmpInst::ICMP_NE5.15k
) {
943
1.18k
    if ((isa<SCEVSignExtendExpr>(X) &&
944
8
         isa<SCEVSignExtendExpr>(Y)) ||
945
1.18k
        (isa<SCEVZeroExtendExpr>(X) &&
946
1.18k
         
isa<SCEVZeroExtendExpr>(Y)0
)) {
947
0
      const SCEVCastExpr *CX = cast<SCEVCastExpr>(X);
948
0
      const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y);
949
0
      const SCEV *Xop = CX->getOperand();
950
0
      const SCEV *Yop = CY->getOperand();
951
0
      if (
Xop->getType() == Yop->getType()0
) {
952
0
        X = Xop;
953
0
        Y = Yop;
954
0
      }
955
0
    }
956
1.18k
  }
957
6.30k
  if (SE->isKnownPredicate(Pred, X, Y))
958
1.89k
    return true;
959
4.40k
  // If SE->isKnownPredicate can't prove the condition,
960
4.40k
  // we try the brute-force approach of subtracting
961
4.40k
  // and testing the difference.
962
4.40k
  // By testing with SE->isKnownPredicate first, we avoid
963
4.40k
  // the possibility of overflow when the arguments are constants.
964
4.40k
  const SCEV *Delta = SE->getMinusSCEV(X, Y);
965
4.40k
  switch (Pred) {
966
96
  case CmpInst::ICMP_EQ:
967
96
    return Delta->isZero();
968
10
  case CmpInst::ICMP_NE:
969
10
    return SE->isKnownNonZero(Delta);
970
4
  case CmpInst::ICMP_SGE:
971
4
    return SE->isKnownNonNegative(Delta);
972
2
  case CmpInst::ICMP_SLE:
973
2
    return SE->isKnownNonPositive(Delta);
974
3.72k
  case CmpInst::ICMP_SGT:
975
3.72k
    return SE->isKnownPositive(Delta);
976
567
  case CmpInst::ICMP_SLT:
977
567
    return SE->isKnownNegative(Delta);
978
0
  default:
979
0
    llvm_unreachable("unexpected predicate in isKnownPredicate");
980
0
  }
981
0
}
982
983
984
// All subscripts are all the same type.
985
// Loop bound may be smaller (e.g., a char).
986
// Should zero extend loop bound, since it's always >= 0.
987
// This routine collects upper bound and extends or truncates if needed.
988
// Truncating is safe when subscripts are known not to wrap. Cases without
989
// nowrap flags should have been rejected earlier.
990
// Return null if no bound available.
991
2.68k
const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {
992
2.68k
  if (
SE->hasLoopInvariantBackedgeTakenCount(L)2.68k
) {
993
2.66k
    const SCEV *UB = SE->getBackedgeTakenCount(L);
994
2.66k
    return SE->getTruncateOrZeroExtend(UB, T);
995
2.66k
  }
996
20
  return nullptr;
997
20
}
998
999
1000
// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
1001
// If the cast fails, returns NULL.
1002
const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L,
1003
92
                                                              Type *T) const {
1004
92
  if (const SCEV *UB = collectUpperBound(L, T))
1005
92
    return dyn_cast<SCEVConstant>(UB);
1006
0
  return nullptr;
1007
0
}
1008
1009
1010
// testZIV -
1011
// When we have a pair of subscripts of the form [c1] and [c2],
1012
// where c1 and c2 are both loop invariant, we attack it using
1013
// the ZIV test. Basically, we test by comparing the two values,
1014
// but there are actually three possible results:
1015
// 1) the values are equal, so there's a dependence
1016
// 2) the values are different, so there's no dependence
1017
// 3) the values might be equal, so we have to assume a dependence.
1018
//
1019
// Return true if dependence disproved.
1020
bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
1021
250
                             FullDependence &Result) const {
1022
250
  DEBUG(dbgs() << "    src = " << *Src << "\n");
1023
250
  DEBUG(dbgs() << "    dst = " << *Dst << "\n");
1024
250
  ++ZIVapplications;
1025
250
  if (
isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)250
) {
1026
242
    DEBUG(dbgs() << "    provably dependent\n");
1027
242
    return false; // provably dependent
1028
242
  }
1029
8
  
if (8
isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)8
) {
1030
5
    DEBUG(dbgs() << "    provably independent\n");
1031
5
    ++ZIVindependence;
1032
5
    return true; // provably independent
1033
5
  }
1034
3
  
DEBUG3
(dbgs() << " possibly dependent\n");
1035
3
  Result.Consistent = false;
1036
3
  return false; // possibly dependent
1037
3
}
1038
1039
1040
// strongSIVtest -
1041
// From the paper, Practical Dependence Testing, Section 4.2.1
1042
//
1043
// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
1044
// where i is an induction variable, c1 and c2 are loop invariant,
1045
//  and a is a constant, we can solve it exactly using the Strong SIV test.
1046
//
1047
// Can prove independence. Failing that, can compute distance (and direction).
1048
// In the presence of symbolic terms, we can sometimes make progress.
1049
//
1050
// If there's a dependence,
1051
//
1052
//    c1 + a*i = c2 + a*i'
1053
//
1054
// The dependence distance is
1055
//
1056
//    d = i' - i = (c1 - c2)/a
1057
//
1058
// A dependence only exists if d is an integer and abs(d) <= U, where U is the
1059
// loop's upper bound. If a dependence exists, the dependence direction is
1060
// defined as
1061
//
1062
//                { < if d > 0
1063
//    direction = { = if d = 0
1064
//                { > if d < 0
1065
//
1066
// Return true if dependence disproved.
1067
bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
1068
                                   const SCEV *DstConst, const Loop *CurLoop,
1069
                                   unsigned Level, FullDependence &Result,
1070
535
                                   Constraint &NewConstraint) const {
1071
535
  DEBUG(dbgs() << "\tStrong SIV test\n");
1072
535
  DEBUG(dbgs() << "\t    Coeff = " << *Coeff);
1073
535
  DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
1074
535
  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst);
1075
535
  DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
1076
535
  DEBUG(dbgs() << "\t    DstConst = " << *DstConst);
1077
535
  DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
1078
535
  ++StrongSIVapplications;
1079
535
  assert(0 < Level && Level <= CommonLevels && "level out of range");
1080
535
  Level--;
1081
535
1082
535
  const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
1083
535
  DEBUG(dbgs() << "\t    Delta = " << *Delta);
1084
535
  DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
1085
535
1086
535
  // check that |Delta| < iteration count
1087
535
  if (const SCEV *
UpperBound535
= collectUpperBound(CurLoop, Delta->getType())) {
1088
529
    DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound);
1089
529
    DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
1090
529
    const SCEV *AbsDelta =
1091
529
      SE->isKnownNonNegative(Delta) ? 
Delta497
:
SE->getNegativeSCEV(Delta)32
;
1092
529
    const SCEV *AbsCoeff =
1093
529
      SE->isKnownNonNegative(Coeff) ? 
Coeff464
:
SE->getNegativeSCEV(Coeff)65
;
1094
529
    const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
1095
529
    if (
isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)529
) {
1096
2
      // Distance greater than trip count - no dependence
1097
2
      ++StrongSIVindependence;
1098
2
      ++StrongSIVsuccesses;
1099
2
      return true;
1100
2
    }
1101
533
  }
1102
533
1103
533
  // Can we compute distance?
1104
533
  
if (533
isa<SCEVConstant>(Delta) && 533
isa<SCEVConstant>(Coeff)529
) {
1105
516
    APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt();
1106
516
    APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt();
1107
516
    APInt Distance  = ConstDelta; // these need to be initialized
1108
516
    APInt Remainder = ConstDelta;
1109
516
    APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
1110
516
    DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
1111
516
    DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
1112
516
    // Make sure Coeff divides Delta exactly
1113
516
    if (
Remainder != 0516
) {
1114
1
      // Coeff doesn't divide Distance, no dependence
1115
1
      ++StrongSIVindependence;
1116
1
      ++StrongSIVsuccesses;
1117
1
      return true;
1118
1
    }
1119
515
    Result.DV[Level].Distance = SE->getConstant(Distance);
1120
515
    NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
1121
515
    if (Distance.sgt(0))
1122
18
      Result.DV[Level].Direction &= Dependence::DVEntry::LT;
1123
497
    else 
if (497
Distance.slt(0)497
)
1124
27
      Result.DV[Level].Direction &= Dependence::DVEntry::GT;
1125
497
    else
1126
470
      Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1127
516
    ++StrongSIVsuccesses;
1128
516
  }
1129
17
  else 
if (17
Delta->isZero()17
) {
1130
13
    // since 0/X == 0
1131
13
    Result.DV[Level].Distance = Delta;
1132
13
    NewConstraint.setDistance(Delta, CurLoop);
1133
13
    Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1134
13
    ++StrongSIVsuccesses;
1135
13
  }
1136
4
  else {
1137
4
    if (
Coeff->isOne()4
) {
1138
1
      DEBUG(dbgs() << "\t    Distance = " << *Delta << "\n");
1139
1
      Result.DV[Level].Distance = Delta; // since X/1 == X
1140
1
      NewConstraint.setDistance(Delta, CurLoop);
1141
1
    }
1142
3
    else {
1143
3
      Result.Consistent = false;
1144
3
      NewConstraint.setLine(Coeff,
1145
3
                            SE->getNegativeSCEV(Coeff),
1146
3
                            SE->getNegativeSCEV(Delta), CurLoop);
1147
3
    }
1148
4
1149
4
    // maybe we can get a useful direction
1150
4
    bool DeltaMaybeZero     = !SE->isKnownNonZero(Delta);
1151
4
    bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
1152
4
    bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
1153
4
    bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
1154
4
    bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
1155
4
    // The double negatives above are confusing.
1156
4
    // It helps to read !SE->isKnownNonZero(Delta)
1157
4
    // as "Delta might be Zero"
1158
4
    unsigned NewDirection = Dependence::DVEntry::NONE;
1159
4
    if (
(DeltaMaybePositive && 4
CoeffMaybePositive4
) ||
1160
0
        
(DeltaMaybeNegative && 0
CoeffMaybeNegative0
))
1161
4
      NewDirection = Dependence::DVEntry::LT;
1162
4
    if (DeltaMaybeZero)
1163
4
      NewDirection |= Dependence::DVEntry::EQ;
1164
4
    if (
(DeltaMaybeNegative && 4
CoeffMaybePositive4
) ||
1165
0
        
(DeltaMaybePositive && 0
CoeffMaybeNegative0
))
1166
4
      NewDirection |= Dependence::DVEntry::GT;
1167
4
    if (NewDirection < Result.DV[Level].Direction)
1168
0
      ++StrongSIVsuccesses;
1169
17
    Result.DV[Level].Direction &= NewDirection;
1170
17
  }
1171
532
  return false;
1172
535
}
1173
1174
1175
// weakCrossingSIVtest -
1176
// From the paper, Practical Dependence Testing, Section 4.2.2
1177
//
1178
// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
1179
// where i is an induction variable, c1 and c2 are loop invariant,
1180
// and a is a constant, we can solve it exactly using the
1181
// Weak-Crossing SIV test.
1182
//
1183
// Given c1 + a*i = c2 - a*i', we can look for the intersection of
1184
// the two lines, where i = i', yielding
1185
//
1186
//    c1 + a*i = c2 - a*i
1187
//    2a*i = c2 - c1
1188
//    i = (c2 - c1)/2a
1189
//
1190
// If i < 0, there is no dependence.
1191
// If i > upperbound, there is no dependence.
1192
// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1193
// If i = upperbound, there's a dependence with distance = 0.
1194
// If i is integral, there's a dependence (all directions).
1195
// If the non-integer part = 1/2, there's a dependence (<> directions).
1196
// Otherwise, there's no dependence.
1197
//
1198
// Can prove independence. Failing that,
1199
// can sometimes refine the directions.
1200
// Can determine iteration for splitting.
1201
//
1202
// Return true if dependence disproved.
1203
bool DependenceInfo::weakCrossingSIVtest(
1204
    const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,
1205
    const Loop *CurLoop, unsigned Level, FullDependence &Result,
1206
29
    Constraint &NewConstraint, const SCEV *&SplitIter) const {
1207
29
  DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1208
29
  DEBUG(dbgs() << "\t    Coeff = " << *Coeff << "\n");
1209
29
  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1210
29
  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1211
29
  ++WeakCrossingSIVapplications;
1212
29
  assert(0 < Level && Level <= CommonLevels && "Level out of range");
1213
29
  Level--;
1214
29
  Result.Consistent = false;
1215
29
  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1216
29
  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1217
29
  NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
1218
29
  if (
Delta->isZero()29
) {
1219
1
    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
1220
1
    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
1221
1
    ++WeakCrossingSIVsuccesses;
1222
1
    if (
!Result.DV[Level].Direction1
) {
1223
0
      ++WeakCrossingSIVindependence;
1224
0
      return true;
1225
0
    }
1226
1
    Result.DV[Level].Distance = Delta; // = 0
1227
1
    return false;
1228
1
  }
1229
28
  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
1230
28
  if (!ConstCoeff)
1231
0
    return false;
1232
28
1233
28
  Result.DV[Level].Splitable = true;
1234
28
  if (
SE->isKnownNegative(ConstCoeff)28
) {
1235
14
    ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
1236
14
    assert(ConstCoeff &&
1237
14
           "dynamic cast of negative of ConstCoeff should yield constant");
1238
14
    Delta = SE->getNegativeSCEV(Delta);
1239
14
  }
1240
28
  assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
1241
28
1242
28
  // compute SplitIter for use by DependenceInfo::getSplitIteration()
1243
28
  SplitIter = SE->getUDivExpr(
1244
28
      SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta),
1245
28
      SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));
1246
28
  DEBUG(dbgs() << "\t    Split iter = " << *SplitIter << "\n");
1247
28
1248
28
  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1249
28
  if (!ConstDelta)
1250
2
    return false;
1251
26
1252
26
  // We're certain that ConstCoeff > 0; therefore,
1253
26
  // if Delta < 0, then no dependence.
1254
26
  
DEBUG26
(dbgs() << "\t Delta = " << *Delta << "\n");
1255
26
  DEBUG(dbgs() << "\t    ConstCoeff = " << *ConstCoeff << "\n");
1256
26
  if (
SE->isKnownNegative(Delta)26
) {
1257
1
    // No dependence, Delta < 0
1258
1
    ++WeakCrossingSIVindependence;
1259
1
    ++WeakCrossingSIVsuccesses;
1260
1
    return true;
1261
1
  }
1262
25
1263
25
  // We're certain that Delta > 0 and ConstCoeff > 0.
1264
25
  // Check Delta/(2*ConstCoeff) against upper loop bound
1265
25
  
if (const SCEV *25
UpperBound25
= collectUpperBound(CurLoop, Delta->getType())) {
1266
25
    DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
1267
25
    const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
1268
25
    const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
1269
25
                                    ConstantTwo);
1270
25
    DEBUG(dbgs() << "\t    ML = " << *ML << "\n");
1271
25
    if (
isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)25
) {
1272
1
      // Delta too big, no dependence
1273
1
      ++WeakCrossingSIVindependence;
1274
1
      ++WeakCrossingSIVsuccesses;
1275
1
      return true;
1276
1
    }
1277
24
    
if (24
isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)24
) {
1278
2
      // i = i' = UB
1279
2
      Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
1280
2
      Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
1281
2
      ++WeakCrossingSIVsuccesses;
1282
2
      if (
!Result.DV[Level].Direction2
) {
1283
0
        ++WeakCrossingSIVindependence;
1284
0
        return true;
1285
0
      }
1286
2
      Result.DV[Level].Splitable = false;
1287
2
      Result.DV[Level].Distance = SE->getZero(Delta->getType());
1288
2
      return false;
1289
2
    }
1290
25
  }
1291
22
1292
22
  // check that Coeff divides Delta
1293
22
  APInt APDelta = ConstDelta->getAPInt();
1294
22
  APInt APCoeff = ConstCoeff->getAPInt();
1295
22
  APInt Distance = APDelta; // these need to be initialzed
1296
22
  APInt Remainder = APDelta;
1297
22
  APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
1298
22
  DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
1299
22
  if (
Remainder != 022
) {
1300
1
    // Coeff doesn't divide Delta, no dependence
1301
1
    ++WeakCrossingSIVindependence;
1302
1
    ++WeakCrossingSIVsuccesses;
1303
1
    return true;
1304
1
  }
1305
21
  
DEBUG21
(dbgs() << "\t Distance = " << Distance << "\n");
1306
21
1307
21
  // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
1308
21
  APInt Two = APInt(Distance.getBitWidth(), 2, true);
1309
21
  Remainder = Distance.srem(Two);
1310
21
  DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
1311
21
  if (
Remainder != 021
) {
1312
5
    // Equal direction isn't possible
1313
5
    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
1314
5
    ++WeakCrossingSIVsuccesses;
1315
5
  }
1316
29
  return false;
1317
29
}
1318
1319
1320
// Kirch's algorithm, from
1321
//
1322
//        Optimizing Supercompilers for Supercomputers
1323
//        Michael Wolfe
1324
//        MIT Press, 1989
1325
//
1326
// Program 2.1, page 29.
1327
// Computes the GCD of AM and BM.
1328
// Also finds a solution to the equation ax - by = gcd(a, b).
1329
// Returns true if dependence disproved; i.e., gcd does not divide Delta.
1330
static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
1331
62
                    const APInt &Delta, APInt &G, APInt &X, APInt &Y) {
1332
62
  APInt A0(Bits, 1, true), A1(Bits, 0, true);
1333
62
  APInt B0(Bits, 0, true), B1(Bits, 1, true);
1334
62
  APInt G0 = AM.abs();
1335
62
  APInt G1 = BM.abs();
1336
62
  APInt Q = G0; // these need to be initialized
1337
62
  APInt R = G0;
1338
62
  APInt::sdivrem(G0, G1, Q, R);
1339
71
  while (
R != 071
) {
1340
9
    APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
1341
9
    APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
1342
9
    G0 = G1; G1 = R;
1343
9
    APInt::sdivrem(G0, G1, Q, R);
1344
9
  }
1345
62
  G = G1;
1346
62
  DEBUG(dbgs() << "\t    GCD = " << G << "\n");
1347
62
  X = AM.slt(0) ? 
-A111
:
A151
;
1348
62
  Y = BM.slt(0) ? 
B112
:
-B150
;
1349
62
1350
62
  // make sure gcd divides Delta
1351
62
  R = Delta.srem(G);
1352
62
  if (R != 0)
1353
4
    return true; // gcd doesn't divide Delta, no dependence
1354
58
  Q = Delta.sdiv(G);
1355
58
  X *= Q;
1356
58
  Y *= Q;
1357
58
  return false;
1358
58
}
1359
1360
173
static APInt floorOfQuotient(const APInt &A, const APInt &B) {
1361
173
  APInt Q = A; // these need to be initialized
1362
173
  APInt R = A;
1363
173
  APInt::sdivrem(A, B, Q, R);
1364
173
  if (R == 0)
1365
120
    return Q;
1366
53
  
if (53
(A.sgt(0) && 53
B.sgt(0)27
) ||
1367
38
      
(A.slt(0) && 38
B.slt(0)26
))
1368
41
    return Q;
1369
53
  else
1370
12
    return Q - 1;
1371
0
}
1372
1373
187
static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
1374
187
  APInt Q = A; // these need to be initialized
1375
187
  APInt R = A;
1376
187
  APInt::sdivrem(A, B, Q, R);
1377
187
  if (R == 0)
1378
134
    return Q;
1379
53
  
if (53
(A.sgt(0) && 53
B.sgt(0)45
) ||
1380
14
      
(A.slt(0) && 14
B.slt(0)8
))
1381
39
    return Q + 1;
1382
53
  else
1383
14
    return Q;
1384
0
}
1385
1386
1387
static
1388
187
APInt maxAPInt(APInt A, APInt B) {
1389
187
  return A.sgt(B) ? 
A10
:
B177
;
1390
187
}
1391
1392
1393
static
1394
173
APInt minAPInt(APInt A, APInt B) {
1395
173
  return A.slt(B) ? 
A10
:
B163
;
1396
173
}
1397
1398
1399
// exactSIVtest -
1400
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1401
// where i is an induction variable, c1 and c2 are loop invariant, and a1
1402
// and a2 are constant, we can solve it exactly using an algorithm developed
1403
// by Banerjee and Wolfe. See Section 2.5.3 in
1404
//
1405
//        Optimizing Supercompilers for Supercomputers
1406
//        Michael Wolfe
1407
//        MIT Press, 1989
1408
//
1409
// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1410
// so use them if possible. They're also a bit better with symbolics and,
1411
// in the case of the strong SIV test, can compute Distances.
1412
//
1413
// Return true if dependence disproved.
1414
bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1415
                                  const SCEV *SrcConst, const SCEV *DstConst,
1416
                                  const Loop *CurLoop, unsigned Level,
1417
                                  FullDependence &Result,
1418
53
                                  Constraint &NewConstraint) const {
1419
53
  DEBUG(dbgs() << "\tExact SIV test\n");
1420
53
  DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
1421
53
  DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
1422
53
  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1423
53
  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1424
53
  ++ExactSIVapplications;
1425
53
  assert(0 < Level && Level <= CommonLevels && "Level out of range");
1426
53
  Level--;
1427
53
  Result.Consistent = false;
1428
53
  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1429
53
  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1430
53
  NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
1431
53
                        Delta, CurLoop);
1432
53
  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1433
53
  const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1434
53
  const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1435
53
  if (
!ConstDelta || 53
!ConstSrcCoeff41
||
!ConstDstCoeff41
)
1436
12
    return false;
1437
41
1438
41
  // find gcd
1439
41
  APInt G, X, Y;
1440
41
  APInt AM = ConstSrcCoeff->getAPInt();
1441
41
  APInt BM = ConstDstCoeff->getAPInt();
1442
41
  unsigned Bits = AM.getBitWidth();
1443
41
  if (
findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)41
) {
1444
3
    // gcd doesn't divide Delta, no dependence
1445
3
    ++ExactSIVindependence;
1446
3
    ++ExactSIVsuccesses;
1447
3
    return true;
1448
3
  }
1449
38
1450
38
  
DEBUG38
(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
1451
38
1452
38
  // since SCEV construction normalizes, LM = 0
1453
38
  APInt UM(Bits, 1, true);
1454
38
  bool UMvalid = false;
1455
38
  // UM is perhaps unavailable, let's check
1456
38
  if (const SCEVConstant *CUB =
1457
36
      collectConstantUpperBound(CurLoop, Delta->getType())) {
1458
36
    UM = CUB->getAPInt();
1459
36
    DEBUG(dbgs() << "\t    UM = " << UM << "\n");
1460
36
    UMvalid = true;
1461
36
  }
1462
38
1463
38
  APInt TU(APInt::getSignedMaxValue(Bits));
1464
38
  APInt TL(APInt::getSignedMinValue(Bits));
1465
38
1466
38
  // test(BM/G, LM-X) and test(-BM/G, X-UM)
1467
38
  APInt TMUL = BM.sdiv(G);
1468
38
  if (
TMUL.sgt(0)38
) {
1469
32
    TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
1470
32
    DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1471
32
    if (
UMvalid32
) {
1472
30
      TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
1473
30
      DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1474
30
    }
1475
32
  }
1476
6
  else {
1477
6
    TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
1478
6
    DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1479
6
    if (
UMvalid6
) {
1480
6
      TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
1481
6
      DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1482
6
    }
1483
6
  }
1484
38
1485
38
  // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
1486
38
  TMUL = AM.sdiv(G);
1487
38
  if (
TMUL.sgt(0)38
) {
1488
32
    TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
1489
32
    DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1490
32
    if (
UMvalid32
) {
1491
30
      TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
1492
30
      DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1493
30
    }
1494
32
  }
1495
6
  else {
1496
6
    TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
1497
6
    DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1498
6
    if (
UMvalid6
) {
1499
6
      TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
1500
6
      DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1501
6
    }
1502
6
  }
1503
38
  if (
TL.sgt(TU)38
) {
1504
2
    ++ExactSIVindependence;
1505
2
    ++ExactSIVsuccesses;
1506
2
    return true;
1507
2
  }
1508
36
1509
36
  // explore directions
1510
36
  unsigned NewDirection = Dependence::DVEntry::NONE;
1511
36
1512
36
  // less than
1513
36
  APInt SaveTU(TU); // save these
1514
36
  APInt SaveTL(TL);
1515
36
  DEBUG(dbgs() << "\t    exploring LT direction\n");
1516
36
  TMUL = AM - BM;
1517
36
  if (
TMUL.sgt(0)36
) {
1518
26
    TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
1519
26
    DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
1520
26
  }
1521
10
  else {
1522
10
    TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
1523
10
    DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
1524
10
  }
1525
36
  if (
TL.sle(TU)36
) {
1526
23
    NewDirection |= Dependence::DVEntry::LT;
1527
23
    ++ExactSIVsuccesses;
1528
23
  }
1529
36
1530
36
  // equal
1531
36
  TU = SaveTU; // restore
1532
36
  TL = SaveTL;
1533
36
  DEBUG(dbgs() << "\t    exploring EQ direction\n");
1534
36
  if (
TMUL.sgt(0)36
) {
1535
26
    TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
1536
26
    DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
1537
26
  }
1538
10
  else {
1539
10
    TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
1540
10
    DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
1541
10
  }
1542
36
  TMUL = BM - AM;
1543
36
  if (
TMUL.sgt(0)36
) {
1544
10
    TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
1545
10
    DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
1546
10
  }
1547
26
  else {
1548
26
    TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
1549
26
    DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
1550
26
  }
1551
36
  if (
TL.sle(TU)36
) {
1552
29
    NewDirection |= Dependence::DVEntry::EQ;
1553
29
    ++ExactSIVsuccesses;
1554
29
  }
1555
36
1556
36
  // greater than
1557
36
  TU = SaveTU; // restore
1558
36
  TL = SaveTL;
1559
36
  DEBUG(dbgs() << "\t    exploring GT direction\n");
1560
36
  if (
TMUL.sgt(0)36
) {
1561
10
    TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
1562
10
    DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
1563
10
  }
1564
26
  else {
1565
26
    TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
1566
26
    DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
1567
26
  }
1568
36
  if (
TL.sle(TU)36
) {
1569
35
    NewDirection |= Dependence::DVEntry::GT;
1570
35
    ++ExactSIVsuccesses;
1571
35
  }
1572
36
1573
36
  // finished
1574
36
  Result.DV[Level].Direction &= NewDirection;
1575
36
  if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
1576
1
    ++ExactSIVindependence;
1577
53
  return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
1578
53
}
1579
1580
1581
1582
// Return true if the divisor evenly divides the dividend.
1583
static
1584
bool isRemainderZero(const SCEVConstant *Dividend,
1585
14
                     const SCEVConstant *Divisor) {
1586
14
  const APInt &ConstDividend = Dividend->getAPInt();
1587
14
  const APInt &ConstDivisor = Divisor->getAPInt();
1588
14
  return ConstDividend.srem(ConstDivisor) == 0;
1589
14
}
1590
1591
1592
// weakZeroSrcSIVtest -
1593
// From the paper, Practical Dependence Testing, Section 4.2.2
1594
//
1595
// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
1596
// where i is an induction variable, c1 and c2 are loop invariant,
1597
// and a is a constant, we can solve it exactly using the
1598
// Weak-Zero SIV test.
1599
//
1600
// Given
1601
//
1602
//    c1 = c2 + a*i
1603
//
1604
// we get
1605
//
1606
//    (c1 - c2)/a = i
1607
//
1608
// If i is not an integer, there's no dependence.
1609
// If i < 0 or > UB, there's no dependence.
1610
// If i = 0, the direction is <= and peeling the
1611
// 1st iteration will break the dependence.
1612
// If i = UB, the direction is >= and peeling the
1613
// last iteration will break the dependence.
1614
// Otherwise, the direction is *.
1615
//
1616
// Can prove independence. Failing that, we can sometimes refine
1617
// the directions. Can sometimes show that first or last
1618
// iteration carries all the dependences (so worth peeling).
1619
//
1620
// (see also weakZeroDstSIVtest)
1621
//
1622
// Return true if dependence disproved.
1623
bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
1624
                                        const SCEV *SrcConst,
1625
                                        const SCEV *DstConst,
1626
                                        const Loop *CurLoop, unsigned Level,
1627
                                        FullDependence &Result,
1628
13
                                        Constraint &NewConstraint) const {
1629
13
  // For the WeakSIV test, it's possible the loop isn't common to
1630
13
  // the Src and Dst loops. If it isn't, then there's no need to
1631
13
  // record a direction.
1632
13
  DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1633
13
  DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << "\n");
1634
13
  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1635
13
  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1636
13
  ++WeakZeroSIVapplications;
1637
13
  assert(0 < Level && Level <= MaxLevels && "Level out of range");
1638
13
  Level--;
1639
13
  Result.Consistent = false;
1640
13
  const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
1641
13
  NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
1642
13
                        CurLoop);
1643
13
  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1644
13
  if (
isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)13
) {
1645
5
    if (
Level < CommonLevels5
) {
1646
4
      Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1647
4
      Result.DV[Level].PeelFirst = true;
1648
4
      ++WeakZeroSIVsuccesses;
1649
4
    }
1650
5
    return false; // dependences caused by first iteration
1651
5
  }
1652
8
  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1653
8
  if (!ConstCoeff)
1654
0
    return false;
1655
8
  const SCEV *AbsCoeff =
1656
8
    SE->isKnownNegative(ConstCoeff) ?
1657
8
    
SE->getNegativeSCEV(ConstCoeff)0
:
ConstCoeff8
;
1658
8
  const SCEV *NewDelta =
1659
8
    SE->isKnownNegative(ConstCoeff) ? 
SE->getNegativeSCEV(Delta)0
:
Delta8
;
1660
8
1661
8
  // check that Delta/SrcCoeff < iteration count
1662
8
  // really check NewDelta < count*AbsCoeff
1663
8
  if (const SCEV *
UpperBound8
= collectUpperBound(CurLoop, Delta->getType())) {
1664
8
    DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
1665
8
    const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
1666
8
    if (
isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)8
) {
1667
1
      ++WeakZeroSIVindependence;
1668
1
      ++WeakZeroSIVsuccesses;
1669
1
      return true;
1670
1
    }
1671
7
    
if (7
isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)7
) {
1672
1
      // dependences caused by last iteration
1673
1
      if (
Level < CommonLevels1
) {
1674
1
        Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1675
1
        Result.DV[Level].PeelLast = true;
1676
1
        ++WeakZeroSIVsuccesses;
1677
1
      }
1678
1
      return false;
1679
1
    }
1680
6
  }
1681
6
1682
6
  // check that Delta/SrcCoeff >= 0
1683
6
  // really check that NewDelta >= 0
1684
6
  
if (6
SE->isKnownNegative(NewDelta)6
) {
1685
2
    // No dependence, newDelta < 0
1686
2
    ++WeakZeroSIVindependence;
1687
2
    ++WeakZeroSIVsuccesses;
1688
2
    return true;
1689
2
  }
1690
4
1691
4
  // if SrcCoeff doesn't divide Delta, then no dependence
1692
4
  
if (4
isa<SCEVConstant>(Delta) &&
1693
4
      
!isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)4
) {
1694
1
    ++WeakZeroSIVindependence;
1695
1
    ++WeakZeroSIVsuccesses;
1696
1
    return true;
1697
1
  }
1698
3
  return false;
1699
3
}
1700
1701
1702
// weakZeroDstSIVtest -
1703
// From the paper, Practical Dependence Testing, Section 4.2.2
1704
//
1705
// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
1706
// where i is an induction variable, c1 and c2 are loop invariant,
1707
// and a is a constant, we can solve it exactly using the
1708
// Weak-Zero SIV test.
1709
//
1710
// Given
1711
//
1712
//    c1 + a*i = c2
1713
//
1714
// we get
1715
//
1716
//    i = (c2 - c1)/a
1717
//
1718
// If i is not an integer, there's no dependence.
1719
// If i < 0 or > UB, there's no dependence.
1720
// If i = 0, the direction is <= and peeling the
1721
// 1st iteration will break the dependence.
1722
// If i = UB, the direction is >= and peeling the
1723
// last iteration will break the dependence.
1724
// Otherwise, the direction is *.
1725
//
1726
// Can prove independence. Failing that, we can sometimes refine
1727
// the directions. Can sometimes show that first or last
1728
// iteration carries all the dependences (so worth peeling).
1729
//
1730
// (see also weakZeroSrcSIVtest)
1731
//
1732
// Return true if dependence disproved.
1733
bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
1734
                                        const SCEV *SrcConst,
1735
                                        const SCEV *DstConst,
1736
                                        const Loop *CurLoop, unsigned Level,
1737
                                        FullDependence &Result,
1738
20
                                        Constraint &NewConstraint) const {
1739
20
  // For the WeakSIV test, it's possible the loop isn't common to the
1740
20
  // Src and Dst loops. If it isn't, then there's no need to record a direction.
1741
20
  DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
1742
20
  DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << "\n");
1743
20
  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1744
20
  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1745
20
  ++WeakZeroSIVapplications;
1746
20
  assert(0 < Level && Level <= SrcLevels && "Level out of range");
1747
20
  Level--;
1748
20
  Result.Consistent = false;
1749
20
  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1750
20
  NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
1751
20
                        CurLoop);
1752
20
  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1753
20
  if (
isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)20
) {
1754
5
    if (
Level < CommonLevels5
) {
1755
3
      Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1756
3
      Result.DV[Level].PeelFirst = true;
1757
3
      ++WeakZeroSIVsuccesses;
1758
3
    }
1759
5
    return false; // dependences caused by first iteration
1760
5
  }
1761
15
  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1762
15
  if (!ConstCoeff)
1763
0
    return false;
1764
15
  const SCEV *AbsCoeff =
1765
15
    SE->isKnownNegative(ConstCoeff) ?
1766
15
    
SE->getNegativeSCEV(ConstCoeff)0
:
ConstCoeff15
;
1767
15
  const SCEV *NewDelta =
1768
15
    SE->isKnownNegative(ConstCoeff) ? 
SE->getNegativeSCEV(Delta)0
:
Delta15
;
1769
15
1770
15
  // check that Delta/SrcCoeff < iteration count
1771
15
  // really check NewDelta < count*AbsCoeff
1772
15
  if (const SCEV *
UpperBound15
= collectUpperBound(CurLoop, Delta->getType())) {
1773
15
    DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
1774
15
    const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
1775
15
    if (
isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)15
) {
1776
1
      ++WeakZeroSIVindependence;
1777
1
      ++WeakZeroSIVsuccesses;
1778
1
      return true;
1779
1
    }
1780
14
    
if (14
isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)14
) {
1781
1
      // dependences caused by last iteration
1782
1
      if (
Level < CommonLevels1
) {
1783
1
        Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1784
1
        Result.DV[Level].PeelLast = true;
1785
1
        ++WeakZeroSIVsuccesses;
1786
1
      }
1787
1
      return false;
1788
1
    }
1789
13
  }
1790
13
1791
13
  // check that Delta/SrcCoeff >= 0
1792
13
  // really check that NewDelta >= 0
1793
13
  
if (13
SE->isKnownNegative(NewDelta)13
) {
1794
1
    // No dependence, newDelta < 0
1795
1
    ++WeakZeroSIVindependence;
1796
1
    ++WeakZeroSIVsuccesses;
1797
1
    return true;
1798
1
  }
1799
12
1800
12
  // if SrcCoeff doesn't divide Delta, then no dependence
1801
12
  
if (12
isa<SCEVConstant>(Delta) &&
1802
12
      
!isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)10
) {
1803
1
    ++WeakZeroSIVindependence;
1804
1
    ++WeakZeroSIVsuccesses;
1805
1
    return true;
1806
1
  }
1807
11
  return false;
1808
11
}
1809
1810
1811
// exactRDIVtest - Tests the RDIV subscript pair for dependence.
1812
// Things of the form [c1 + a*i] and [c2 + b*j],
1813
// where i and j are induction variable, c1 and c2 are loop invariant,
1814
// and a and b are constants.
1815
// Returns true if any possible dependence is disproved.
1816
// Marks the result as inconsistent.
1817
// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
1818
bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1819
                                   const SCEV *SrcConst, const SCEV *DstConst,
1820
                                   const Loop *SrcLoop, const Loop *DstLoop,
1821
29
                                   FullDependence &Result) const {
1822
29
  DEBUG(dbgs() << "\tExact RDIV test\n");
1823
29
  DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
1824
29
  DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
1825
29
  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1826
29
  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1827
29
  ++ExactRDIVapplications;
1828
29
  Result.Consistent = false;
1829
29
  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1830
29
  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1831
29
  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1832
29
  const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1833
29
  const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1834
29
  if (
!ConstDelta || 29
!ConstSrcCoeff21
||
!ConstDstCoeff21
)
1835
8
    return false;
1836
21
1837
21
  // find gcd
1838
21
  APInt G, X, Y;
1839
21
  APInt AM = ConstSrcCoeff->getAPInt();
1840
21
  APInt BM = ConstDstCoeff->getAPInt();
1841
21
  unsigned Bits = AM.getBitWidth();
1842
21
  if (
findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)21
) {
1843
1
    // gcd doesn't divide Delta, no dependence
1844
1
    ++ExactRDIVindependence;
1845
1
    return true;
1846
1
  }
1847
20
1848
20
  
DEBUG20
(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
1849
20
1850
20
  // since SCEV construction seems to normalize, LM = 0
1851
20
  APInt SrcUM(Bits, 1, true);
1852
20
  bool SrcUMvalid = false;
1853
20
  // SrcUM is perhaps unavailable, let's check
1854
20
  if (const SCEVConstant *UpperBound =
1855
14
      collectConstantUpperBound(SrcLoop, Delta->getType())) {
1856
14
    SrcUM = UpperBound->getAPInt();
1857
14
    DEBUG(dbgs() << "\t    SrcUM = " << SrcUM << "\n");
1858
14
    SrcUMvalid = true;
1859
14
  }
1860
20
1861
20
  APInt DstUM(Bits, 1, true);
1862
20
  bool DstUMvalid = false;
1863
20
  // UM is perhaps unavailable, let's check
1864
20
  if (const SCEVConstant *UpperBound =
1865
14
      collectConstantUpperBound(DstLoop, Delta->getType())) {
1866
14
    DstUM = UpperBound->getAPInt();
1867
14
    DEBUG(dbgs() << "\t    DstUM = " << DstUM << "\n");
1868
14
    DstUMvalid = true;
1869
14
  }
1870
20
1871
20
  APInt TU(APInt::getSignedMaxValue(Bits));
1872
20
  APInt TL(APInt::getSignedMinValue(Bits));
1873
20
1874
20
  // test(BM/G, LM-X) and test(-BM/G, X-UM)
1875
20
  APInt TMUL = BM.sdiv(G);
1876
20
  if (
TMUL.sgt(0)20
) {
1877
14
    TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
1878
14
    DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1879
14
    if (
SrcUMvalid14
) {
1880
9
      TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL));
1881
9
      DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1882
9
    }
1883
14
  }
1884
6
  else {
1885
6
    TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
1886
6
    DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1887
6
    if (
SrcUMvalid6
) {
1888
5
      TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL));
1889
5
      DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1890
5
    }
1891
6
  }
1892
20
1893
20
  // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
1894
20
  TMUL = AM.sdiv(G);
1895
20
  if (
TMUL.sgt(0)20
) {
1896
15
    TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
1897
15
    DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1898
15
    if (
DstUMvalid15
) {
1899
9
      TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL));
1900
9
      DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1901
9
    }
1902
15
  }
1903
5
  else {
1904
5
    TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
1905
5
    DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1906
5
    if (
DstUMvalid5
) {
1907
5
      TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL));
1908
5
      DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1909
5
    }
1910
5
  }
1911
20
  if (TL.sgt(TU))
1912
11
    ++ExactRDIVindependence;
1913
29
  return TL.sgt(TU);
1914
29
}
1915
1916
1917
// symbolicRDIVtest -
1918
// In Section 4.5 of the Practical Dependence Testing paper,the authors
1919
// introduce a special case of Banerjee's Inequalities (also called the
1920
// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
1921
// particularly cases with symbolics. Since it's only able to disprove
1922
// dependence (not compute distances or directions), we'll use it as a
1923
// fall back for the other tests.
1924
//
1925
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
1926
// where i and j are induction variables and c1 and c2 are loop invariants,
1927
// we can use the symbolic tests to disprove some dependences, serving as a
1928
// backup for the RDIV test. Note that i and j can be the same variable,
1929
// letting this test serve as a backup for the various SIV tests.
1930
//
1931
// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
1932
//  0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
1933
// loop bounds for the i and j loops, respectively. So, ...
1934
//
1935
// c1 + a1*i = c2 + a2*j
1936
// a1*i - a2*j = c2 - c1
1937
//
1938
// To test for a dependence, we compute c2 - c1 and make sure it's in the
1939
// range of the maximum and minimum possible values of a1*i - a2*j.
1940
// Considering the signs of a1 and a2, we have 4 possible cases:
1941
//
1942
// 1) If a1 >= 0 and a2 >= 0, then
1943
//        a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
1944
//              -a2*N2 <= c2 - c1 <= a1*N1
1945
//
1946
// 2) If a1 >= 0 and a2 <= 0, then
1947
//        a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
1948
//                  0 <= c2 - c1 <= a1*N1 - a2*N2
1949
//
1950
// 3) If a1 <= 0 and a2 >= 0, then
1951
//        a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
1952
//        a1*N1 - a2*N2 <= c2 - c1 <= 0
1953
//
1954
// 4) If a1 <= 0 and a2 <= 0, then
1955
//        a1*N1 - a2*0  <= c2 - c1 <= a1*0 - a2*N2
1956
//        a1*N1         <= c2 - c1 <=       -a2*N2
1957
//
1958
// return true if dependence disproved
1959
bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
1960
                                      const SCEV *C1, const SCEV *C2,
1961
                                      const Loop *Loop1,
1962
621
                                      const Loop *Loop2) const {
1963
621
  ++SymbolicRDIVapplications;
1964
621
  DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
1965
621
  DEBUG(dbgs() << "\t    A1 = " << *A1);
1966
621
  DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
1967
621
  DEBUG(dbgs() << "\t    A2 = " << *A2 << "\n");
1968
621
  DEBUG(dbgs() << "\t    C1 = " << *C1 << "\n");
1969
621
  DEBUG(dbgs() << "\t    C2 = " << *C2 << "\n");
1970
621
  const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
1971
621
  const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
1972
621
  DEBUG(if (N1) dbgs() << "\t    N1 = " << *N1 << "\n");
1973
621
  DEBUG(if (N2) dbgs() << "\t    N2 = " << *N2 << "\n");
1974
621
  const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
1975
621
  const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
1976
621
  DEBUG(dbgs() << "\t    C2 - C1 = " << *C2_C1 << "\n");
1977
621
  DEBUG(dbgs() << "\t    C1 - C2 = " << *C1_C2 << "\n");
1978
621
  if (
SE->isKnownNonNegative(A1)621
) {
1979
526
    if (
SE->isKnownNonNegative(A2)526
) {
1980
512
      // A1 >= 0 && A2 >= 0
1981
512
      if (
N1512
) {
1982
508
        // make sure that c2 - c1 <= a1*N1
1983
508
        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
1984
508
        DEBUG(dbgs() << "\t    A1*N1 = " << *A1N1 << "\n");
1985
508
        if (
isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)508
) {
1986
2
          ++SymbolicRDIVindependence;
1987
2
          return true;
1988
2
        }
1989
510
      }
1990
510
      
if (510
N2510
) {
1991
506
        // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
1992
506
        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
1993
506
        DEBUG(dbgs() << "\t    A2*N2 = " << *A2N2 << "\n");
1994
506
        if (
isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)506
) {
1995
2
          ++SymbolicRDIVindependence;
1996
2
          return true;
1997
2
        }
1998
526
      }
1999
512
    }
2000
14
    else 
if (14
SE->isKnownNonPositive(A2)14
) {
2001
14
      // a1 >= 0 && a2 <= 0
2002
14
      if (
N1 && 14
N214
) {
2003
14
        // make sure that c2 - c1 <= a1*N1 - a2*N2
2004
14
        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2005
14
        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2006
14
        const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
2007
14
        DEBUG(dbgs() << "\t    A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2008
14
        if (
isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)14
) {
2009
2
          ++SymbolicRDIVindependence;
2010
2
          return true;
2011
2
        }
2012
12
      }
2013
12
      // make sure that 0 <= c2 - c1
2014
12
      
if (12
SE->isKnownNegative(C2_C1)12
) {
2015
0
        ++SymbolicRDIVindependence;
2016
0
        return true;
2017
0
      }
2018
621
    }
2019
526
  }
2020
95
  else 
if (95
SE->isKnownNonPositive(A1)95
) {
2021
79
    if (
SE->isKnownNonNegative(A2)79
) {
2022
16
      // a1 <= 0 && a2 >= 0
2023
16
      if (
N1 && 16
N216
) {
2024
16
        // make sure that a1*N1 - a2*N2 <= c2 - c1
2025
16
        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2026
16
        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2027
16
        const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
2028
16
        DEBUG(dbgs() << "\t    A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2029
16
        if (
isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)16
) {
2030
2
          ++SymbolicRDIVindependence;
2031
2
          return true;
2032
2
        }
2033
14
      }
2034
14
      // make sure that c2 - c1 <= 0
2035
14
      
if (14
SE->isKnownPositive(C2_C1)14
) {
2036
0
        ++SymbolicRDIVindependence;
2037
0
        return true;
2038
0
      }
2039
79
    }
2040
63
    else 
if (63
SE->isKnownNonPositive(A2)63
) {
2041
63
      // a1 <= 0 && a2 <= 0
2042
63
      if (
N163
) {
2043
63
        // make sure that a1*N1 <= c2 - c1
2044
63
        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2045
63
        DEBUG(dbgs() << "\t    A1*N1 = " << *A1N1 << "\n");
2046
63
        if (
isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)63
) {
2047
2
          ++SymbolicRDIVindependence;
2048
2
          return true;
2049
2
        }
2050
61
      }
2051
61
      
if (61
N261
) {
2052
61
        // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
2053
61
        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2054
61
        DEBUG(dbgs() << "\t    A2*N2 = " << *A2N2 << "\n");
2055
61
        if (
isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)61
) {
2056
3
          ++SymbolicRDIVindependence;
2057
3
          return true;
2058
3
        }
2059
608
      }
2060
63
    }
2061
95
  }
2062
608
  return false;
2063
608
}
2064
2065
2066
// testSIV -
2067
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
2068
// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
2069
// a2 are constant, we attack it with an SIV test. While they can all be
2070
// solved with the Exact SIV test, it's worthwhile to use simpler tests when
2071
// they apply; they're cheaper and sometimes more precise.
2072
//
2073
// Return true if dependence disproved.
2074
bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
2075
                             FullDependence &Result, Constraint &NewConstraint,
2076
650
                             const SCEV *&SplitIter) const {
2077
650
  DEBUG(dbgs() << "    src = " << *Src << "\n");
2078
650
  DEBUG(dbgs() << "    dst = " << *Dst << "\n");
2079
650
  const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
2080
650
  const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
2081
650
  if (
SrcAddRec && 650
DstAddRec637
) {
2082
617
    const SCEV *SrcConst = SrcAddRec->getStart();
2083
617
    const SCEV *DstConst = DstAddRec->getStart();
2084
617
    const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2085
617
    const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
2086
617
    const Loop *CurLoop = SrcAddRec->getLoop();
2087
617
    assert(CurLoop == DstAddRec->getLoop() &&
2088
617
           "both loops in SIV should be same");
2089
617
    Level = mapSrcLoop(CurLoop);
2090
617
    bool disproven;
2091
617
    if (SrcCoeff == DstCoeff)
2092
535
      disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2093
535
                                Level, Result, NewConstraint);
2094
82
    else 
if (82
SrcCoeff == SE->getNegativeSCEV(DstCoeff)82
)
2095
29
      disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2096
29
                                      Level, Result, NewConstraint, SplitIter);
2097
82
    else
2098
53
      disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
2099
53
                               Level, Result, NewConstraint);
2100
617
    return disproven ||
2101
605
      gcdMIVtest(Src, Dst, Result) ||
2102
604
      symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
2103
617
  }
2104
33
  
if (33
SrcAddRec33
) {
2105
20
    const SCEV *SrcConst = SrcAddRec->getStart();
2106
20
    const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2107
20
    const SCEV *DstConst = Dst;
2108
20
    const Loop *CurLoop = SrcAddRec->getLoop();
2109
20
    Level = mapSrcLoop(CurLoop);
2110
20
    return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2111
20
                              Level, Result, NewConstraint) ||
2112
17
      gcdMIVtest(Src, Dst, Result);
2113
20
  }
2114
13
  
if (13
DstAddRec13
) {
2115
13
    const SCEV *DstConst = DstAddRec->getStart();
2116
13
    const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
2117
13
    const SCEV *SrcConst = Src;
2118
13
    const Loop *CurLoop = DstAddRec->getLoop();
2119
13
    Level = mapDstLoop(CurLoop);
2120
13
    return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
2121
13
                              CurLoop, Level, Result, NewConstraint) ||
2122
9
      gcdMIVtest(Src, Dst, Result);
2123
13
  }
2124
0
  
llvm_unreachable0
("SIV test expected at least one AddRec");
2125
0
  return false;
2126
650
}
2127
2128
2129
// testRDIV -
2130
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2131
// where i and j are induction variables, c1 and c2 are loop invariant,
2132
// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
2133
// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
2134
// It doesn't make sense to talk about distance or direction in this case,
2135
// so there's no point in making special versions of the Strong SIV test or
2136
// the Weak-crossing SIV test.
2137
//
2138
// With minor algebra, this test can also be used for things like
2139
// [c1 + a1*i + a2*j][c2].
2140
//
2141
// Return true if dependence disproved.
2142
bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
2143
29
                              FullDependence &Result) const {
2144
29
  // we have 3 possible situations here:
2145
29
  //   1) [a*i + b] and [c*j + d]
2146
29
  //   2) [a*i + c*j + b] and [d]
2147
29
  //   3) [b] and [a*i + c*j + d]
2148
29
  // We need to find what we've got and get organized
2149
29
2150
29
  const SCEV *SrcConst, *DstConst;
2151
29
  const SCEV *SrcCoeff, *DstCoeff;
2152
29
  const Loop *SrcLoop, *DstLoop;
2153
29
2154
29
  DEBUG(dbgs() << "    src = " << *Src << "\n");
2155
29
  DEBUG(dbgs() << "    dst = " << *Dst << "\n");
2156
29
  const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
2157
29
  const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
2158
29
  if (
SrcAddRec && 29
DstAddRec27
) {
2159
20
    SrcConst = SrcAddRec->getStart();
2160
20
    SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2161
20
    SrcLoop = SrcAddRec->getLoop();
2162
20
    DstConst = DstAddRec->getStart();
2163
20
    DstCoeff = DstAddRec->getStepRecurrence(*SE);
2164
20
    DstLoop = DstAddRec->getLoop();
2165
20
  }
2166
9
  else 
if (9
SrcAddRec9
) {
2167
7
    if (const SCEVAddRecExpr *tmpAddRec =
2168
7
        dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
2169
7
      SrcConst = tmpAddRec->getStart();
2170
7
      SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
2171
7
      SrcLoop = tmpAddRec->getLoop();
2172
7
      DstConst = Dst;
2173
7
      DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
2174
7
      DstLoop = SrcAddRec->getLoop();
2175
7
    }
2176
7
    else
2177
0
      llvm_unreachable("RDIV reached by surprising SCEVs");
2178
7
  }
2179
2
  else 
if (2
DstAddRec2
) {
2180
2
    if (const SCEVAddRecExpr *tmpAddRec =
2181
2
        dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
2182
2
      DstConst = tmpAddRec->getStart();
2183
2
      DstCoeff = tmpAddRec->getStepRecurrence(*SE);
2184
2
      DstLoop = tmpAddRec->getLoop();
2185
2
      SrcConst = Src;
2186
2
      SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
2187
2
      SrcLoop = DstAddRec->getLoop();
2188
2
    }
2189
2
    else
2190
0
      llvm_unreachable("RDIV reached by surprising SCEVs");
2191
2
  }
2192
2
  else
2193
0
    llvm_unreachable("RDIV expected at least one AddRec");
2194
29
  return exactRDIVtest(SrcCoeff, DstCoeff,
2195
29
                       SrcConst, DstConst,
2196
29
                       SrcLoop, DstLoop,
2197
29
                       Result) ||
2198
17
    gcdMIVtest(Src, Dst, Result) ||
2199
17
    symbolicRDIVtest(SrcCoeff, DstCoeff,
2200
17
                     SrcConst, DstConst,
2201
17
                     SrcLoop, DstLoop);
2202
29
}
2203
2204
2205
// Tests the single-subscript MIV pair (Src and Dst) for dependence.
2206
// Return true if dependence disproved.
2207
// Can sometimes refine direction vectors.
2208
bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
2209
                             const SmallBitVector &Loops,
2210
200
                             FullDependence &Result) const {
2211
200
  DEBUG(dbgs() << "    src = " << *Src << "\n");
2212
200
  DEBUG(dbgs() << "    dst = " << *Dst << "\n");
2213
200
  Result.Consistent = false;
2214
200
  return gcdMIVtest(Src, Dst, Result) ||
2215
191
    banerjeeMIVtest(Src, Dst, Loops, Result);
2216
200
}
2217
2218
2219
// Given a product, e.g., 10*X*Y, returns the first constant operand,
2220
// in this case 10. If there is no constant part, returns NULL.
2221
static
2222
2.37k
const SCEVConstant *getConstantPart(const SCEV *Expr) {
2223
2.37k
  if (const auto *Constant = dyn_cast<SCEVConstant>(Expr))
2224
2.29k
    return Constant;
2225
76
  else 
if (const auto *76
Product76
= dyn_cast<SCEVMulExpr>(Expr))
2226
66
    
if (const auto *66
Constant66
= dyn_cast<SCEVConstant>(Product->getOperand(0)))
2227
65
      return Constant;
2228
11
  return nullptr;
2229
11
}
2230
2231
2232
//===----------------------------------------------------------------------===//
2233
// gcdMIVtest -
2234
// Tests an MIV subscript pair for dependence.
2235
// Returns true if any possible dependence is disproved.
2236
// Marks the result as inconsistent.
2237
// Can sometimes disprove the equal direction for 1 or more loops,
2238
// as discussed in Michael Wolfe's book,
2239
// High Performance Compilers for Parallel Computing, page 235.
2240
//
2241
// We spend some effort (code!) to handle cases like
2242
// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
2243
// but M and N are just loop-invariant variables.
2244
// This should help us handle linearized subscripts;
2245
// also makes this test a useful backup to the various SIV tests.
2246
//
2247
// It occurs to me that the presence of loop-invariant variables
2248
// changes the nature of the test from "greatest common divisor"
2249
// to "a common divisor".
2250
bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
2251
848
                                FullDependence &Result) const {
2252
848
  DEBUG(dbgs() << "starting gcd\n");
2253
848
  ++GCDapplications;
2254
848
  unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
2255
848
  APInt RunningGCD = APInt::getNullValue(BitWidth);
2256
848
2257
848
  // Examine Src coefficients.
2258
848
  // Compute running GCD and record source constant.
2259
848
  // Because we're looking for the constant at the end of the chain,
2260
848
  // we can't quit the loop just because the GCD == 1.
2261
848
  const SCEV *Coefficients = Src;
2262
1.88k
  while (const SCEVAddRecExpr *AddRec =
2263
1.04k
         dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2264
1.04k
    const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2265
1.04k
    // If the coefficient is the product of a constant and other stuff,
2266
1.04k
    // we can use the constant in the GCD computation.
2267
1.04k
    const auto *Constant = getConstantPart(Coeff);
2268
1.04k
    if (!Constant)
2269
9
      return false;
2270
1.03k
    APInt ConstCoeff = Constant->getAPInt();
2271
1.03k
    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2272
1.03k
    Coefficients = AddRec->getStart();
2273
1.03k
  }
2274
839
  const SCEV *SrcConst = Coefficients;
2275
839
2276
839
  // Examine Dst coefficients.
2277
839
  // Compute running GCD and record destination constant.
2278
839
  // Because we're looking for the constant at the end of the chain,
2279
839
  // we can't quit the loop just because the GCD == 1.
2280
839
  Coefficients = Dst;
2281
1.86k
  while (const SCEVAddRecExpr *AddRec =
2282
1.02k
         dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2283
1.02k
    const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2284
1.02k
    // If the coefficient is the product of a constant and other stuff,
2285
1.02k
    // we can use the constant in the GCD computation.
2286
1.02k
    const auto *Constant = getConstantPart(Coeff);
2287
1.02k
    if (!Constant)
2288
1
      return false;
2289
1.02k
    APInt ConstCoeff = Constant->getAPInt();
2290
1.02k
    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2291
1.02k
    Coefficients = AddRec->getStart();
2292
1.02k
  }
2293
838
  const SCEV *DstConst = Coefficients;
2294
838
2295
838
  APInt ExtraGCD = APInt::getNullValue(BitWidth);
2296
838
  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
2297
838
  DEBUG(dbgs() << "    Delta = " << *Delta << "\n");
2298
838
  const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
2299
838
  if (const SCEVAddExpr *
Sum838
= dyn_cast<SCEVAddExpr>(Delta)) {
2300
14
    // If Delta is a sum of products, we may be able to make further progress.
2301
38
    for (unsigned Op = 0, Ops = Sum->getNumOperands(); 
Op < Ops38
;
Op++24
) {
2302
29
      const SCEV *Operand = Sum->getOperand(Op);
2303
29
      if (
isa<SCEVConstant>(Operand)29
) {
2304
12
        assert(!Constant && "Surprised to find multiple constants");
2305
12
        Constant = cast<SCEVConstant>(Operand);
2306
12
      }
2307
17
      else 
if (const SCEVMulExpr *17
Product17
= dyn_cast<SCEVMulExpr>(Operand)) {
2308
12
        // Search for constant operand to participate in GCD;
2309
12
        // If none found; return false.
2310
12
        const SCEVConstant *ConstOp = getConstantPart(Product);
2311
12
        if (!ConstOp)
2312
0
          return false;
2313
12
        APInt ConstOpValue = ConstOp->getAPInt();
2314
12
        ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
2315
12
                                                   ConstOpValue.abs());
2316
12
      }
2317
17
      else
2318
5
        return false;
2319
29
    }
2320
14
  }
2321
833
  
if (833
!Constant833
)
2322
17
    return false;
2323
816
  APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();
2324
816
  DEBUG(dbgs() << "    ConstDelta = " << ConstDelta << "\n");
2325
816
  if (ConstDelta == 0)
2326
645
    return false;
2327
171
  RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
2328
171
  DEBUG(dbgs() << "    RunningGCD = " << RunningGCD << "\n");
2329
171
  APInt Remainder = ConstDelta.srem(RunningGCD);
2330
171
  if (
Remainder != 0171
) {
2331
10
    ++GCDindependence;
2332
10
    return true;
2333
10
  }
2334
161
2335
161
  // Try to disprove equal directions.
2336
161
  // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
2337
161
  // the code above can't disprove the dependence because the GCD = 1.
2338
161
  // So we consider what happen if i = i' and what happens if j = j'.
2339
161
  // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
2340
161
  // which is infeasible, so we can disallow the = direction for the i level.
2341
161
  // Setting j = j' doesn't help matters, so we end up with a direction vector
2342
161
  // of [<>, *]
2343
161
  //
2344
161
  // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
2345
161
  // we need to remember that the constant part is 5 and the RunningGCD should
2346
161
  // be initialized to ExtraGCD = 30.
2347
161
  
DEBUG161
(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
2348
161
2349
161
  bool Improved = false;
2350
161
  Coefficients = Src;
2351
351
  while (const SCEVAddRecExpr *AddRec =
2352
190
         dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2353
190
    Coefficients = AddRec->getStart();
2354
190
    const Loop *CurLoop = AddRec->getLoop();
2355
190
    RunningGCD = ExtraGCD;
2356
190
    const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
2357
190
    const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
2358
190
    const SCEV *Inner = Src;
2359
418
    while (
RunningGCD != 1 && 418
isa<SCEVAddRecExpr>(Inner)383
) {
2360
228
      AddRec = cast<SCEVAddRecExpr>(Inner);
2361
228
      const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2362
228
      if (CurLoop == AddRec->getLoop())
2363
162
        ; // SrcCoeff == Coeff
2364
66
      else {
2365
66
        // If the coefficient is the product of a constant and other stuff,
2366
66
        // we can use the constant in the GCD computation.
2367
66
        Constant = getConstantPart(Coeff);
2368
66
        if (!Constant)
2369
0
          return false;
2370
66
        APInt ConstCoeff = Constant->getAPInt();
2371
66
        RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2372
66
      }
2373
228
      Inner = AddRec->getStart();
2374
228
    }
2375
190
    Inner = Dst;
2376
368
    while (
RunningGCD != 1 && 368
isa<SCEVAddRecExpr>(Inner)324
) {
2377
178
      AddRec = cast<SCEVAddRecExpr>(Inner);
2378
178
      const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2379
178
      if (CurLoop == AddRec->getLoop())
2380
139
        DstCoeff = Coeff;
2381
39
      else {
2382
39
        // If the coefficient is the product of a constant and other stuff,
2383
39
        // we can use the constant in the GCD computation.
2384
39
        Constant = getConstantPart(Coeff);
2385
39
        if (!Constant)
2386
0
          return false;
2387
39
        APInt ConstCoeff = Constant->getAPInt();
2388
39
        RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2389
39
      }
2390
178
      Inner = AddRec->getStart();
2391
178
    }
2392
190
    Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
2393
190
    // If the coefficient is the product of a constant and other stuff,
2394
190
    // we can use the constant in the GCD computation.
2395
190
    Constant = getConstantPart(Delta);
2396
190
    if (!Constant)
2397
190
      // The difference of the two coefficients might not be a product
2398
190
      // or constant, in which case we give up on this direction.
2399
1
      continue;
2400
189
    APInt ConstCoeff = Constant->getAPInt();
2401
189
    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2402
189
    DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
2403
189
    if (
RunningGCD != 0189
) {
2404
144
      Remainder = ConstDelta.srem(RunningGCD);
2405
144
      DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
2406
144
      if (
Remainder != 0144
) {
2407
30
        unsigned Level = mapSrcLoop(CurLoop);
2408
30
        Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
2409
30
        Improved = true;
2410
30
      }
2411
144
    }
2412
190
  }
2413
161
  
if (161
Improved161
)
2414
30
    ++GCDsuccesses;
2415
161
  DEBUG(dbgs() << "all done\n");
2416
161
  return false;
2417
848
}
2418
2419
2420
//===----------------------------------------------------------------------===//
2421
// banerjeeMIVtest -
2422
// Use Banerjee's Inequalities to test an MIV subscript pair.
2423
// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
2424
// Generally follows the discussion in Section 2.5.2 of
2425
//
2426
//    Optimizing Supercompilers for Supercomputers
2427
//    Michael Wolfe
2428
//
2429
// The inequalities given on page 25 are simplified in that loops are
2430
// normalized so that the lower bound is always 0 and the stride is always 1.
2431
// For example, Wolfe gives
2432
//
2433
//     LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2434
//
2435
// where A_k is the coefficient of the kth index in the source subscript,
2436
// B_k is the coefficient of the kth index in the destination subscript,
2437
// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
2438
// index, and N_k is the stride of the kth index. Since all loops are normalized
2439
// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
2440
// equation to
2441
//
2442
//     LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
2443
//            = (A^-_k - B_k)^- (U_k - 1)  - B_k
2444
//
2445
// Similar simplifications are possible for the other equations.
2446
//
2447
// When we can't determine the number of iterations for a loop,
2448
// we use NULL as an indicator for the worst case, infinity.
2449
// When computing the upper bound, NULL denotes +inf;
2450
// for the lower bound, NULL denotes -inf.
2451
//
2452
// Return true if dependence disproved.
2453
bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
2454
                                     const SmallBitVector &Loops,
2455
191
                                     FullDependence &Result) const {
2456
191
  DEBUG(dbgs() << "starting Banerjee\n");
2457
191
  ++BanerjeeApplications;
2458
191
  DEBUG(dbgs() << "    Src = " << *Src << '\n');
2459
191
  const SCEV *A0;
2460
191
  CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
2461
191
  DEBUG(dbgs() << "    Dst = " << *Dst << '\n');
2462
191
  const SCEV *B0;
2463
191
  CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
2464
191
  BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
2465
191
  const SCEV *Delta = SE->getMinusSCEV(B0, A0);
2466
191
  DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
2467
191
2468
191
  // Compute bounds for all the * directions.
2469
191
  DEBUG(dbgs() << "\tBounds[*]\n");
2470
646
  for (unsigned K = 1; 
K <= MaxLevels646
;
++K455
) {
2471
455
    Bound[K].Iterations = A[K].Iterations ? 
A[K].Iterations384
:
B[K].Iterations71
;
2472
455
    Bound[K].Direction = Dependence::DVEntry::ALL;
2473
455
    Bound[K].DirSet = Dependence::DVEntry::NONE;
2474
455
    findBoundsALL(A, B, Bound, K);
2475
#ifndef NDEBUG
2476
    DEBUG(dbgs() << "\t    " << K << '\t');
2477
    if (Bound[K].Lower[Dependence::DVEntry::ALL])
2478
      DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
2479
    else
2480
      DEBUG(dbgs() << "-inf\t");
2481
    if (Bound[K].Upper[Dependence::DVEntry::ALL])
2482
      DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
2483
    else
2484
      DEBUG(dbgs() << "+inf\n");
2485
#endif
2486
  }
2487
191
2488
191
  // Test the *, *, *, ... case.
2489
191
  bool Disproved = false;
2490
191
  if (
testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)191
) {
2491
187
    // Explore the direction vector hierarchy.
2492
187
    unsigned DepthExpanded = 0;
2493
187
    unsigned NewDeps = exploreDirections(1, A, B, Bound,
2494
187
                                         Loops, DepthExpanded, Delta);
2495
187
    if (
NewDeps > 0187
) {
2496
187
      bool Improved = false;
2497
632
      for (unsigned K = 1; 
K <= CommonLevels632
;
++K445
) {
2498
445
        if (
Loops[K]445
) {
2499
386
          unsigned Old = Result.DV[K - 1].Direction;
2500
386
          Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
2501
386
          Improved |= Old != Result.DV[K - 1].Direction;
2502
386
          if (
!Result.DV[K - 1].Direction386
) {
2503
0
            Improved = false;
2504
0
            Disproved = true;
2505
0
            break;
2506
0
          }
2507
386
        }
2508
445
      }
2509
187
      if (Improved)
2510
128
        ++BanerjeeSuccesses;
2511
187
    }
2512
0
    else {
2513
0
      ++BanerjeeIndependence;
2514
0
      Disproved = true;
2515
0
    }
2516
187
  }
2517
4
  else {
2518
4
    ++BanerjeeIndependence;
2519
4
    Disproved = true;
2520
4
  }
2521
191
  delete [] Bound;
2522
191
  delete [] A;
2523
191
  delete [] B;
2524
191
  return Disproved;
2525
191
}
2526
2527
2528
// Hierarchically expands the direction vector
2529
// search space, combining the directions of discovered dependences
2530
// in the DirSet field of Bound. Returns the number of distinct
2531
// dependences discovered. If the dependence is disproved,
2532
// it will return 0.
2533
unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
2534
                                           CoefficientInfo *B, BoundInfo *Bound,
2535
                                           const SmallBitVector &Loops,
2536
                                           unsigned &DepthExpanded,
2537
1.19k
                                           const SCEV *Delta) const {
2538
1.19k
  if (
Level > CommonLevels1.19k
) {
2539
521
    // record result
2540
521
    DEBUG(dbgs() << "\t[");
2541
2.17k
    for (unsigned K = 1; 
K <= CommonLevels2.17k
;
++K1.65k
) {
2542
1.65k
      if (
Loops[K]1.65k
) {
2543
1.15k
        Bound[K].DirSet |= Bound[K].Direction;
2544
#ifndef NDEBUG
2545
        switch (Bound[K].Direction) {
2546
        case Dependence::DVEntry::LT:
2547
          DEBUG(dbgs() << " <");
2548
          break;
2549
        case Dependence::DVEntry::EQ:
2550
          DEBUG(dbgs() << " =");
2551
          break;
2552
        case Dependence::DVEntry::GT:
2553
          DEBUG(dbgs() << " >");
2554
          break;
2555
        case Dependence::DVEntry::ALL:
2556
          DEBUG(dbgs() << " *");
2557
          break;
2558
        default:
2559
          llvm_unreachable("unexpected Bound[K].Direction");
2560
        }
2561
#endif
2562
      }
2563
1.65k
    }
2564
521
    DEBUG(dbgs() << " ]\n");
2565
521
    return 1;
2566
521
  }
2567
673
  
if (673
Loops[Level]673
) {
2568
562
    if (
Level > DepthExpanded562
) {
2569
386
      DepthExpanded = Level;
2570
386
      // compute bounds for <, =, > at current level
2571
386
      findBoundsLT(A, B, Bound, Level);
2572
386
      findBoundsGT(A, B, Bound, Level);
2573
386
      findBoundsEQ(A, B, Bound, Level);
2574
#ifndef NDEBUG
2575
      DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
2576
      DEBUG(dbgs() << "\t    <\t");
2577
      if (Bound[Level].Lower[Dependence::DVEntry::LT])
2578
        DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t');
2579
      else
2580
        DEBUG(dbgs() << "-inf\t");
2581
      if (Bound[Level].Upper[Dependence::DVEntry::LT])
2582
        DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n');
2583
      else
2584
        DEBUG(dbgs() << "+inf\n");
2585
      DEBUG(dbgs() << "\t    =\t");
2586
      if (Bound[Level].Lower[Dependence::DVEntry::EQ])
2587
        DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t');
2588
      else
2589
        DEBUG(dbgs() << "-inf\t");
2590
      if (Bound[Level].Upper[Dependence::DVEntry::EQ])
2591
        DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n');
2592
      else
2593
        DEBUG(dbgs() << "+inf\n");
2594
      DEBUG(dbgs() << "\t    >\t");
2595
      if (Bound[Level].Lower[Dependence::DVEntry::GT])
2596
        DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t');
2597
      else
2598
        DEBUG(dbgs() << "-inf\t");
2599
      if (Bound[Level].Upper[Dependence::DVEntry::GT])
2600
        DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n');
2601
      else
2602
        DEBUG(dbgs() << "+inf\n");
2603
#endif
2604
    }
2605
562
2606
562
    unsigned NewDeps = 0;
2607
562
2608
562
    // test bounds for <, *, *, ...
2609
562
    if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
2610
228
      NewDeps += exploreDirections(Level + 1, A, B, Bound,
2611
228
                                   Loops, DepthExpanded, Delta);
2612
562
2613
562
    // Test bounds for =, *, *, ...
2614
562
    if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
2615
442
      NewDeps += exploreDirections(Level + 1, A, B, Bound,
2616
442
                                   Loops, DepthExpanded, Delta);
2617
562
2618
562
    // test bounds for >, *, *, ...
2619
562
    if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
2620
226
      NewDeps += exploreDirections(Level + 1, A, B, Bound,
2621
226
                                   Loops, DepthExpanded, Delta);
2622
562
2623
562
    Bound[Level].Direction = Dependence::DVEntry::ALL;
2624
562
    return NewDeps;
2625
562
  }
2626
673
  else
2627
111
    return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
2628
0
}
2629
2630
2631
// Returns true iff the current bounds are plausible.
2632
bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
2633
1.87k
                                BoundInfo *Bound, const SCEV *Delta) const {
2634
1.87k
  Bound[Level].Direction = DirKind;
2635
1.87k
  if (const SCEV *LowerBound = getLowerBound(Bound))
2636
1.87k
    
if (1.87k
isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)1.87k
)
2637
407
      return false;
2638
1.47k
  
if (const SCEV *1.47k
UpperBound1.47k
= getUpperBound(Bound))
2639
1.47k
    
if (1.47k
isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)1.47k
)
2640
387
      return false;
2641
1.08k
  return true;
2642
1.08k
}
2643
2644
2645
// Computes the upper and lower bounds for level K
2646
// using the * direction. Records them in Bound.
2647
// Wolfe gives the equations
2648
//
2649
//    LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
2650
//    UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
2651
//
2652
// Since we normalize loops, we can simplify these equations to
2653
//
2654
//    LB^*_k = (A^-_k - B^+_k)U_k
2655
//    UB^*_k = (A^+_k - B^-_k)U_k
2656
//
2657
// We must be careful to handle the case where the upper bound is unknown.
2658
// Note that the lower bound is always <= 0
2659
// and the upper bound is always >= 0.
2660
void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B,
2661
455
                                   BoundInfo *Bound, unsigned K) const {
2662
455
  Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity.
2663
455
  Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity.
2664
455
  if (
Bound[K].Iterations455
) {
2665
391
    Bound[K].Lower[Dependence::DVEntry::ALL] =
2666
391
      SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
2667
391
                     Bound[K].Iterations);
2668
391
    Bound[K].Upper[Dependence::DVEntry::ALL] =
2669
391
      SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
2670
391
                     Bound[K].Iterations);
2671
391
  }
2672
64
  else {
2673
64
    // If the difference is 0, we won't need to know the number of iterations.
2674
64
    if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
2675
64
      Bound[K].Lower[Dependence::DVEntry::ALL] =
2676
64
          SE->getZero(A[K].Coeff->getType());
2677
64
    if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
2678
64
      Bound[K].Upper[Dependence::DVEntry::ALL] =
2679
64
          SE->getZero(A[K].Coeff->getType());
2680
64
  }
2681
455
}
2682
2683
2684
// Computes the upper and lower bounds for level K
2685
// using the = direction. Records them in Bound.
2686
// Wolfe gives the equations
2687
//
2688
//    LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
2689
//    UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
2690
//
2691
// Since we normalize loops, we can simplify these equations to
2692
//
2693
//    LB^=_k = (A_k - B_k)^- U_k
2694
//    UB^=_k = (A_k - B_k)^+ U_k
2695
//
2696
// We must be careful to handle the case where the upper bound is unknown.
2697
// Note that the lower bound is always <= 0
2698
// and the upper bound is always >= 0.
2699
void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B,
2700
386
                                  BoundInfo *Bound, unsigned K) const {
2701
386
  Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity.
2702
386
  Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity.
2703
386
  if (
Bound[K].Iterations386
) {
2704
382
    const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
2705
382
    const SCEV *NegativePart = getNegativePart(Delta);
2706
382
    Bound[K].Lower[Dependence::DVEntry::EQ] =
2707
382
      SE->getMulExpr(NegativePart, Bound[K].Iterations);
2708
382
    const SCEV *PositivePart = getPositivePart(Delta);
2709
382
    Bound[K].Upper[Dependence::DVEntry::EQ] =
2710
382
      SE->getMulExpr(PositivePart, Bound[K].Iterations);
2711
382
  }
2712
4
  else {
2713
4
    // If the positive/negative part of the difference is 0,
2714
4
    // we won't need to know the number of iterations.
2715
4
    const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
2716
4
    const SCEV *NegativePart = getNegativePart(Delta);
2717
4
    if (NegativePart->isZero())
2718
4
      Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
2719
4
    const SCEV *PositivePart = getPositivePart(Delta);
2720
4
    if (PositivePart->isZero())
2721
4
      Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
2722
4
  }
2723
386
}
2724
2725
2726
// Computes the upper and lower bounds for level K
2727
// using the < direction. Records them in Bound.
2728
// Wolfe gives the equations
2729
//
2730
//    LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2731
//    UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2732
//
2733
// Since we normalize loops, we can simplify these equations to
2734
//
2735
//    LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
2736
//    UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
2737
//
2738
// We must be careful to handle the case where the upper bound is unknown.
2739
void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B,
2740
386
                                  BoundInfo *Bound, unsigned K) const {
2741
386
  Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.
2742
386
  Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.
2743
386
  if (
Bound[K].Iterations386
) {
2744
382
    const SCEV *Iter_1 = SE->getMinusSCEV(
2745
382
        Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
2746
382
    const SCEV *NegPart =
2747
382
      getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
2748
382
    Bound[K].Lower[Dependence::DVEntry::LT] =
2749
382
      SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
2750
382
    const SCEV *PosPart =
2751
382
      getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
2752
382
    Bound[K].Upper[Dependence::DVEntry::LT] =
2753
382
      SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
2754
382
  }
2755
4
  else {
2756
4
    // If the positive/negative part of the difference is 0,
2757
4
    // we won't need to know the number of iterations.
2758
4
    const SCEV *NegPart =
2759
4
      getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
2760
4
    if (NegPart->isZero())
2761
4
      Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
2762
4
    const SCEV *PosPart =
2763
4
      getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
2764
4
    if (PosPart->isZero())
2765
4
      Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
2766
4
  }
2767
386
}
2768
2769
2770
// Computes the upper and lower bounds for level K
2771
// using the > direction. Records them in Bound.
2772
// Wolfe gives the equations
2773
//
2774
//    LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2775
//    UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2776
//
2777
// Since we normalize loops, we can simplify these equations to
2778
//
2779
//    LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
2780
//    UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
2781
//
2782
// We must be careful to handle the case where the upper bound is unknown.
2783
void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B,
2784
386
                                  BoundInfo *Bound, unsigned K) const {
2785
386
  Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.
2786
386
  Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.
2787
386
  if (
Bound[K].Iterations386
) {
2788
382
    const SCEV *Iter_1 = SE->getMinusSCEV(
2789
382
        Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
2790
382
    const SCEV *NegPart =
2791
382
      getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
2792
382
    Bound[K].Lower[Dependence::DVEntry::GT] =
2793
382
      SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
2794
382
    const SCEV *PosPart =
2795
382
      getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
2796
382
    Bound[K].Upper[Dependence::DVEntry::GT] =
2797
382
      SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
2798
382
  }
2799
4
  else {
2800
4
    // If the positive/negative part of the difference is 0,
2801
4
    // we won't need to know the number of iterations.
2802
4
    const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
2803
4
    if (NegPart->isZero())
2804
4
      Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
2805
4
    const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
2806
4
    if (PosPart->isZero())
2807
4
      Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
2808
4
  }
2809
386
}
2810
2811
2812
// X^+ = max(X, 0)
2813
1.92k
const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {
2814
1.92k
  return SE->getSMaxExpr(X, SE->getZero(X->getType()));
2815
1.92k
}
2816
2817
2818
// X^- = min(X, 0)
2819
1.92k
const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {
2820
1.92k
  return SE->getSMinExpr(X, SE->getZero(X->getType()));
2821
1.92k
}
2822
2823
2824
// Walks through the subscript,
2825
// collecting each coefficient, the associated loop bounds,
2826
// and recording its positive and negative parts for later use.
2827
DependenceInfo::CoefficientInfo *
2828
DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,
2829
382
                                 const SCEV *&Constant) const {
2830
382
  const SCEV *Zero = SE->getZero(Subscript->getType());
2831
382
  CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
2832
1.29k
  for (unsigned K = 1; 
K <= MaxLevels1.29k
;
++K910
) {
2833
910
    CI[K].Coeff = Zero;
2834
910
    CI[K].PosPart = Zero;
2835
910
    CI[K].NegPart = Zero;
2836
910
    CI[K].Iterations = nullptr;
2837
910
  }
2838
1.15k
  while (const SCEVAddRecExpr *
AddRec1.15k
= dyn_cast<SCEVAddRecExpr>(Subscript)) {
2839
768
    const Loop *L = AddRec->getLoop();
2840
768
    unsigned K = SrcFlag ? 
mapSrcLoop(L)384
:
mapDstLoop(L)384
;
2841
768
    CI[K].Coeff = AddRec->getStepRecurrence(*SE);
2842
768
    CI[K].PosPart = getPositivePart(CI[K].Coeff);
2843
768
    CI[K].NegPart = getNegativePart(CI[K].Coeff);
2844
768
    CI[K].Iterations = collectUpperBound(L, Subscript->getType());
2845
768
    Subscript = AddRec->getStart();
2846
768
  }
2847
382
  Constant = Subscript;
2848
#ifndef NDEBUG
2849
  DEBUG(dbgs() << "\tCoefficient Info\n");
2850
  for (unsigned K = 1; K <= MaxLevels; ++K) {
2851
    DEBUG(dbgs() << "\t    " << K << "\t" << *CI[K].Coeff);
2852
    DEBUG(dbgs() << "\tPos Part = ");
2853
    DEBUG(dbgs() << *CI[K].PosPart);
2854
    DEBUG(dbgs() << "\tNeg Part = ");
2855
    DEBUG(dbgs() << *CI[K].NegPart);
2856
    DEBUG(dbgs() << "\tUpper Bound = ");
2857
    if (CI[K].Iterations)
2858
      DEBUG(dbgs() << *CI[K].Iterations);
2859
    else
2860
      DEBUG(dbgs() << "+inf");
2861
    DEBUG(dbgs() << '\n');
2862
  }
2863
  DEBUG(dbgs() << "\t    Constant = " << *Subscript << '\n');
2864
#endif
2865
  return CI;
2866
382
}
2867
2868
2869
// Looks through all the bounds info and
2870
// computes the lower bound given the current direction settings
2871
// at each level. If the lower bound for any level is -inf,
2872
// the result is -inf.
2873
1.87k
const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const {
2874
1.87k
  const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
2875
5.10k
  for (unsigned K = 2; 
Sum && 5.10k
K <= MaxLevels5.10k
;
++K3.22k
) {
2876
3.22k
    if (Bound[K].Lower[Bound[K].Direction])
2877
3.22k
      Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
2878
3.22k
    else
2879
0
      Sum = nullptr;
2880
3.22k
  }
2881
1.87k
  return Sum;
2882
1.87k
}
2883
2884
2885
// Looks through all the bounds info and
2886
// computes the upper bound given the current direction settings
2887
// at each level. If the upper bound at any level is +inf,
2888
// the result is +inf.
2889
1.47k
const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const {
2890
1.47k
  const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
2891
4.13k
  for (unsigned K = 2; 
Sum && 4.13k
K <= MaxLevels4.13k
;
++K2.66k
) {
2892
2.66k
    if (Bound[K].Upper[Bound[K].Direction])
2893
2.66k
      Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
2894
2.66k
    else
2895
0
      Sum = nullptr;
2896
2.66k
  }
2897
1.47k
  return Sum;
2898
1.47k
}
2899
2900
2901
//===----------------------------------------------------------------------===//
2902
// Constraint manipulation for Delta test.
2903
2904
// Given a linear SCEV,
2905
// return the coefficient (the step)
2906
// corresponding to the specified loop.
2907
// If there isn't one, return 0.
2908
// For example, given a*i + b*j + c*k, finding the coefficient
2909
// corresponding to the j loop would yield b.
2910
const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr,
2911
252
                                            const Loop *TargetLoop) const {
2912
252
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
2913
252
  if (!AddRec)
2914
63
    return SE->getZero(Expr->getType());
2915
189
  
if (189
AddRec->getLoop() == TargetLoop189
)
2916
67
    return AddRec->getStepRecurrence(*SE);
2917
122
  return findCoefficient(AddRec->getStart(), TargetLoop);
2918
122
}
2919
2920
2921
// Given a linear SCEV,
2922
// return the SCEV given by zeroing out the coefficient
2923
// corresponding to the specified loop.
2924
// For example, given a*i + b*j + c*k, zeroing the coefficient
2925
// corresponding to the j loop would yield a*i + c*k.
2926
const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr,
2927
129
                                            const Loop *TargetLoop) const {
2928
129
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
2929
129
  if (!AddRec)
2930
2
    return Expr; // ignore
2931
127
  
if (127
AddRec->getLoop() == TargetLoop127
)
2932
64
    return AddRec->getStart();
2933
63
  return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
2934
63
                           AddRec->getStepRecurrence(*SE),
2935
63
                           AddRec->getLoop(),
2936
63
                           AddRec->getNoWrapFlags());
2937
63
}
2938
2939
2940
// Given a linear SCEV Expr,
2941
// return the SCEV given by adding some Value to the
2942
// coefficient corresponding to the specified TargetLoop.
2943
// For example, given a*i + b*j + c*k, adding 1 to the coefficient
2944
// corresponding to the j loop would yield a*i + (b+1)*j + c*k.
2945
const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr,
2946
                                             const Loop *TargetLoop,
2947
111
                                             const SCEV *Value) const {
2948
111
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
2949
111
  if (!AddRec) // create a new addRec
2950
0
    return SE->getAddRecExpr(Expr,
2951
0
                             Value,
2952
0
                             TargetLoop,
2953
0
                             SCEV::FlagAnyWrap); // Worst case, with no info.
2954
111
  
if (111
AddRec->getLoop() == TargetLoop111
) {
2955
55
    const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
2956
55
    if (Sum->isZero())
2957
53
      return AddRec->getStart();
2958
2
    return SE->getAddRecExpr(AddRec->getStart(),
2959
2
                             Sum,
2960
2
                             AddRec->getLoop(),
2961
2
                             AddRec->getNoWrapFlags());
2962
2
  }
2963
56
  
if (56
SE->isLoopInvariant(AddRec, TargetLoop)56
)
2964
1
    return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap);
2965
55
  return SE->getAddRecExpr(
2966
55
      addToCoefficient(AddRec->getStart(), TargetLoop, Value),
2967
55
      AddRec->getStepRecurrence(*SE), AddRec->getLoop(),
2968
55
      AddRec->getNoWrapFlags());
2969
55
}
2970
2971
2972
// Review the constraints, looking for opportunities
2973
// to simplify a subscript pair (Src and Dst).
2974
// Return true if some simplification occurs.
2975
// If the simplification isn't exact (that is, if it is conservative
2976
// in terms of dependence), set consistent to false.
2977
// Corresponds to Figure 5 from the paper
2978
//
2979
//            Practical Dependence Testing
2980
//            Goff, Kennedy, Tseng
2981
//            PLDI 1991
2982
bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
2983
                               SmallBitVector &Loops,
2984
                               SmallVectorImpl<Constraint> &Constraints,
2985
72
                               bool &Consistent) {
2986
72
  bool Result = false;
2987
149
  for (unsigned LI : Loops.set_bits()) {
2988
149
    DEBUG(dbgs() << "\t    Constraint[" << LI << "] is");
2989
149
    DEBUG(Constraints[LI].dump(dbgs()));
2990
149
    if (Constraints[LI].isDistance())
2991
58
      Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
2992
91
    else 
if (91
Constraints[LI].isLine()91
)
2993
6
      Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
2994
85
    else 
if (85
Constraints[LI].isPoint()85
)
2995
3
      Result |= propagatePoint(Src, Dst, Constraints[LI]);
2996
149
  }
2997
72
  return Result;
2998
72
}
2999
3000
3001
// Attempt to propagate a distance
3002
// constraint into a subscript pair (Src and Dst).
3003
// Return true if some simplification occurs.
3004
// If the simplification isn't exact (that is, if it is conservative
3005
// in terms of dependence), set consistent to false.
3006
bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
3007
                                       Constraint &CurConstraint,
3008
58
                                       bool &Consistent) {
3009
58
  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3010
58
  DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3011
58
  const SCEV *A_K = findCoefficient(Src, CurLoop);
3012
58
  if (A_K->isZero())
3013
4
    return false;
3014
54
  const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
3015
54
  Src = SE->getMinusSCEV(Src, DA_K);
3016
54
  Src = zeroCoefficient(Src, CurLoop);
3017
54
  DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3018
54
  DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3019
54
  Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
3020
54
  DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3021
54
  if (!findCoefficient(Dst, CurLoop)->isZero())
3022
3
    Consistent = false;
3023
58
  return true;
3024
58
}
3025
3026
3027
// Attempt to propagate a line
3028
// constraint into a subscript pair (Src and Dst).
3029
// Return true if some simplification occurs.
3030
// If the simplification isn't exact (that is, if it is conservative
3031
// in terms of dependence), set consistent to false.
3032
bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
3033
                                   Constraint &CurConstraint,
3034
6
                                   bool &Consistent) {
3035
6
  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3036
6
  const SCEV *A = CurConstraint.getA();
3037
6
  const SCEV *B = CurConstraint.getB();
3038
6
  const SCEV *C = CurConstraint.getC();
3039
6
  DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n");
3040
6
  DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
3041
6
  DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
3042
6
  if (
A->isZero()6
) {
3043
1
    const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
3044
1
    const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3045
1
    if (
!Bconst || 1
!Cconst1
)
return false0
;
3046
1
    APInt Beta = Bconst->getAPInt();
3047
1
    APInt Charlie = Cconst->getAPInt();
3048
1
    APInt CdivB = Charlie.sdiv(Beta);
3049
1
    assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
3050
1
    const SCEV *AP_K = findCoefficient(Dst, CurLoop);
3051
1
    //    Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3052
1
    Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3053
1
    Dst = zeroCoefficient(Dst, CurLoop);
3054
1
    if (!findCoefficient(Src, CurLoop)->isZero())
3055
0
      Consistent = false;
3056
1
  }
3057
5
  else 
if (5
B->isZero()5
) {
3058
3
    const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
3059
3
    const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3060
3
    if (
!Aconst || 3
!Cconst3
)
return false0
;
3061
3
    APInt Alpha = Aconst->getAPInt();
3062
3
    APInt Charlie = Cconst->getAPInt();
3063
3
    APInt CdivA = Charlie.sdiv(Alpha);
3064
3
    assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
3065
3
    const SCEV *A_K = findCoefficient(Src, CurLoop);
3066
3
    Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
3067
3
    Src = zeroCoefficient(Src, CurLoop);
3068
3
    if (!findCoefficient(Dst, CurLoop)->isZero())
3069
0
      Consistent = false;
3070
3
  }
3071
2
  else 
if (2
isKnownPredicate(CmpInst::ICMP_EQ, A, B)2
) {
3072
1
    const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
3073
1
    const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3074
1
    if (
!Aconst || 1
!Cconst1
)
return false0
;
3075
1
    APInt Alpha = Aconst->getAPInt();
3076
1
    APInt Charlie = Cconst->getAPInt();
3077
1
    APInt CdivA = Charlie.sdiv(Alpha);
3078
1
    assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
3079
1
    const SCEV *A_K = findCoefficient(Src, CurLoop);
3080
1
    Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
3081
1
    Src = zeroCoefficient(Src, CurLoop);
3082
1
    Dst = addToCoefficient(Dst, CurLoop, A_K);
3083
1
    if (!findCoefficient(Dst, CurLoop)->isZero())
3084
0
      Consistent = false;
3085
1
  }
3086
1
  else {
3087
1
    // paper is incorrect here, or perhaps just misleading
3088
1
    const SCEV *A_K = findCoefficient(Src, CurLoop);
3089
1
    Src = SE->getMulExpr(Src, A);
3090
1
    Dst = SE->getMulExpr(Dst, A);
3091
1
    Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
3092
1
    Src = zeroCoefficient(Src, CurLoop);
3093
1
    Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
3094
1
    if (!findCoefficient(Dst, CurLoop)->isZero())
3095
0
      Consistent = false;
3096
5
  }
3097
6
  
DEBUG6
(dbgs() << "\t\tnew Src = " << *Src << "\n");
3098
6
  DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
3099
6
  return true;
3100
6
}
3101
3102
3103
// Attempt to propagate a point
3104
// constraint into a subscript pair (Src and Dst).
3105
// Return true if some simplification occurs.
3106
bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,
3107
3
                                    Constraint &CurConstraint) {
3108
3
  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3109
3
  const SCEV *A_K = findCoefficient(Src, CurLoop);
3110
3
  const SCEV *AP_K = findCoefficient(Dst, CurLoop);
3111
3
  const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
3112
3
  const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
3113
3
  DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3114
3
  Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
3115
3
  Src = zeroCoefficient(Src, CurLoop);
3116
3
  DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3117
3
  DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3118
3
  Dst = zeroCoefficient(Dst, CurLoop);
3119
3
  DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3120
3
  return true;
3121
3
}
3122
3123
3124
// Update direction vector entry based on the current constraint.
3125
void DependenceInfo::updateDirection(Dependence::DVEntry &Level,
3126
143
                                     const Constraint &CurConstraint) const {
3127
143
  DEBUG(dbgs() << "\tUpdate direction, constraint =");
3128
143
  DEBUG(CurConstraint.dump(dbgs()));
3129
143
  if (CurConstraint.isAny())
3130
0
    ; // use defaults
3131
143
  else 
if (143
CurConstraint.isDistance()143
) {
3132
125
    // this one is consistent, the others aren't
3133
125
    Level.Scalar = false;
3134
125
    Level.Distance = CurConstraint.getD();
3135
125
    unsigned NewDirection = Dependence::DVEntry::NONE;
3136
125
    if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
3137
111
      NewDirection = Dependence::DVEntry::EQ;
3138
125
    if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
3139
6
      NewDirection |= Dependence::DVEntry::LT;
3140
125
    if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
3141
8
      NewDirection |= Dependence::DVEntry::GT;
3142
125
    Level.Direction &= NewDirection;
3143
125
  }
3144
18
  else 
if (18
CurConstraint.isLine()18
) {
3145
6
    Level.Scalar = false;
3146
6
    Level.Distance = nullptr;
3147
6
    // direction should be accurate
3148
6
  }
3149
12
  else 
if (12
CurConstraint.isPoint()12
) {
3150
12
    Level.Scalar = false;
3151
12
    Level.Distance = nullptr;
3152
12
    unsigned NewDirection = Dependence::DVEntry::NONE;
3153
12
    if (!isKnownPredicate(CmpInst::ICMP_NE,
3154
12
                          CurConstraint.getY(),
3155
12
                          CurConstraint.getX()))
3156
12
      // if X may be = Y
3157
6
      NewDirection |= Dependence::DVEntry::EQ;
3158
12
    if (!isKnownPredicate(CmpInst::ICMP_SLE,
3159
12
                          CurConstraint.getY(),
3160
12
                          CurConstraint.getX()))
3161
12
      // if Y may be > X
3162
2
      NewDirection |= Dependence::DVEntry::LT;
3163
12
    if (!isKnownPredicate(CmpInst::ICMP_SGE,
3164
12
                          CurConstraint.getY(),
3165
12
                          CurConstraint.getX()))
3166
12
      // if Y may be < X
3167
4
      NewDirection |= Dependence::DVEntry::GT;
3168
12
    Level.Direction &= NewDirection;
3169
12
  }
3170
12
  else
3171
0
    llvm_unreachable("constraint has unexpected kind");
3172
143
}
3173
3174
/// Check if we can delinearize the subscripts. If the SCEVs representing the
3175
/// source and destination array references are recurrences on a nested loop,
3176
/// this function flattens the nested recurrences into separate recurrences
3177
/// for each loop level.
3178
bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
3179
100
                                    SmallVectorImpl<Subscript> &Pair) {
3180
100
  Value *SrcPtr = getPointerOperand(Src);
3181
100
  Value *DstPtr = getPointerOperand(Dst);
3182
100
3183
100
  Loop *SrcLoop = LI->getLoopFor(Src->getParent());
3184
100
  Loop *DstLoop = LI->getLoopFor(Dst->getParent());
3185
100
3186
100
  // Below code mimics the code in Delinearization.cpp
3187
100
  const SCEV *SrcAccessFn =
3188
100
    SE->getSCEVAtScope(SrcPtr, SrcLoop);
3189
100
  const SCEV *DstAccessFn =
3190
100
    SE->getSCEVAtScope(DstPtr, DstLoop);
3191
100
3192
100
  const SCEVUnknown *SrcBase =
3193
100
      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3194
100
  const SCEVUnknown *DstBase =
3195
100
      dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3196
100
3197
100
  if (
!SrcBase || 100
!DstBase100
||
SrcBase != DstBase100
)
3198
0
    return false;
3199
100
3200
100
  const SCEV *ElementSize = SE->getElementSize(Src);
3201
100
  if (ElementSize != SE->getElementSize(Dst))
3202
0
    return false;
3203
100
3204
100
  const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase);
3205
100
  const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);
3206
100
3207
100
  const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
3208
100
  const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
3209
100
  if (
!SrcAR || 100
!DstAR88
||
!SrcAR->isAffine()88
||
!DstAR->isAffine()88
)
3210
12
    return false;
3211
88
3212
88
  // First step: collect parametric terms in both array references.
3213
88
  SmallVector<const SCEV *, 4> Terms;
3214
88
  SE->collectParametricTerms(SrcAR, Terms);
3215
88
  SE->collectParametricTerms(DstAR, Terms);
3216
88
3217
88
  // Second step: find subscript sizes.
3218
88
  SmallVector<const SCEV *, 4> Sizes;
3219
88
  SE->findArrayDimensions(Terms, Sizes, ElementSize);
3220
88
3221
88
  // Third step: compute the access functions for each subscript.
3222
88
  SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
3223
88
  SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
3224
88
  SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
3225
88
3226
88
  // Fail when there is only a subscript: that's a linearized access function.
3227
88
  if (
SrcSubscripts.size() < 2 || 88
DstSubscripts.size() < 211
||
3228
11
      SrcSubscripts.size() != DstSubscripts.size())
3229
77
    return false;
3230
11
3231
11
  int size = SrcSubscripts.size();
3232
11
3233
11
  DEBUG({
3234
11
      dbgs() << "\nSrcSubscripts: ";
3235
11
    for (int i = 0; i < size; i++)
3236
11
      dbgs() << *SrcSubscripts[i];
3237
11
    dbgs() << "\nDstSubscripts: ";
3238
11
    for (int i = 0; i < size; i++)
3239
11
      dbgs() << *DstSubscripts[i];
3240
11
    });
3241
11
3242
11
  // The delinearization transforms a single-subscript MIV dependence test into
3243
11
  // a multi-subscript SIV dependence test that is easier to compute. So we
3244
11
  // resize Pair to contain as many pairs of subscripts as the delinearization
3245
11
  // has found, and then initialize the pairs following the delinearization.
3246
11
  Pair.resize(size);
3247
33
  for (int i = 0; 
i < size33
;
++i22
) {
3248
22
    Pair[i].Src = SrcSubscripts[i];
3249
22
    Pair[i].Dst = DstSubscripts[i];
3250
22
    unifySubscriptType(&Pair[i]);
3251
22
3252
22
    // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the
3253
22
    // delinearization has found, and add these constraints to the dependence
3254
22
    // check to avoid memory accesses overflow from one dimension into another.
3255
22
    // This is related to the problem of determining the existence of data
3256
22
    // dependences in array accesses using a different number of subscripts: in
3257
22
    // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc.
3258
22
  }
3259
100
3260
100
  return true;
3261
100
}
3262
3263
//===----------------------------------------------------------------------===//
3264
3265
#ifndef NDEBUG
3266
// For debugging purposes, dump a small bit vector to dbgs().
3267
static void dumpSmallBitVector(SmallBitVector &BV) {
3268
  dbgs() << "{";
3269
  for (unsigned VI : BV.set_bits()) {
3270
    dbgs() << VI;
3271
    if (BV.find_next(VI) >= 0)
3272
      dbgs() << ' ';
3273
  }
3274
  dbgs() << "}\n";
3275
}
3276
#endif
3277
3278
// depends -
3279
// Returns NULL if there is no dependence.
3280
// Otherwise, return a Dependence with as many details as possible.
3281
// Corresponds to Section 3.1 in the paper
3282
//
3283
//            Practical Dependence Testing
3284
//            Goff, Kennedy, Tseng
3285
//            PLDI 1991
3286
//
3287
// Care is required to keep the routine below, getSplitIteration(),
3288
// up to date with respect to this routine.
3289
std::unique_ptr<Dependence>
3290
DependenceInfo::depends(Instruction *Src, Instruction *Dst,
3291
1.72k
                        bool PossiblyLoopIndependent) {
3292
1.72k
  if (Src == Dst)
3293
571
    PossiblyLoopIndependent = false;
3294
1.72k
3295
1.72k
  if (
(!Src->mayReadFromMemory() && 1.72k
!Src->mayWriteToMemory()977
) ||
3296
1.72k
      
(!Dst->mayReadFromMemory() && 1.72k
!Dst->mayWriteToMemory()1.14k
))
3297
1.72k
    // if both instructions don't reference memory, there's no dependence
3298
0
    return nullptr;
3299
1.72k
3300
1.72k
  
if (1.72k
!isLoadOrStore(Src) || 1.72k
!isLoadOrStore(Dst)1.72k
) {
3301
0
    // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
3302
0
    DEBUG(dbgs() << "can only handle simple loads and stores\n");
3303
0
    return make_unique<Dependence>(Src, Dst);
3304
0
  }
3305
1.72k
3306
1.72k
  Value *SrcPtr = getPointerOperand(Src);
3307
1.72k
  Value *DstPtr = getPointerOperand(Dst);
3308
1.72k
3309
1.72k
  switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
3310
1.72k
                                 SrcPtr)) {
3311
546
  case MayAlias:
3312
546
  case PartialAlias:
3313
546
    // cannot analyse objects if we don't understand their aliasing.
3314
546
    DEBUG(dbgs() << "can't analyze may or partial alias\n");
3315
546
    return make_unique<Dependence>(Src, Dst);
3316
275
  case NoAlias:
3317
275
    // If the objects noalias, they are distinct, accesses are independent.
3318
275
    DEBUG(dbgs() << "no alias\n");
3319
275
    return nullptr;
3320
905
  case MustAlias:
3321
905
    break; // The underlying objects alias; test accesses for dependence.
3322
905
  }
3323
905
3324
905
  // establish loop nesting levels
3325
905
  establishNestingLevels(Src, Dst);
3326
905
  DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
3327
905
  DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
3328
905
3329
905
  FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
3330
905
  ++TotalArrayPairs;
3331
905
3332
905
  // See if there are GEPs we can use.
3333
905
  bool UsefulGEP = false;
3334
905
  GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
3335
905
  GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
3336
905
  if (
SrcGEP && 905
DstGEP620
&&
3337
905
      
SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()620
) {
3338
620
    const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
3339
620
    const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
3340
620
    DEBUG(dbgs() << "    SrcPtrSCEV = " << *SrcPtrSCEV << "\n");
3341
620
    DEBUG(dbgs() << "    DstPtrSCEV = " << *DstPtrSCEV << "\n");
3342
620
3343
620
    UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
3344
616
                isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
3345
616
                (SrcGEP->getNumOperands() == DstGEP->getNumOperands()) &&
3346
615
                isKnownPredicate(CmpInst::ICMP_EQ, SrcPtrSCEV, DstPtrSCEV);
3347
620
  }
3348
905
  unsigned Pairs = UsefulGEP ? 
SrcGEP->idx_end() - SrcGEP->idx_begin()613
:
1292
;
3349
905
  SmallVector<Subscript, 4> Pair(Pairs);
3350
905
  if (
UsefulGEP905
) {
3351
613
    DEBUG(dbgs() << "    using GEPs\n");
3352
613
    unsigned P = 0;
3353
613
    for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
3354
613
           SrcEnd = SrcGEP->idx_end(),
3355
613
           DstIdx = DstGEP->idx_begin();
3356
1.50k
         SrcIdx != SrcEnd;
3357
890
         
++SrcIdx, ++DstIdx, ++P890
) {
3358
890
      Pair[P].Src = SE->getSCEV(*SrcIdx);
3359
890
      Pair[P].Dst = SE->getSCEV(*DstIdx);
3360
890
      unifySubscriptType(&Pair[P]);
3361
890
    }
3362
613
  }
3363
292
  else {
3364
292
    DEBUG(dbgs() << "    ignoring GEPs\n");
3365
292
    const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
3366
292
    const SCEV *DstSCEV = SE->getSCEV(DstPtr);
3367
292
    DEBUG(dbgs() << "    SrcSCEV = " << *SrcSCEV << "\n");
3368
292
    DEBUG(dbgs() << "    DstSCEV = " << *DstSCEV << "\n");
3369
292
    Pair[0].Src = SrcSCEV;
3370
292
    Pair[0].Dst = DstSCEV;
3371
292
  }
3372
905
3373
905
  if (
Delinearize && 905
CommonLevels > 1103
) {
3374
100
    if (
tryDelinearize(Src, Dst, Pair)100
) {
3375
11
      DEBUG(dbgs() << "    delinearized GEP\n");
3376
11
      Pairs = Pair.size();
3377
11
    }
3378
100
  }
3379
905
3380
2.09k
  for (unsigned P = 0; 
P < Pairs2.09k
;
++P1.19k
) {
3381
1.19k
    Pair[P].Loops.resize(MaxLevels + 1);
3382
1.19k
    Pair[P].GroupLoops.resize(MaxLevels + 1);
3383
1.19k
    Pair[P].Group.resize(Pairs);
3384
1.19k
    removeMatchingExtensions(&Pair[P]);
3385
1.19k
    Pair[P].Classification =
3386
1.19k
      classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
3387
1.19k
                   Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
3388
1.19k
                   Pair[P].Loops);
3389
1.19k
    Pair[P].GroupLoops = Pair[P].Loops;
3390
1.19k
    Pair[P].Group.set(P);
3391
1.19k
    DEBUG(dbgs() << "    subscript " << P << "\n");
3392
1.19k
    DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
3393
1.19k
    DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
3394
1.19k
    DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
3395
1.19k
    DEBUG(dbgs() << "\tloops = ");
3396
1.19k
    DEBUG(dumpSmallBitVector(Pair[P].Loops));
3397
1.19k
  }
3398
905
3399
905
  SmallBitVector Separable(Pairs);
3400
905
  SmallBitVector Coupled(Pairs);
3401
905
3402
905
  // Partition subscripts into separable and minimally-coupled groups
3403
905
  // Algorithm in paper is algorithmically better;
3404
905
  // this may be faster in practice. Check someday.
3405
905
  //
3406
905
  // Here's an example of how it works. Consider this code:
3407
905
  //
3408
905
  //   for (i = ...) {
3409
905
  //     for (j = ...) {
3410
905
  //       for (k = ...) {
3411
905
  //         for (l = ...) {
3412
905
  //           for (m = ...) {
3413
905
  //             A[i][j][k][m] = ...;
3414
905
  //             ... = A[0][j][l][i + j];
3415
905
  //           }
3416
905
  //         }
3417
905
  //       }
3418
905
  //     }
3419
905
  //   }
3420
905
  //
3421
905
  // There are 4 subscripts here:
3422
905
  //    0 [i] and [0]
3423
905
  //    1 [j] and [j]
3424
905
  //    2 [k] and [l]
3425
905
  //    3 [m] and [i + j]
3426
905
  //
3427
905
  // We've already classified each subscript pair as ZIV, SIV, etc.,
3428
905
  // and collected all the loops mentioned by pair P in Pair[P].Loops.
3429
905
  // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
3430
905
  // and set Pair[P].Group = {P}.
3431
905
  //
3432
905
  //      Src Dst    Classification Loops  GroupLoops Group
3433
905
  //    0 [i] [0]         SIV       {1}      {1}        {0}
3434
905
  //    1 [j] [j]         SIV       {2}      {2}        {1}
3435
905
  //    2 [k] [l]         RDIV      {3,4}    {3,4}      {2}
3436
905
  //    3 [m] [i + j]     MIV       {1,2,5}  {1,2,5}    {3}
3437
905
  //
3438
905
  // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
3439
905
  // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
3440
905
  //
3441
905
  // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
3442
905
  // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
3443
905
  // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,
3444
905
  // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
3445
905
  // to either Separable or Coupled).
3446
905
  //
3447
905
  // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
3448
905
  // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty,
3449
905
  // so Pair[3].Group = {0, 1, 3} and Done = false.
3450
905
  //
3451
905
  // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
3452
905
  // Since Done remains true, we add 2 to the set of Separable pairs.
3453
905
  //
3454
905
  // Finally, we consider 3. There's nothing to compare it with,
3455
905
  // so Done remains true and we add it to the Coupled set.
3456
905
  // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
3457
905
  //
3458
905
  // In the end, we've got 1 separable subscript and 1 coupled group.
3459
2.09k
  for (unsigned SI = 0; 
SI < Pairs2.09k
;
++SI1.19k
) {
3460
1.19k
    if (
Pair[SI].Classification == Subscript::NonLinear1.19k
) {
3461
81
      // ignore these, but collect loops for later
3462
81
      ++NonlinearSubscriptPairs;
3463
81
      collectCommonLoops(Pair[SI].Src,
3464
81
                         LI->getLoopFor(Src->getParent()),
3465
81
                         Pair[SI].Loops);
3466
81
      collectCommonLoops(Pair[SI].Dst,
3467
81
                         LI->getLoopFor(Dst->getParent()),
3468
81
                         Pair[SI].Loops);
3469
81
      Result.Consistent = false;
3470
1.19k
    } else 
if (1.11k
Pair[SI].Classification == Subscript::ZIV1.11k
) {
3471
250
      // always separable
3472
250
      Separable.set(SI);
3473
250
    }
3474
862
    else {
3475
862
      // SIV, RDIV, or MIV, so check for coupled group
3476
862
      bool Done = true;
3477
1.17k
      for (unsigned SJ = SI + 1; 
SJ < Pairs1.17k
;
++SJ310
) {
3478
310
        SmallBitVector Intersection = Pair[SI].GroupLoops;
3479
310
        Intersection &= Pair[SJ].GroupLoops;
3480
310
        if (
Intersection.any()310
) {
3481
166
          // accumulate set of all the loops in group
3482
166
          Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
3483
166
          // accumulate set of all subscripts in group
3484
166
          Pair[SJ].Group |= Pair[SI].Group;
3485
166
          Done = false;
3486
166
        }
3487
310
      }
3488
862
      if (
Done862
) {
3489
722
        if (
Pair[SI].Group.count() == 1722
) {
3490
610
          Separable.set(SI);
3491
610
          ++SeparableSubscriptPairs;
3492
610
        }
3493
112
        else {
3494
112
          Coupled.set(SI);
3495
112
          ++CoupledSubscriptPairs;
3496
112
        }
3497
722
      }
3498
1.11k
    }
3499
1.19k
  }
3500
905
3501
905
  DEBUG(dbgs() << "    Separable = ");
3502
905
  DEBUG(dumpSmallBitVector(Separable));
3503
905
  DEBUG(dbgs() << "    Coupled = ");
3504
905
  DEBUG(dumpSmallBitVector(Coupled));
3505
905
3506
905
  Constraint NewConstraint;
3507
905
  NewConstraint.setAny(SE);
3508
905
3509
905
  // test separable subscripts
3510
860
  for (unsigned SI : Separable.set_bits()) {
3511
860
    DEBUG(dbgs() << "testing subscript " << SI);
3512
860
    switch (Pair[SI].Classification) {
3513
250
    case Subscript::ZIV:
3514
250
      DEBUG(dbgs() << ", ZIV\n");
3515
250
      if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
3516
5
        return nullptr;
3517
245
      break;
3518
399
    case Subscript::SIV: {
3519
399
      DEBUG(dbgs() << ", SIV\n");
3520
399
      unsigned Level;
3521
399
      const SCEV *SplitIter = nullptr;
3522
399
      if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
3523
399
                  SplitIter))
3524
24
        return nullptr;
3525
375
      break;
3526
375
    }
3527
23
    case Subscript::RDIV:
3528
23
      DEBUG(dbgs() << ", RDIV\n");
3529
23
      if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
3530
17
        return nullptr;
3531
6
      break;
3532
188
    case Subscript::MIV:
3533
188
      DEBUG(dbgs() << ", MIV\n");
3534
188
      if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
3535
12
        return nullptr;
3536
176
      break;
3537
0
    default:
3538
0
      llvm_unreachable("subscript has unexpected classification");
3539
847
    }
3540
847
  }
3541
847
3542
847
  
if (847
Coupled.count()847
) {
3543
112
    // test coupled subscript groups
3544
112
    DEBUG(dbgs() << "starting on coupled subscripts\n");
3545
112
    DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
3546
112
    SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
3547
445
    for (unsigned II = 0; 
II <= MaxLevels445
;
++II333
)
3548
333
      Constraints[II].setAny(SE);
3549
112
    for (unsigned SI : Coupled.set_bits()) {
3550
112
      DEBUG(dbgs() << "testing subscript group " << SI << " { ");
3551
112
      SmallBitVector Group(Pair[SI].Group);
3552
112
      SmallBitVector Sivs(Pairs);
3553
112
      SmallBitVector Mivs(Pairs);
3554
112
      SmallBitVector ConstrainedLevels(MaxLevels + 1);
3555
112
      SmallVector<Subscript *, 4> PairsInGroup;
3556
252
      for (unsigned SJ : Group.set_bits()) {
3557
252
        DEBUG(dbgs() << SJ << " ");
3558
252
        if (Pair[SJ].Classification == Subscript::SIV)
3559
178
          Sivs.set(SJ);
3560
252
        else
3561
74
          Mivs.set(SJ);
3562
252
        PairsInGroup.push_back(&Pair[SJ]);
3563
252
      }
3564
112
      unifySubscriptType(PairsInGroup);
3565
112
      DEBUG(dbgs() << "}\n");
3566
259
      while (
Sivs.any()259
) {
3567
156
        bool Changed = false;
3568
233
        for (unsigned SJ : Sivs.set_bits()) {
3569
233
          DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
3570
233
          // SJ is an SIV subscript that's part of the current coupled group
3571
233
          unsigned Level;
3572
233
          const SCEV *SplitIter = nullptr;
3573
233
          DEBUG(dbgs() << "SIV\n");
3574
233
          if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
3575
233
                      SplitIter))
3576
2
            return nullptr;
3577
231
          ConstrainedLevels.set(Level);
3578
231
          if (
intersectConstraints(&Constraints[Level], &NewConstraint)231
) {
3579
180
            if (
Constraints[Level].isEmpty()180
) {
3580
7
              ++DeltaIndependence;
3581
7
              return nullptr;
3582
7
            }
3583
173
            Changed = true;
3584
173
          }
3585
224
          Sivs.reset(SJ);
3586
224
        }
3587
147
        
if (147
Changed147
) {
3588
147
          // propagate, possibly creating new SIVs and ZIVs
3589
147
          DEBUG(dbgs() << "    propagating\n");
3590
147
          DEBUG(dbgs() << "\tMivs = ");
3591
147
          DEBUG(dumpSmallBitVector(Mivs));
3592
72
          for (unsigned SJ : Mivs.set_bits()) {
3593
72
            // SJ is an MIV subscript that's part of the current coupled group
3594
72
            DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
3595
72
            if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
3596
72
                          Constraints, Result.Consistent)) {
3597
63
              DEBUG(dbgs() << "\t    Changed\n");
3598
63
              ++DeltaPropagations;
3599
63
              Pair[SJ].Classification =
3600
63
                classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
3601
63
                             Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
3602
63
                             Pair[SJ].Loops);
3603
63
              switch (Pair[SJ].Classification) {
3604
0
              case Subscript::ZIV:
3605
0
                DEBUG(dbgs() << "ZIV\n");
3606
0
                if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
3607
0
                  return nullptr;
3608
0
                Mivs.reset(SJ);
3609
0
                break;
3610
55
              case Subscript::SIV:
3611
55
                Sivs.set(SJ);
3612
55
                Mivs.reset(SJ);
3613
55
                break;
3614
8
              case Subscript::RDIV:
3615
8
              case Subscript::MIV:
3616
8
                break;
3617
0
              default:
3618
0
                llvm_unreachable("bad subscript classification");
3619
63
              }
3620
63
            }
3621
72
          }
3622
147
        }
3623
156
      }
3624
112
3625
112
      // test & propagate remaining RDIVs
3626
103
      
for (unsigned SJ : Mivs.set_bits()) 103
{
3627
18
        if (
Pair[SJ].Classification == Subscript::RDIV18
) {
3628
6
          DEBUG(dbgs() << "RDIV test\n");
3629
6
          if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
3630
2
            return nullptr;
3631
4
          // I don't yet understand how to propagate RDIV results
3632
4
          Mivs.reset(SJ);
3633
4
        }
3634
18
      }
3635
103
3636
103
      // test remaining MIVs
3637
103
      // This code is temporary.
3638
103
      // Better to somehow test all remaining subscripts simultaneously.
3639
101
      
for (unsigned SJ : Mivs.set_bits()) 101
{
3640
12
        if (
Pair[SJ].Classification == Subscript::MIV12
) {
3641
12
          DEBUG(dbgs() << "MIV test\n");
3642
12
          if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
3643
1
            return nullptr;
3644
12
        }
3645
12
        else
3646
12
          llvm_unreachable("expected only MIV subscripts at this point");
3647
12
      }
3648
101
3649
101
      // update Result.DV from constraint vector
3650
100
      
DEBUG100
(dbgs() << " updating\n");
3651
146
      for (unsigned SJ : ConstrainedLevels.set_bits()) {
3652
146
        if (SJ > CommonLevels)
3653
3
          break;
3654
143
        updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
3655
143
        if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
3656
0
          return nullptr;
3657
835
      }
3658
112
    }
3659
112
  }
3660
835
3661
835
  // Make sure the Scalar flags are set correctly.
3662
835
  SmallBitVector CompleteLoops(MaxLevels + 1);
3663
1.94k
  for (unsigned SI = 0; 
SI < Pairs1.94k
;
++SI1.10k
)
3664
1.10k
    CompleteLoops |= Pair[SI].Loops;
3665
2.15k
  for (unsigned II = 1; 
II <= CommonLevels2.15k
;
++II1.31k
)
3666
1.31k
    
if (1.31k
CompleteLoops[II]1.31k
)
3667
1.05k
      Result.DV[II - 1].Scalar = false;
3668
835
3669
835
  if (
PossiblyLoopIndependent835
) {
3670
265
    // Make sure the LoopIndependent flag is set correctly.
3671
265
    // All directions must include equal, otherwise no
3672
265
    // loop-independent dependence is possible.
3673
599
    for (unsigned II = 1; 
II <= CommonLevels599
;
++II334
) {
3674
402
      if (
!(Result.getDirection(II) & Dependence::DVEntry::EQ)402
) {
3675
68
        Result.LoopIndependent = false;
3676
68
        break;
3677
68
      }
3678
402
    }
3679
265
  }
3680
570
  else {
3681
570
    // On the other hand, if all directions are equal and there's no
3682
570
    // loop-independent dependence possible, then no dependence exists.
3683
570
    bool AllEqual = true;
3684
1.13k
    for (unsigned II = 1; 
II <= CommonLevels1.13k
;
++II564
) {
3685
704
      if (
Result.getDirection(II) != Dependence::DVEntry::EQ704
) {
3686
140
        AllEqual = false;
3687
140
        break;
3688
140
      }
3689
704
    }
3690
570
    if (AllEqual)
3691
430
      return nullptr;
3692
405
  }
3693
405
3694
405
  return make_unique<FullDependence>(std::move(Result));
3695
405
}
3696
3697
3698
3699
//===----------------------------------------------------------------------===//
3700
// getSplitIteration -
3701
// Rather than spend rarely-used space recording the splitting iteration
3702
// during the Weak-Crossing SIV test, we re-compute it on demand.
3703
// The re-computation is basically a repeat of the entire dependence test,
3704
// though simplified since we know that the dependence exists.
3705
// It's tedious, since we must go through all propagations, etc.
3706
//
3707
// Care is required to keep this code up to date with respect to the routine
3708
// above, depends().
3709
//
3710
// Generally, the dependence analyzer will be used to build
3711
// a dependence graph for a function (basically a map from instructions
3712
// to dependences). Looking for cycles in the graph shows us loops
3713
// that cannot be trivially vectorized/parallelized.
3714
//
3715
// We can try to improve the situation by examining all the dependences
3716
// that make up the cycle, looking for ones we can break.
3717
// Sometimes, peeling the first or last iteration of a loop will break
3718
// dependences, and we've got flags for those possibilities.
3719
// Sometimes, splitting a loop at some other iteration will do the trick,
3720
// and we've got a flag for that case. Rather than waste the space to
3721
// record the exact iteration (since we rarely know), we provide
3722
// a method that calculates the iteration. It's a drag that it must work
3723
// from scratch, but wonderful in that it's possible.
3724
//
3725
// Here's an example:
3726
//
3727
//    for (i = 0; i < 10; i++)
3728
//        A[i] = ...
3729
//        ... = A[11 - i]
3730
//
3731
// There's a loop-carried flow dependence from the store to the load,
3732
// found by the weak-crossing SIV test. The dependence will have a flag,
3733
// indicating that the dependence can be broken by splitting the loop.
3734
// Calling getSplitIteration will return 5.
3735
// Splitting the loop breaks the dependence, like so:
3736
//
3737
//    for (i = 0; i <= 5; i++)
3738
//        A[i] = ...
3739
//        ... = A[11 - i]
3740
//    for (i = 6; i < 10; i++)
3741
//        A[i] = ...
3742
//        ... = A[11 - i]
3743
//
3744
// breaks the dependence and allows us to vectorize/parallelize
3745
// both loops.
3746
const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
3747
10
                                              unsigned SplitLevel) {
3748
10
  assert(Dep.isSplitable(SplitLevel) &&
3749
10
         "Dep should be splitable at SplitLevel");
3750
10
  Instruction *Src = Dep.getSrc();
3751
10
  Instruction *Dst = Dep.getDst();
3752
10
  assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
3753
10
  assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
3754
10
  assert(isLoadOrStore(Src));
3755
10
  assert(isLoadOrStore(Dst));
3756
10
  Value *SrcPtr = getPointerOperand(Src);
3757
10
  Value *DstPtr = getPointerOperand(Dst);
3758
10
  assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
3759
10
                                SrcPtr) == MustAlias);
3760
10
3761
10
  // establish loop nesting levels
3762
10
  establishNestingLevels(Src, Dst);
3763
10
3764
10
  FullDependence Result(Src, Dst, false, CommonLevels);
3765
10
3766
10
  // See if there are GEPs we can use.
3767
10
  bool UsefulGEP = false;
3768
10
  GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
3769
10
  GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
3770
10
  if (
SrcGEP && 10
DstGEP10
&&
3771
10
      
SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()10
) {
3772
10
    const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
3773
10
    const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
3774
10
    UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
3775
10
                isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
3776
10
                (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
3777
10
  }
3778
10
  unsigned Pairs = UsefulGEP ? 
SrcGEP->idx_end() - SrcGEP->idx_begin()10
:
10
;
3779
10
  SmallVector<Subscript, 4> Pair(Pairs);
3780
10
  if (
UsefulGEP10
) {
3781
10
    unsigned P = 0;
3782
10
    for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
3783
10
           SrcEnd = SrcGEP->idx_end(),
3784
10
           DstIdx = DstGEP->idx_begin();
3785
35
         SrcIdx != SrcEnd;
3786
25
         
++SrcIdx, ++DstIdx, ++P25
) {
3787
25
      Pair[P].Src = SE->getSCEV(*SrcIdx);
3788
25
      Pair[P].Dst = SE->getSCEV(*DstIdx);
3789
25
    }
3790
10
  }
3791
0
  else {
3792
0
    const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
3793
0
    const SCEV *DstSCEV = SE->getSCEV(DstPtr);
3794
0
    Pair[0].Src = SrcSCEV;
3795
0
    Pair[0].Dst = DstSCEV;
3796
0
  }
3797
10
3798
10
  if (
Delinearize && 10
CommonLevels > 10
) {
3799
0
    if (
tryDelinearize(Src, Dst, Pair)0
) {
3800
0
      DEBUG(dbgs() << "    delinearized GEP\n");
3801
0
      Pairs = Pair.size();
3802
0
    }
3803
0
  }
3804
10
3805
35
  for (unsigned P = 0; 
P < Pairs35
;
++P25
) {
3806
25
    Pair[P].Loops.resize(MaxLevels + 1);
3807
25
    Pair[P].GroupLoops.resize(MaxLevels + 1);
3808
25
    Pair[P].Group.resize(Pairs);
3809
25
    removeMatchingExtensions(&Pair[P]);
3810
25
    Pair[P].Classification =
3811
25
      classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
3812
25
                   Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
3813
25
                   Pair[P].Loops);
3814
25
    Pair[P].GroupLoops = Pair[P].Loops;
3815
25
    Pair[P].Group.set(P);
3816
25
  }
3817
10
3818
10
  SmallBitVector Separable(Pairs);
3819
10
  SmallBitVector Coupled(Pairs);
3820
10
3821
10
  // partition subscripts into separable and minimally-coupled groups
3822
35
  for (unsigned SI = 0; 
SI < Pairs35
;
++SI25
) {
3823
25
    if (
Pair[SI].Classification == Subscript::NonLinear25
) {
3824
0
      // ignore these, but collect loops for later
3825
0
      collectCommonLoops(Pair[SI].Src,
3826
0
                         LI->getLoopFor(Src->getParent()),
3827
0
                         Pair[SI].Loops);
3828
0
      collectCommonLoops(Pair[SI].Dst,
3829
0
                         LI->getLoopFor(Dst->getParent()),
3830
0
                         Pair[SI].Loops);
3831
0
      Result.Consistent = false;
3832
0
    }
3833
25
    else 
if (25
Pair[SI].Classification == Subscript::ZIV25
)
3834
0
      Separable.set(SI);
3835
25
    else {
3836
25
      // SIV, RDIV, or MIV, so check for coupled group
3837
25
      bool Done = true;
3838
63
      for (unsigned SJ = SI + 1; 
SJ < Pairs63
;
++SJ38
) {
3839
38
        SmallBitVector Intersection = Pair[SI].GroupLoops;
3840
38
        Intersection &= Pair[SJ].GroupLoops;
3841
38
        if (
Intersection.any()38
) {
3842
10
          // accumulate set of all the loops in group
3843
10
          Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
3844
10
          // accumulate set of all subscripts in group
3845
10
          Pair[SJ].Group |= Pair[SI].Group;
3846
10
          Done = false;
3847
10
        }
3848
38
      }
3849
25
      if (
Done25
) {
3850
17
        if (Pair[SI].Group.count() == 1)
3851
11
          Separable.set(SI);
3852
17
        else
3853
6
          Coupled.set(SI);
3854
17
      }
3855
25
    }
3856
25
  }
3857
10
3858
10
  Constraint NewConstraint;
3859
10
  NewConstraint.setAny(SE);
3860
10
3861
10
  // test separable subscripts
3862
7
  for (unsigned SI : Separable.set_bits()) {
3863
7
    switch (Pair[SI].Classification) {
3864
7
    case Subscript::SIV: {
3865
7
      unsigned Level;
3866
7
      const SCEV *SplitIter = nullptr;
3867
7
      (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
3868
7
                     Result, NewConstraint, SplitIter);
3869
7
      if (
Level == SplitLevel7
) {
3870
4
        assert(SplitIter != nullptr);
3871
4
        return SplitIter;
3872
4
      }
3873
3
      break;
3874
3
    }
3875
0
    case Subscript::ZIV:
3876
0
    case Subscript::RDIV:
3877
0
    case Subscript::MIV:
3878
0
      break;
3879
0
    default:
3880
0
      llvm_unreachable("subscript has unexpected classification");
3881
6
    }
3882
6
  }
3883
6
3884
6
  
if (6
Coupled.count()6
) {
3885
6
    // test coupled subscript groups
3886
6
    SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
3887
20
    for (unsigned II = 0; 
II <= MaxLevels20
;
++II14
)
3888
14
      Constraints[II].setAny(SE);
3889
6
    for (unsigned SI : Coupled.set_bits()) {
3890
6
      SmallBitVector Group(Pair[SI].Group);
3891
6
      SmallBitVector Sivs(Pairs);
3892
6
      SmallBitVector Mivs(Pairs);
3893
6
      SmallBitVector ConstrainedLevels(MaxLevels + 1);
3894
14
      for (unsigned SJ : Group.set_bits()) {
3895
14
        if (Pair[SJ].Classification == Subscript::SIV)
3896
12
          Sivs.set(SJ);
3897
14
        else
3898
2
          Mivs.set(SJ);
3899
14
      }
3900
6
      while (
Sivs.any()6
) {
3901
6
        bool Changed = false;
3902
11
        for (unsigned SJ : Sivs.set_bits()) {
3903
11
          // SJ is an SIV subscript that's part of the current coupled group
3904
11
          unsigned Level;
3905
11
          const SCEV *SplitIter = nullptr;
3906
11
          (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
3907
11
                         Result, NewConstraint, SplitIter);
3908
11
          if (
Level == SplitLevel && 11
SplitIter11
)
3909
6
            return SplitIter;
3910
5
          ConstrainedLevels.set(Level);
3911
5
          if (intersectConstraints(&Constraints[Level], &NewConstraint))
3912
5
            Changed = true;
3913
11
          Sivs.reset(SJ);
3914
11
        }
3915
0
        
if (0
Changed0
) {
3916
0
          // propagate, possibly creating new SIVs and ZIVs
3917
0
          for (unsigned SJ : Mivs.set_bits()) {
3918
0
            // SJ is an MIV subscript that's part of the current coupled group
3919
0
            if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
3920
0
                          Pair[SJ].Loops, Constraints, Result.Consistent)) {
3921
0
              Pair[SJ].Classification =
3922
0
                classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
3923
0
                             Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
3924
0
                             Pair[SJ].Loops);
3925
0
              switch (Pair[SJ].Classification) {
3926
0
              case Subscript::ZIV:
3927
0
                Mivs.reset(SJ);
3928
0
                break;
3929
0
              case Subscript::SIV:
3930
0
                Sivs.set(SJ);
3931
0
                Mivs.reset(SJ);
3932
0
                break;
3933
0
              case Subscript::RDIV:
3934
0
              case Subscript::MIV:
3935
0
                break;
3936
0
              default:
3937
0
                llvm_unreachable("bad subscript classification");
3938
0
              }
3939
0
            }
3940
0
          }
3941
0
        }
3942
6
      }
3943
6
    }
3944
6
  }
3945
0
  
llvm_unreachable0
("somehow reached end of routine");
3946
0
  return nullptr;
3947
10
}