Coverage Report

Created: 2017-04-29 12:21

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/External/isl/isl_scheduler.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2011      INRIA Saclay
3
 * Copyright 2012-2014 Ecole Normale Superieure
4
 * Copyright 2015-2016 Sven Verdoolaege
5
 * Copyright 2016      INRIA Paris
6
 *
7
 * Use of this software is governed by the MIT license
8
 *
9
 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
10
 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
11
 * 91893 Orsay, France
12
 * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
13
 * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
14
 * CS 42112, 75589 Paris Cedex 12, France
15
 */
16
17
#include <isl_ctx_private.h>
18
#include <isl_map_private.h>
19
#include <isl_space_private.h>
20
#include <isl_aff_private.h>
21
#include <isl/hash.h>
22
#include <isl/constraint.h>
23
#include <isl/schedule.h>
24
#include <isl_schedule_constraints.h>
25
#include <isl/schedule_node.h>
26
#include <isl_mat_private.h>
27
#include <isl_vec_private.h>
28
#include <isl/set.h>
29
#include <isl/union_set.h>
30
#include <isl_seq.h>
31
#include <isl_tab.h>
32
#include <isl_dim_map.h>
33
#include <isl/map_to_basic_set.h>
34
#include <isl_sort.h>
35
#include <isl_options_private.h>
36
#include <isl_tarjan.h>
37
#include <isl_morph.h>
38
#include <isl/ilp.h>
39
#include <isl_val_private.h>
40
41
/*
42
 * The scheduling algorithm implemented in this file was inspired by
43
 * Bondhugula et al., "Automatic Transformations for Communication-Minimized
44
 * Parallelization and Locality Optimization in the Polyhedral Model".
45
 */
46
47
48
/* Internal information about a node that is used during the construction
49
 * of a schedule.
50
 * space represents the space in which the domain lives
51
 * sched is a matrix representation of the schedule being constructed
52
 *  for this node; if compressed is set, then this schedule is
53
 *  defined over the compressed domain space
54
 * sched_map is an isl_map representation of the same (partial) schedule
55
 *  sched_map may be NULL; if compressed is set, then this map
56
 *  is defined over the uncompressed domain space
57
 * rank is the number of linearly independent rows in the linear part
58
 *  of sched
59
 * the columns of cmap represent a change of basis for the schedule
60
 *  coefficients; the first rank columns span the linear part of
61
 *  the schedule rows
62
 * cinv is the inverse of cmap.
63
 * ctrans is the transpose of cmap.
64
 * start is the first variable in the LP problem in the sequences that
65
 *  represents the schedule coefficients of this node
66
 * nvar is the dimension of the domain
67
 * nparam is the number of parameters or 0 if we are not constructing
68
 *  a parametric schedule
69
 *
70
 * If compressed is set, then hull represents the constraints
71
 * that were used to derive the compression, while compress and
72
 * decompress map the original space to the compressed space and
73
 * vice versa.
74
 *
75
 * scc is the index of SCC (or WCC) this node belongs to
76
 *
77
 * "cluster" is only used inside extract_clusters and identifies
78
 * the cluster of SCCs that the node belongs to.
79
 *
80
 * coincident contains a boolean for each of the rows of the schedule,
81
 * indicating whether the corresponding scheduling dimension satisfies
82
 * the coincidence constraints in the sense that the corresponding
83
 * dependence distances are zero.
84
 *
85
 * If the schedule_treat_coalescing option is set, then
86
 * "sizes" contains the sizes of the (compressed) instance set
87
 * in each direction.  If there is no fixed size in a given direction,
88
 * then the corresponding size value is set to infinity.
89
 * If the schedule_treat_coalescing option or the schedule_max_coefficient
90
 * option is set, then "max" contains the maximal values for
91
 * schedule coefficients of the (compressed) variables.  If no bound
92
 * needs to be imposed on a particular variable, then the corresponding
93
 * value is negative.
94
 */
95
struct isl_sched_node {
96
  isl_space *space;
97
  int compressed;
98
  isl_set *hull;
99
  isl_multi_aff *compress;
100
  isl_multi_aff *decompress;
101
  isl_mat *sched;
102
  isl_map *sched_map;
103
  int  rank;
104
  isl_mat *cmap;
105
  isl_mat *cinv;
106
  isl_mat *ctrans;
107
  int  start;
108
  int  nvar;
109
  int  nparam;
110
111
  int  scc;
112
  int  cluster;
113
114
  int *coincident;
115
116
  isl_multi_val *sizes;
117
  isl_vec *max;
118
};
119
120
static int node_has_space(const void *entry, const void *val)
121
1.46k
{
122
1.46k
  struct isl_sched_node *node = (struct isl_sched_node *)entry;
123
1.46k
  isl_space *dim = (isl_space *)val;
124
1.46k
125
1.46k
  return isl_space_is_equal(node->space, dim);
126
1.46k
}
127
128
static int node_scc_exactly(struct isl_sched_node *node, int scc)
129
826
{
130
826
  return node->scc == scc;
131
826
}
132
133
static int node_scc_at_most(struct isl_sched_node *node, int scc)
134
18
{
135
18
  return node->scc <= scc;
136
18
}
137
138
static int node_scc_at_least(struct isl_sched_node *node, int scc)
139
18
{
140
18
  return node->scc >= scc;
141
18
}
142
143
/* An edge in the dependence graph.  An edge may be used to
144
 * ensure validity of the generated schedule, to minimize the dependence
145
 * distance or both
146
 *
147
 * map is the dependence relation, with i -> j in the map if j depends on i
148
 * tagged_condition and tagged_validity contain the union of all tagged
149
 *  condition or conditional validity dependence relations that
150
 *  specialize the dependence relation "map"; that is,
151
 *  if (i -> a) -> (j -> b) is an element of "tagged_condition"
152
 *  or "tagged_validity", then i -> j is an element of "map".
153
 *  If these fields are NULL, then they represent the empty relation.
154
 * src is the source node
155
 * dst is the sink node
156
 *
157
 * types is a bit vector containing the types of this edge.
158
 * validity is set if the edge is used to ensure correctness
159
 * coincidence is used to enforce zero dependence distances
160
 * proximity is set if the edge is used to minimize dependence distances
161
 * condition is set if the edge represents a condition
162
 *  for a conditional validity schedule constraint
163
 * local can only be set for condition edges and indicates that
164
 *  the dependence distance over the edge should be zero
165
 * conditional_validity is set if the edge is used to conditionally
166
 *  ensure correctness
167
 *
168
 * For validity edges, start and end mark the sequence of inequality
169
 * constraints in the LP problem that encode the validity constraint
170
 * corresponding to this edge.
171
 *
172
 * During clustering, an edge may be marked "no_merge" if it should
173
 * not be used to merge clusters.
174
 * The weight is also only used during clustering and it is
175
 * an indication of how many schedule dimensions on either side
176
 * of the schedule constraints can be aligned.
177
 * If the weight is negative, then this means that this edge was postponed
178
 * by has_bounded_distances or any_no_merge.  The original weight can
179
 * be retrieved by adding 1 + graph->max_weight, with "graph"
180
 * the graph containing this edge.
181
 */
182
struct isl_sched_edge {
183
  isl_map *map;
184
  isl_union_map *tagged_condition;
185
  isl_union_map *tagged_validity;
186
187
  struct isl_sched_node *src;
188
  struct isl_sched_node *dst;
189
190
  unsigned types;
191
192
  int start;
193
  int end;
194
195
  int no_merge;
196
  int weight;
197
};
198
199
/* Is "edge" marked as being of type "type"?
200
 */
201
static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type)
202
11.1k
{
203
11.1k
  return ISL_FL_ISSET(edge->types, 1 << type);
204
11.1k
}
205
206
/* Mark "edge" as being of type "type".
207
 */
208
static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
209
603
{
210
603
  ISL_FL_SET(edge->types, 1 << type);
211
603
}
212
213
/* No longer mark "edge" as being of type "type"?
214
 */
215
static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
216
35
{
217
35
  ISL_FL_CLR(edge->types, 1 << type);
218
35
}
219
220
/* Is "edge" marked as a validity edge?
221
 */
222
static int is_validity(struct isl_sched_edge *edge)
223
3.29k
{
224
3.29k
  return is_type(edge, isl_edge_validity);
225
3.29k
}
226
227
/* Mark "edge" as a validity edge.
228
 */
229
static void set_validity(struct isl_sched_edge *edge)
230
2
{
231
2
  set_type(edge, isl_edge_validity);
232
2
}
233
234
/* Is "edge" marked as a proximity edge?
235
 */
236
static int is_proximity(struct isl_sched_edge *edge)
237
1.33k
{
238
1.33k
  return is_type(edge, isl_edge_proximity);
239
1.33k
}
240
241
/* Is "edge" marked as a local edge?
242
 */
243
static int is_local(struct isl_sched_edge *edge)
244
1.85k
{
245
1.85k
  return is_type(edge, isl_edge_local);
246
1.85k
}
247
248
/* Mark "edge" as a local edge.
249
 */
250
static void set_local(struct isl_sched_edge *edge)
251
15
{
252
15
  set_type(edge, isl_edge_local);
253
15
}
254
255
/* No longer mark "edge" as a local edge.
256
 */
257
static void clear_local(struct isl_sched_edge *edge)
258
35
{
259
35
  clear_type(edge, isl_edge_local);
260
35
}
261
262
/* Is "edge" marked as a coincidence edge?
263
 */
264
static int is_coincidence(struct isl_sched_edge *edge)
265
1.82k
{
266
1.82k
  return is_type(edge, isl_edge_coincidence);
267
1.82k
}
268
269
/* Is "edge" marked as a condition edge?
270
 */
271
static int is_condition(struct isl_sched_edge *edge)
272
1.49k
{
273
1.49k
  return is_type(edge, isl_edge_condition);
274
1.49k
}
275
276
/* Is "edge" marked as a conditional validity edge?
277
 */
278
static int is_conditional_validity(struct isl_sched_edge *edge)
279
1.17k
{
280
1.17k
  return is_type(edge, isl_edge_conditional_validity);
281
1.17k
}
282
283
/* Internal information about the dependence graph used during
284
 * the construction of the schedule.
285
 *
286
 * intra_hmap is a cache, mapping dependence relations to their dual,
287
 *  for dependences from a node to itself
288
 * inter_hmap is a cache, mapping dependence relations to their dual,
289
 *  for dependences between distinct nodes
290
 * if compression is involved then the key for these maps
291
 * is the original, uncompressed dependence relation, while
292
 * the value is the dual of the compressed dependence relation.
293
 *
294
 * n is the number of nodes
295
 * node is the list of nodes
296
 * maxvar is the maximal number of variables over all nodes
297
 * max_row is the allocated number of rows in the schedule
298
 * n_row is the current (maximal) number of linearly independent
299
 *  rows in the node schedules
300
 * n_total_row is the current number of rows in the node schedules
301
 * band_start is the starting row in the node schedules of the current band
302
 * root is set if this graph is the original dependence graph,
303
 *  without any splitting
304
 *
305
 * sorted contains a list of node indices sorted according to the
306
 *  SCC to which a node belongs
307
 *
308
 * n_edge is the number of edges
309
 * edge is the list of edges
310
 * max_edge contains the maximal number of edges of each type;
311
 *  in particular, it contains the number of edges in the inital graph.
312
 * edge_table contains pointers into the edge array, hashed on the source
313
 *  and sink spaces; there is one such table for each type;
314
 *  a given edge may be referenced from more than one table
315
 *  if the corresponding relation appears in more than one of the
316
 *  sets of dependences; however, for each type there is only
317
 *  a single edge between a given pair of source and sink space
318
 *  in the entire graph
319
 *
320
 * node_table contains pointers into the node array, hashed on the space
321
 *
322
 * region contains a list of variable sequences that should be non-trivial
323
 *
324
 * lp contains the (I)LP problem used to obtain new schedule rows
325
 *
326
 * src_scc and dst_scc are the source and sink SCCs of an edge with
327
 *  conflicting constraints
328
 *
329
 * scc represents the number of components
330
 * weak is set if the components are weakly connected
331
 *
332
 * max_weight is used during clustering and represents the maximal
333
 * weight of the relevant proximity edges.
334
 */
335
struct isl_sched_graph {
336
  isl_map_to_basic_set *intra_hmap;
337
  isl_map_to_basic_set *inter_hmap;
338
339
  struct isl_sched_node *node;
340
  int n;
341
  int maxvar;
342
  int max_row;
343
  int n_row;
344
345
  int *sorted;
346
347
  int n_total_row;
348
  int band_start;
349
350
  int root;
351
352
  struct isl_sched_edge *edge;
353
  int n_edge;
354
  int max_edge[isl_edge_last + 1];
355
  struct isl_hash_table *edge_table[isl_edge_last + 1];
356
357
  struct isl_hash_table *node_table;
358
  struct isl_region *region;
359
360
  isl_basic_set *lp;
361
362
  int src_scc;
363
  int dst_scc;
364
365
  int scc;
366
  int weak;
367
368
  int max_weight;
369
};
370
371
/* Initialize node_table based on the list of nodes.
372
 */
373
static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
374
254
{
375
254
  int i;
376
254
377
254
  graph->node_table = isl_hash_table_alloc(ctx, graph->n);
378
254
  if (!graph->node_table)
379
0
    return -1;
380
254
381
638
  
for (i = 0; 254
i < graph->n638
;
++i384
)
{384
382
384
    struct isl_hash_table_entry *entry;
383
384
    uint32_t hash;
384
384
385
384
    hash = isl_space_get_hash(graph->node[i].space);
386
384
    entry = isl_hash_table_find(ctx, graph->node_table, hash,
387
384
              &node_has_space,
388
384
              graph->node[i].space, 1);
389
384
    if (!entry)
390
0
      return -1;
391
384
    entry->data = &graph->node[i];
392
384
  }
393
254
394
254
  return 0;
395
254
}
396
397
/* Return a pointer to the node that lives within the given space,
398
 * or NULL if there is no such node.
399
 */
400
static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
401
  struct isl_sched_graph *graph, __isl_keep isl_space *dim)
402
1.48k
{
403
1.48k
  struct isl_hash_table_entry *entry;
404
1.48k
  uint32_t hash;
405
1.48k
406
1.48k
  hash = isl_space_get_hash(dim);
407
1.48k
  entry = isl_hash_table_find(ctx, graph->node_table, hash,
408
1.48k
            &node_has_space, dim, 0);
409
1.48k
410
1.46k
  return entry ? entry->data : NULL;
411
1.48k
}
412
413
static int edge_has_src_and_dst(const void *entry, const void *val)
414
1.11k
{
415
1.11k
  const struct isl_sched_edge *edge = entry;
416
1.11k
  const struct isl_sched_edge *temp = val;
417
1.11k
418
1.11k
  return edge->src == temp->src && edge->dst == temp->dst;
419
1.11k
}
420
421
/* Add the given edge to graph->edge_table[type].
422
 */
423
static isl_stat graph_edge_table_add(isl_ctx *ctx,
424
  struct isl_sched_graph *graph, enum isl_edge_type type,
425
  struct isl_sched_edge *edge)
426
769
{
427
769
  struct isl_hash_table_entry *entry;
428
769
  uint32_t hash;
429
769
430
769
  hash = isl_hash_init();
431
769
  hash = isl_hash_builtin(hash, edge->src);
432
769
  hash = isl_hash_builtin(hash, edge->dst);
433
769
  entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
434
769
            &edge_has_src_and_dst, edge, 1);
435
769
  if (!entry)
436
0
    return isl_stat_error;
437
769
  entry->data = edge;
438
769
439
769
  return isl_stat_ok;
440
769
}
441
442
/* Allocate the edge_tables based on the maximal number of edges of
443
 * each type.
444
 */
445
static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
446
254
{
447
254
  int i;
448
254
449
1.52k
  for (i = 0; 
i <= isl_edge_last1.52k
;
++i1.27k
)
{1.27k
450
1.27k
    graph->edge_table[i] = isl_hash_table_alloc(ctx,
451
1.27k
                  graph->max_edge[i]);
452
1.27k
    if (!graph->edge_table[i])
453
0
      return -1;
454
1.27k
  }
455
254
456
254
  return 0;
457
254
}
458
459
/* If graph->edge_table[type] contains an edge from the given source
460
 * to the given destination, then return the hash table entry of this edge.
461
 * Otherwise, return NULL.
462
 */
463
static struct isl_hash_table_entry *graph_find_edge_entry(
464
  struct isl_sched_graph *graph,
465
  enum isl_edge_type type,
466
  struct isl_sched_node *src, struct isl_sched_node *dst)
467
4.05k
{
468
4.05k
  isl_ctx *ctx = isl_space_get_ctx(src->space);
469
4.05k
  uint32_t hash;
470
4.05k
  struct isl_sched_edge temp = { .src = src, .dst = dst };
471
4.05k
472
4.05k
  hash = isl_hash_init();
473
4.05k
  hash = isl_hash_builtin(hash, temp.src);
474
4.05k
  hash = isl_hash_builtin(hash, temp.dst);
475
4.05k
  return isl_hash_table_find(ctx, graph->edge_table[type], hash,
476
4.05k
            &edge_has_src_and_dst, &temp, 0);
477
4.05k
}
478
479
480
/* If graph->edge_table[type] contains an edge from the given source
481
 * to the given destination, then return this edge.
482
 * Otherwise, return NULL.
483
 */
484
static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
485
  enum isl_edge_type type,
486
  struct isl_sched_node *src, struct isl_sched_node *dst)
487
3.51k
{
488
3.51k
  struct isl_hash_table_entry *entry;
489
3.51k
490
3.51k
  entry = graph_find_edge_entry(graph, type, src, dst);
491
3.51k
  if (!entry)
492
2.64k
    return NULL;
493
3.51k
494
876
  return entry->data;
495
3.51k
}
496
497
/* Check whether the dependence graph has an edge of the given type
498
 * between the given two nodes.
499
 */
500
static isl_bool graph_has_edge(struct isl_sched_graph *graph,
501
  enum isl_edge_type type,
502
  struct isl_sched_node *src, struct isl_sched_node *dst)
503
1.60k
{
504
1.60k
  struct isl_sched_edge *edge;
505
1.60k
  isl_bool empty;
506
1.60k
507
1.60k
  edge = graph_find_edge(graph, type, src, dst);
508
1.60k
  if (!edge)
509
1.29k
    return 0;
510
1.60k
511
309
  empty = isl_map_plain_is_empty(edge->map);
512
309
  if (empty < 0)
513
0
    return isl_bool_error;
514
309
515
309
  return !empty;
516
309
}
517
518
/* Look for any edge with the same src, dst and map fields as "model".
519
 *
520
 * Return the matching edge if one can be found.
521
 * Return "model" if no matching edge is found.
522
 * Return NULL on error.
523
 */
524
static struct isl_sched_edge *graph_find_matching_edge(
525
  struct isl_sched_graph *graph, struct isl_sched_edge *model)
526
586
{
527
586
  enum isl_edge_type i;
528
586
  struct isl_sched_edge *edge;
529
586
530
1.79k
  for (i = isl_edge_first; 
i <= isl_edge_last1.79k
;
++i1.20k
)
{1.55k
531
1.55k
    int is_equal;
532
1.55k
533
1.55k
    edge = graph_find_edge(graph, i, model->src, model->dst);
534
1.55k
    if (!edge)
535
1.18k
      continue;
536
364
    is_equal = isl_map_plain_is_equal(model->map, edge->map);
537
364
    if (is_equal < 0)
538
0
      return NULL;
539
364
    
if (364
is_equal364
)
540
345
      return edge;
541
364
  }
542
586
543
241
  return model;
544
586
}
545
546
/* Remove the given edge from all the edge_tables that refer to it.
547
 */
548
static void graph_remove_edge(struct isl_sched_graph *graph,
549
  struct isl_sched_edge *edge)
550
108
{
551
108
  isl_ctx *ctx = isl_map_get_ctx(edge->map);
552
108
  enum isl_edge_type i;
553
108
554
648
  for (i = isl_edge_first; 
i <= isl_edge_last648
;
++i540
)
{540
555
540
    struct isl_hash_table_entry *entry;
556
540
557
540
    entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
558
540
    if (!entry)
559
303
      continue;
560
237
    
if (237
entry->data != edge237
)
561
22
      continue;
562
215
    isl_hash_table_remove(ctx, graph->edge_table[i], entry);
563
215
  }
564
108
}
565
566
/* Check whether the dependence graph has any edge
567
 * between the given two nodes.
568
 */
569
static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
570
  struct isl_sched_node *src, struct isl_sched_node *dst)
571
310
{
572
310
  enum isl_edge_type i;
573
310
  isl_bool r;
574
310
575
1.14k
  for (i = isl_edge_first; 
i <= isl_edge_last1.14k
;
++i832
)
{1.00k
576
1.00k
    r = graph_has_edge(graph, i, src, dst);
577
1.00k
    if (
r < 0 || 1.00k
r1.00k
)
578
168
      return r;
579
1.00k
  }
580
310
581
142
  return r;
582
310
}
583
584
/* Check whether the dependence graph has a validity edge
585
 * between the given two nodes.
586
 *
587
 * Conditional validity edges are essentially validity edges that
588
 * can be ignored if the corresponding condition edges are iteration private.
589
 * Here, we are only checking for the presence of validity
590
 * edges, so we need to consider the conditional validity edges too.
591
 * In particular, this function is used during the detection
592
 * of strongly connected components and we cannot ignore
593
 * conditional validity edges during this detection.
594
 */
595
static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph,
596
  struct isl_sched_node *src, struct isl_sched_node *dst)
597
362
{
598
362
  isl_bool r;
599
362
600
362
  r = graph_has_edge(graph, isl_edge_validity, src, dst);
601
362
  if (
r < 0 || 362
r362
)
602
121
    return r;
603
362
604
241
  return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
605
362
}
606
607
static int graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
608
  int n_node, int n_edge)
609
254
{
610
254
  int i;
611
254
612
254
  graph->n = n_node;
613
254
  graph->n_edge = n_edge;
614
254
  graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
615
254
  graph->sorted = isl_calloc_array(ctx, int, graph->n);
616
254
  graph->region = isl_alloc_array(ctx, struct isl_region, graph->n);
617
254
  graph->edge = isl_calloc_array(ctx,
618
254
          struct isl_sched_edge, graph->n_edge);
619
254
620
254
  graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
621
254
  graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
622
254
623
254
  if (
!graph->node || 254
!graph->region254
||
(graph->n_edge && 254
!graph->edge162
) ||
624
254
      !graph->sorted)
625
0
    return -1;
626
254
627
640
  
for(i = 0; 254
i < graph->n640
;
++i386
)
628
386
    graph->sorted[i] = i;
629
254
630
254
  return 0;
631
254
}
632
633
static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
634
273
{
635
273
  int i;
636
273
637
273
  isl_map_to_basic_set_free(graph->intra_hmap);
638
273
  isl_map_to_basic_set_free(graph->inter_hmap);
639
273
640
273
  if (graph->node)
641
638
    
for (i = 0; 254
i < graph->n638
;
++i384
)
{384
642
384
      isl_space_free(graph->node[i].space);
643
384
      isl_set_free(graph->node[i].hull);
644
384
      isl_multi_aff_free(graph->node[i].compress);
645
384
      isl_multi_aff_free(graph->node[i].decompress);
646
384
      isl_mat_free(graph->node[i].sched);
647
384
      isl_map_free(graph->node[i].sched_map);
648
384
      isl_mat_free(graph->node[i].cmap);
649
384
      isl_mat_free(graph->node[i].cinv);
650
384
      isl_mat_free(graph->node[i].ctrans);
651
384
      if (graph->root)
652
238
        free(graph->node[i].coincident);
653
384
      isl_multi_val_free(graph->node[i].sizes);
654
384
      isl_vec_free(graph->node[i].max);
655
384
    }
656
273
  free(graph->node);
657
273
  free(graph->sorted);
658
273
  if (graph->edge)
659
568
    
for (i = 0; 254
i < graph->n_edge568
;
++i314
)
{314
660
314
      isl_map_free(graph->edge[i].map);
661
314
      isl_union_map_free(graph->edge[i].tagged_condition);
662
314
      isl_union_map_free(graph->edge[i].tagged_validity);
663
314
    }
664
273
  free(graph->edge);
665
273
  free(graph->region);
666
1.63k
  for (i = 0; 
i <= isl_edge_last1.63k
;
++i1.36k
)
667
1.36k
    isl_hash_table_free(ctx, graph->edge_table[i]);
668
273
  isl_hash_table_free(ctx, graph->node_table);
669
273
  isl_basic_set_free(graph->lp);
670
273
}
671
672
/* For each "set" on which this function is called, increment
673
 * graph->n by one and update graph->maxvar.
674
 */
675
static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
676
240
{
677
240
  struct isl_sched_graph *graph = user;
678
240
  int nvar = isl_set_dim(set, isl_dim_set);
679
240
680
240
  graph->n++;
681
240
  if (nvar > graph->maxvar)
682
145
    graph->maxvar = nvar;
683
240
684
240
  isl_set_free(set);
685
240
686
240
  return isl_stat_ok;
687
240
}
688
689
/* Compute the number of rows that should be allocated for the schedule.
690
 * In particular, we need one row for each variable or one row
691
 * for each basic map in the dependences.
692
 * Note that it is practically impossible to exhaust both
693
 * the number of dependences and the number of variables.
694
 */
695
static isl_stat compute_max_row(struct isl_sched_graph *graph,
696
  __isl_keep isl_schedule_constraints *sc)
697
133
{
698
133
  int n_edge;
699
133
  isl_stat r;
700
133
  isl_union_set *domain;
701
133
702
133
  graph->n = 0;
703
133
  graph->maxvar = 0;
704
133
  domain = isl_schedule_constraints_get_domain(sc);
705
133
  r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
706
133
  isl_union_set_free(domain);
707
133
  if (r < 0)
708
0
    return isl_stat_error;
709
133
  n_edge = isl_schedule_constraints_n_basic_map(sc);
710
133
  if (n_edge < 0)
711
0
    return isl_stat_error;
712
133
  graph->max_row = n_edge + graph->maxvar;
713
133
714
133
  return isl_stat_ok;
715
133
}
716
717
/* Does "bset" have any defining equalities for its set variables?
718
 */
719
static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
720
238
{
721
238
  int i, n;
722
238
723
238
  if (!bset)
724
0
    return isl_bool_error;
725
238
726
238
  n = isl_basic_set_dim(bset, isl_dim_set);
727
589
  for (i = 0; 
i < n589
;
++i351
)
{359
728
359
    isl_bool has;
729
359
730
359
    has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
731
359
              NULL);
732
359
    if (
has < 0 || 359
has359
)
733
8
      return has;
734
359
  }
735
238
736
230
  return isl_bool_false;
737
238
}
738
739
/* Set the entries of node->max to the value of the schedule_max_coefficient
740
 * option, if set.
741
 */
742
static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
743
4
{
744
4
  int max;
745
4
746
4
  max = isl_options_get_schedule_max_coefficient(ctx);
747
4
  if (max == -1)
748
4
    return isl_stat_ok;
749
4
750
0
  node->max = isl_vec_alloc(ctx, node->nvar);
751
0
  node->max = isl_vec_set_si(node->max, max);
752
0
  if (!node->max)
753
0
    return isl_stat_error;
754
0
755
0
  return isl_stat_ok;
756
0
}
757
758
/* Set the entries of node->max to the minimum of the schedule_max_coefficient
759
 * option (if set) and half of the minimum of the sizes in the other
760
 * dimensions.  If the minimum of the sizes is one, half of the size
761
 * is zero and this value is reset to one.
762
 * If the global minimum is unbounded (i.e., if both
763
 * the schedule_max_coefficient is not set and the sizes in the other
764
 * dimensions are unbounded), then store a negative value.
765
 * If the schedule coefficient is close to the size of the instance set
766
 * in another dimension, then the schedule may represent a loop
767
 * coalescing transformation (especially if the coefficient
768
 * in that other dimension is one).  Forcing the coefficient to be
769
 * smaller than or equal to half the minimal size should avoid this
770
 * situation.
771
 */
772
static isl_stat compute_max_coefficient(isl_ctx *ctx,
773
  struct isl_sched_node *node)
774
234
{
775
234
  int max;
776
234
  int i, j;
777
234
  isl_vec *v;
778
234
779
234
  max = isl_options_get_schedule_max_coefficient(ctx);
780
234
  v = isl_vec_alloc(ctx, node->nvar);
781
234
  if (!v)
782
0
    return isl_stat_error;
783
234
784
575
  
for (i = 0; 234
i < node->nvar575
;
++i341
)
{341
785
341
    isl_int_set_si(v->el[i], max);
786
341
    isl_int_mul_si(v->el[i], v->el[i], 2);
787
341
  }
788
234
789
575
  for (i = 0; 
i < node->nvar575
;
++i341
)
{341
790
341
    isl_val *size;
791
341
792
341
    size = isl_multi_val_get_val(node->sizes, i);
793
341
    if (!size)
794
0
      goto error;
795
341
    
if (341
!isl_val_is_int(size)341
)
{195
796
195
      isl_val_free(size);
797
195
      continue;
798
195
    }
799
460
    
for (j = 0; 146
j < node->nvar460
;
++j314
)
{314
800
314
      if (j == i)
801
146
        continue;
802
168
      
if (168
isl_int_is_neg168
(v->el[j]) ||168
803
147
          isl_int_gt(v->el[j], size->n))
804
23
        isl_int_set(v->el[j], size->n);
805
168
    }
806
146
    isl_val_free(size);
807
146
  }
808
234
809
575
  
for (i = 0; 234
i < node->nvar575
;
++i341
)
{341
810
341
    isl_int_fdiv_q_ui(v->el[i], v->el[i], 2);
811
341
    if (isl_int_is_zero(v->el[i]))
812
10
      isl_int_set_si(v->el[i], 1);
813
341
  }
814
234
815
234
  node->max = v;
816
234
  return isl_stat_ok;
817
0
error:
818
0
  isl_vec_free(v);
819
0
  return isl_stat_error;
820
234
}
821
822
/* Compute and return the size of "set" in dimension "dim".
823
 * The size is taken to be the difference in values for that variable
824
 * for fixed values of the other variables.
825
 * In particular, the variable is first isolated from the other variables
826
 * in the range of a map
827
 *
828
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
829
 *
830
 * and then duplicated
831
 *
832
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
833
 *
834
 * The shared variables are then projected out and the maximal value
835
 * of i_dim' - i_dim is computed.
836
 */
837
static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
838
341
{
839
341
  isl_map *map;
840
341
  isl_local_space *ls;
841
341
  isl_aff *obj;
842
341
  isl_val *v;
843
341
844
341
  map = isl_set_project_onto_map(set, isl_dim_set, dim, 1);
845
341
  map = isl_map_project_out(map, isl_dim_in, dim, 1);
846
341
  map = isl_map_range_product(map, isl_map_copy(map));
847
341
  map = isl_set_unwrap(isl_map_range(map));
848
341
  set = isl_map_deltas(map);
849
341
  ls = isl_local_space_from_space(isl_set_get_space(set));
850
341
  obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
851
341
  v = isl_set_max_val(set, obj);
852
341
  isl_aff_free(obj);
853
341
  isl_set_free(set);
854
341
855
341
  return v;
856
341
}
857
858
/* Compute the size of the instance set "set" of "node", after compression,
859
 * as well as bounds on the corresponding coefficients, if needed.
860
 *
861
 * The sizes are needed when the schedule_treat_coalescing option is set.
862
 * The bounds are needed when the schedule_treat_coalescing option or
863
 * the schedule_max_coefficient option is set.
864
 *
865
 * If the schedule_treat_coalescing option is not set, then at most
866
 * the bounds need to be set and this is done in set_max_coefficient.
867
 * Otherwise, compress the domain if needed, compute the size
868
 * in each direction and store the results in node->size.
869
 * Finally, set the bounds on the coefficients based on the sizes
870
 * and the schedule_max_coefficient option in compute_max_coefficient.
871
 */
872
static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
873
  __isl_take isl_set *set)
874
238
{
875
238
  int j, n;
876
238
  isl_multi_val *mv;
877
238
878
238
  if (
!isl_options_get_schedule_treat_coalescing(ctx)238
)
{4
879
4
    isl_set_free(set);
880
4
    return set_max_coefficient(ctx, node);
881
4
  }
882
238
883
234
  
if (234
node->compressed234
)
884
8
    set = isl_set_preimage_multi_aff(set,
885
8
          isl_multi_aff_copy(node->decompress));
886
234
  mv = isl_multi_val_zero(isl_set_get_space(set));
887
234
  n = isl_set_dim(set, isl_dim_set);
888
575
  for (j = 0; 
j < n575
;
++j341
)
{341
889
341
    isl_val *v;
890
341
891
341
    v = compute_size(isl_set_copy(set), j);
892
341
    mv = isl_multi_val_set_val(mv, j, v);
893
341
  }
894
234
  node->sizes = mv;
895
234
  isl_set_free(set);
896
234
  if (!node->sizes)
897
0
    return isl_stat_error;
898
234
  return compute_max_coefficient(ctx, node);
899
234
}
900
901
/* Add a new node to the graph representing the given instance set.
902
 * "nvar" is the (possibly compressed) number of variables and
903
 * may be smaller than then number of set variables in "set"
904
 * if "compressed" is set.
905
 * If "compressed" is set, then "hull" represents the constraints
906
 * that were used to derive the compression, while "compress" and
907
 * "decompress" map the original space to the compressed space and
908
 * vice versa.
909
 * If "compressed" is not set, then "hull", "compress" and "decompress"
910
 * should be NULL.
911
 *
912
 * Compute the size of the instance set and bounds on the coefficients,
913
 * if needed.
914
 */
915
static isl_stat add_node(struct isl_sched_graph *graph,
916
  __isl_take isl_set *set, int nvar, int compressed,
917
  __isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
918
  __isl_take isl_multi_aff *decompress)
919
238
{
920
238
  int nparam;
921
238
  isl_ctx *ctx;
922
238
  isl_mat *sched;
923
238
  isl_space *space;
924
238
  int *coincident;
925
238
  struct isl_sched_node *node;
926
238
927
238
  if (!set)
928
0
    return isl_stat_error;
929
238
930
238
  ctx = isl_set_get_ctx(set);
931
238
  nparam = isl_set_dim(set, isl_dim_param);
932
238
  if (!ctx->opt->schedule_parametric)
933
24
    nparam = 0;
934
238
  sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
935
238
  node = &graph->node[graph->n];
936
238
  graph->n++;
937
238
  space = isl_set_get_space(set);
938
238
  node->space = space;
939
238
  node->nvar = nvar;
940
238
  node->nparam = nparam;
941
238
  node->sched = sched;
942
238
  node->sched_map = NULL;
943
238
  coincident = isl_calloc_array(ctx, int, graph->max_row);
944
238
  node->coincident = coincident;
945
238
  node->compressed = compressed;
946
238
  node->hull = hull;
947
238
  node->compress = compress;
948
238
  node->decompress = decompress;
949
238
  if (compute_sizes_and_max(ctx, node, set) < 0)
950
0
    return isl_stat_error;
951
238
952
238
  
if (238
!space || 238
!sched238
||
(graph->max_row && 238
!coincident236
))
953
0
    return isl_stat_error;
954
238
  
if (238
compressed && 238
(!hull || 8
!compress8
||
!decompress8
))
955
0
    return isl_stat_error;
956
238
957
238
  return isl_stat_ok;
958
238
}
959
960
/* Add a new node to the graph representing the given set.
961
 *
962
 * If any of the set variables is defined by an equality, then
963
 * we perform variable compression such that we can perform
964
 * the scheduling on the compressed domain.
965
 */
966
static isl_stat extract_node(__isl_take isl_set *set, void *user)
967
238
{
968
238
  int nvar;
969
238
  isl_bool has_equality;
970
238
  isl_basic_set *hull;
971
238
  isl_set *hull_set;
972
238
  isl_morph *morph;
973
238
  isl_multi_aff *compress, *decompress;
974
238
  struct isl_sched_graph *graph = user;
975
238
976
238
  hull = isl_set_affine_hull(isl_set_copy(set));
977
238
  hull = isl_basic_set_remove_divs(hull);
978
238
  nvar = isl_set_dim(set, isl_dim_set);
979
238
  has_equality = has_any_defining_equality(hull);
980
238
981
238
  if (has_equality < 0)
982
0
    goto error;
983
238
  
if (238
!has_equality238
)
{230
984
230
    isl_basic_set_free(hull);
985
230
    return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
986
230
  }
987
238
988
8
  morph = isl_basic_set_variable_compression(hull, isl_dim_set);
989
8
  nvar = isl_morph_ran_dim(morph, isl_dim_set);
990
8
  compress = isl_morph_get_var_multi_aff(morph);
991
8
  morph = isl_morph_inverse(morph);
992
8
  decompress = isl_morph_get_var_multi_aff(morph);
993
8
  isl_morph_free(morph);
994
8
995
8
  hull_set = isl_set_from_basic_set(hull);
996
8
  return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
997
0
error:
998
0
  isl_basic_set_free(hull);
999
0
  isl_set_free(set);
1000
0
  return isl_stat_error;
1001
238
}
1002
1003
struct isl_extract_edge_data {
1004
  enum isl_edge_type type;
1005
  struct isl_sched_graph *graph;
1006
};
1007
1008
/* Merge edge2 into edge1, freeing the contents of edge2.
1009
 * Return 0 on success and -1 on failure.
1010
 *
1011
 * edge1 and edge2 are assumed to have the same value for the map field.
1012
 */
1013
static int merge_edge(struct isl_sched_edge *edge1,
1014
  struct isl_sched_edge *edge2)
1015
345
{
1016
345
  edge1->types |= edge2->types;
1017
345
  isl_map_free(edge2->map);
1018
345
1019
345
  if (
is_condition(edge2)345
)
{20
1020
20
    if (!edge1->tagged_condition)
1021
20
      edge1->tagged_condition = edge2->tagged_condition;
1022
20
    else
1023
0
      edge1->tagged_condition =
1024
0
        isl_union_map_union(edge1->tagged_condition,
1025
0
                edge2->tagged_condition);
1026
20
  }
1027
345
1028
345
  if (
is_conditional_validity(edge2)345
)
{3
1029
3
    if (!edge1->tagged_validity)
1030
3
      edge1->tagged_validity = edge2->tagged_validity;
1031
3
    else
1032
0
      edge1->tagged_validity =
1033
0
        isl_union_map_union(edge1->tagged_validity,
1034
0
                edge2->tagged_validity);
1035
3
  }
1036
345
1037
345
  if (
is_condition(edge2) && 345
!edge1->tagged_condition20
)
1038
0
    return -1;
1039
345
  
if (345
is_conditional_validity(edge2) && 345
!edge1->tagged_validity3
)
1040
0
    return -1;
1041
345
1042
345
  return 0;
1043
345
}
1044
1045
/* Insert dummy tags in domain and range of "map".
1046
 *
1047
 * In particular, if "map" is of the form
1048
 *
1049
 *  A -> B
1050
 *
1051
 * then return
1052
 *
1053
 *  [A -> dummy_tag] -> [B -> dummy_tag]
1054
 *
1055
 * where the dummy_tags are identical and equal to any dummy tags
1056
 * introduced by any other call to this function.
1057
 */
1058
static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
1059
12
{
1060
12
  static char dummy;
1061
12
  isl_ctx *ctx;
1062
12
  isl_id *id;
1063
12
  isl_space *space;
1064
12
  isl_set *domain, *range;
1065
12
1066
12
  ctx = isl_map_get_ctx(map);
1067
12
1068
12
  id = isl_id_alloc(ctx, NULL, &dummy);
1069
12
  space = isl_space_params(isl_map_get_space(map));
1070
12
  space = isl_space_set_from_params(space);
1071
12
  space = isl_space_set_tuple_id(space, isl_dim_set, id);
1072
12
  space = isl_space_map_from_set(space);
1073
12
1074
12
  domain = isl_map_wrap(map);
1075
12
  range = isl_map_wrap(isl_map_universe(space));
1076
12
  map = isl_map_from_domain_and_range(domain, range);
1077
12
  map = isl_map_zip(map);
1078
12
1079
12
  return map;
1080
12
}
1081
1082
/* Given that at least one of "src" or "dst" is compressed, return
1083
 * a map between the spaces of these nodes restricted to the affine
1084
 * hull that was used in the compression.
1085
 */
1086
static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
1087
  struct isl_sched_node *dst)
1088
22
{
1089
22
  isl_set *dom, *ran;
1090
22
1091
22
  if (src->compressed)
1092
14
    dom = isl_set_copy(src->hull);
1093
22
  else
1094
8
    dom = isl_set_universe(isl_space_copy(src->space));
1095
22
  if (dst->compressed)
1096
14
    ran = isl_set_copy(dst->hull);
1097
22
  else
1098
8
    ran = isl_set_universe(isl_space_copy(dst->space));
1099
22
1100
22
  return isl_map_from_domain_and_range(dom, ran);
1101
22
}
1102
1103
/* Intersect the domains of the nested relations in domain and range
1104
 * of "tagged" with "map".
1105
 */
1106
static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
1107
  __isl_keep isl_map *map)
1108
14
{
1109
14
  isl_set *set;
1110
14
1111
14
  tagged = isl_map_zip(tagged);
1112
14
  set = isl_map_wrap(isl_map_copy(map));
1113
14
  tagged = isl_map_intersect_domain(tagged, set);
1114
14
  tagged = isl_map_zip(tagged);
1115
14
  return tagged;
1116
14
}
1117
1118
/* Return a pointer to the node that lives in the domain space of "map"
1119
 * or NULL if there is no such node.
1120
 */
1121
static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
1122
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1123
610
{
1124
610
  struct isl_sched_node *node;
1125
610
  isl_space *space;
1126
610
1127
610
  space = isl_space_domain(isl_map_get_space(map));
1128
610
  node = graph_find_node(ctx, graph, space);
1129
610
  isl_space_free(space);
1130
610
1131
610
  return node;
1132
610
}
1133
1134
/* Return a pointer to the node that lives in the range space of "map"
1135
 * or NULL if there is no such node.
1136
 */
1137
static struct isl_sched_node *find_range_node(isl_ctx *ctx,
1138
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1139
610
{
1140
610
  struct isl_sched_node *node;
1141
610
  isl_space *space;
1142
610
1143
610
  space = isl_space_range(isl_map_get_space(map));
1144
610
  node = graph_find_node(ctx, graph, space);
1145
610
  isl_space_free(space);
1146
610
1147
610
  return node;
1148
610
}
1149
1150
/* Add a new edge to the graph based on the given map
1151
 * and add it to data->graph->edge_table[data->type].
1152
 * If a dependence relation of a given type happens to be identical
1153
 * to one of the dependence relations of a type that was added before,
1154
 * then we don't create a new edge, but instead mark the original edge
1155
 * as also representing a dependence of the current type.
1156
 *
1157
 * Edges of type isl_edge_condition or isl_edge_conditional_validity
1158
 * may be specified as "tagged" dependence relations.  That is, "map"
1159
 * may contain elements (i -> a) -> (j -> b), where i -> j denotes
1160
 * the dependence on iterations and a and b are tags.
1161
 * edge->map is set to the relation containing the elements i -> j,
1162
 * while edge->tagged_condition and edge->tagged_validity contain
1163
 * the union of all the "map" relations
1164
 * for which extract_edge is called that result in the same edge->map.
1165
 *
1166
 * If the source or the destination node is compressed, then
1167
 * intersect both "map" and "tagged" with the constraints that
1168
 * were used to construct the compression.
1169
 * This ensures that there are no schedule constraints defined
1170
 * outside of these domains, while the scheduler no longer has
1171
 * any control over those outside parts.
1172
 */
1173
static isl_stat extract_edge(__isl_take isl_map *map, void *user)
1174
610
{
1175
610
  isl_ctx *ctx = isl_map_get_ctx(map);
1176
610
  struct isl_extract_edge_data *data = user;
1177
610
  struct isl_sched_graph *graph = data->graph;
1178
610
  struct isl_sched_node *src, *dst;
1179
610
  struct isl_sched_edge *edge;
1180
610
  isl_map *tagged = NULL;
1181
610
1182
610
  if (data->type == isl_edge_condition ||
1183
583
      
data->type == isl_edge_conditional_validity583
)
{59
1184
59
    if (
isl_map_can_zip(map)59
)
{47
1185
47
      tagged = isl_map_copy(map);
1186
47
      map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
1187
12
    } else {
1188
12
      tagged = insert_dummy_tags(isl_map_copy(map));
1189
12
    }
1190
59
  }
1191
610
1192
610
  src = find_domain_node(ctx, graph, map);
1193
610
  dst = find_range_node(ctx, graph, map);
1194
610
1195
610
  if (
!src || 610
!dst610
)
{24
1196
24
    isl_map_free(map);
1197
24
    isl_map_free(tagged);
1198
24
    return isl_stat_ok;
1199
24
  }
1200
610
1201
586
  
if (586
src->compressed || 586
dst->compressed572
)
{22
1202
22
    isl_map *hull;
1203
22
    hull = extract_hull(src, dst);
1204
22
    if (tagged)
1205
14
      tagged = map_intersect_domains(tagged, hull);
1206
22
    map = isl_map_intersect(map, hull);
1207
22
  }
1208
586
1209
586
  graph->edge[graph->n_edge].src = src;
1210
586
  graph->edge[graph->n_edge].dst = dst;
1211
586
  graph->edge[graph->n_edge].map = map;
1212
586
  graph->edge[graph->n_edge].types = 0;
1213
586
  graph->edge[graph->n_edge].tagged_condition = NULL;
1214
586
  graph->edge[graph->n_edge].tagged_validity = NULL;
1215
586
  set_type(&graph->edge[graph->n_edge], data->type);
1216
586
  if (data->type == isl_edge_condition)
1217
27
    graph->edge[graph->n_edge].tagged_condition =
1218
27
          isl_union_map_from_map(tagged);
1219
586
  if (data->type == isl_edge_conditional_validity)
1220
32
    graph->edge[graph->n_edge].tagged_validity =
1221
32
          isl_union_map_from_map(tagged);
1222
586
1223
586
  edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
1224
586
  if (
!edge586
)
{0
1225
0
    graph->n_edge++;
1226
0
    return isl_stat_error;
1227
0
  }
1228
586
  
if (586
edge == &graph->edge[graph->n_edge]586
)
1229
241
    return graph_edge_table_add(ctx, graph, data->type,
1230
241
            &graph->edge[graph->n_edge++]);
1231
586
1232
345
  
if (345
merge_edge(edge, &graph->edge[graph->n_edge]) < 0345
)
1233
0
    return -1;
1234
345
1235
345
  return graph_edge_table_add(ctx, graph, data->type, edge);
1236
345
}
1237
1238
/* Initialize the schedule graph "graph" from the schedule constraints "sc".
1239
 *
1240
 * The context is included in the domain before the nodes of
1241
 * the graphs are extracted in order to be able to exploit
1242
 * any possible additional equalities.
1243
 * Note that this intersection is only performed locally here.
1244
 */
1245
static isl_stat graph_init(struct isl_sched_graph *graph,
1246
  __isl_keep isl_schedule_constraints *sc)
1247
133
{
1248
133
  isl_ctx *ctx;
1249
133
  isl_union_set *domain;
1250
133
  isl_union_map *c;
1251
133
  struct isl_extract_edge_data data;
1252
133
  enum isl_edge_type i;
1253
133
  isl_stat r;
1254
133
1255
133
  if (!sc)
1256
0
    return isl_stat_error;
1257
133
1258
133
  ctx = isl_schedule_constraints_get_ctx(sc);
1259
133
1260
133
  domain = isl_schedule_constraints_get_domain(sc);
1261
133
  graph->n = isl_union_set_n_set(domain);
1262
133
  isl_union_set_free(domain);
1263
133
1264
133
  if (graph_alloc(ctx, graph, graph->n,
1265
133
      isl_schedule_constraints_n_map(sc)) < 0)
1266
0
    return isl_stat_error;
1267
133
1268
133
  
if (133
compute_max_row(graph, sc) < 0133
)
1269
0
    return isl_stat_error;
1270
133
  graph->root = 1;
1271
133
  graph->n = 0;
1272
133
  domain = isl_schedule_constraints_get_domain(sc);
1273
133
  domain = isl_union_set_intersect_params(domain,
1274
133
            isl_schedule_constraints_get_context(sc));
1275
133
  r = isl_union_set_foreach_set(domain, &extract_node, graph);
1276
133
  isl_union_set_free(domain);
1277
133
  if (r < 0)
1278
0
    return isl_stat_error;
1279
133
  
if (133
graph_init_table(ctx, graph) < 0133
)
1280
0
    return isl_stat_error;
1281
798
  
for (i = isl_edge_first; 133
i <= isl_edge_last798
;
++i665
)
{665
1282
665
    c = isl_schedule_constraints_get(sc, i);
1283
665
    graph->max_edge[i] = isl_union_map_n_map(c);
1284
665
    isl_union_map_free(c);
1285
665
    if (!c)
1286
0
      return isl_stat_error;
1287
665
  }
1288
133
  
if (133
graph_init_edge_tables(ctx, graph) < 0133
)
1289
0
    return isl_stat_error;
1290
133
  graph->n_edge = 0;
1291
133
  data.graph = graph;
1292
798
  for (i = isl_edge_first; 
i <= isl_edge_last798
;
++i665
)
{665
1293
665
    isl_stat r;
1294
665
1295
665
    data.type = i;
1296
665
    c = isl_schedule_constraints_get(sc, i);
1297
665
    r = isl_union_map_foreach_map(c, &extract_edge, &data);
1298
665
    isl_union_map_free(c);
1299
665
    if (r < 0)
1300
0
      return isl_stat_error;
1301
665
  }
1302
133
1303
133
  return isl_stat_ok;
1304
133
}
1305
1306
/* Check whether there is any dependence from node[j] to node[i]
1307
 * or from node[i] to node[j].
1308
 */
1309
static isl_bool node_follows_weak(int i, int j, void *user)
1310
210
{
1311
210
  isl_bool f;
1312
210
  struct isl_sched_graph *graph = user;
1313
210
1314
210
  f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
1315
210
  if (
f < 0 || 210
f210
)
1316
110
    return f;
1317
100
  return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
1318
210
}
1319
1320
/* Check whether there is a (conditional) validity dependence from node[j]
1321
 * to node[i], forcing node[i] to follow node[j].
1322
 */
1323
static isl_bool node_follows_strong(int i, int j, void *user)
1324
299
{
1325
299
  struct isl_sched_graph *graph = user;
1326
299
1327
299
  return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
1328
299
}
1329
1330
/* Use Tarjan's algorithm for computing the strongly connected components
1331
 * in the dependence graph only considering those edges defined by "follows".
1332
 */
1333
static int detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph,
1334
  isl_bool (*follows)(int i, int j, void *user))
1335
402
{
1336
402
  int i, n;
1337
402
  struct isl_tarjan_graph *g = NULL;
1338
402
1339
402
  g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
1340
402
  if (!g)
1341
0
    return -1;
1342
402
1343
402
  graph->scc = 0;
1344
402
  i = 0;
1345
402
  n = graph->n;
1346
934
  while (
n934
)
{532
1347
1.20k
    while (
g->order[i] != -11.20k
)
{668
1348
668
      graph->node[g->order[i]].scc = graph->scc;
1349
668
      --n;
1350
668
      ++i;
1351
668
    }
1352
532
    ++i;
1353
532
    graph->scc++;
1354
532
  }
1355
402
1356
402
  isl_tarjan_graph_free(g);
1357
402
1358
402
  return 0;
1359
402
}
1360
1361
/* Apply Tarjan's algorithm to detect the strongly connected components
1362
 * in the dependence graph.
1363
 * Only consider the (conditional) validity dependences and clear "weak".
1364
 */
1365
static int detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1366
256
{
1367
256
  graph->weak = 0;
1368
256
  return detect_ccs(ctx, graph, &node_follows_strong);
1369
256
}
1370
1371
/* Apply Tarjan's algorithm to detect the (weakly) connected components
1372
 * in the dependence graph.
1373
 * Consider all dependences and set "weak".
1374
 */
1375
static int detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1376
130
{
1377
130
  graph->weak = 1;
1378
130
  return detect_ccs(ctx, graph, &node_follows_weak);
1379
130
}
1380
1381
static int cmp_scc(const void *a, const void *b, void *data)
1382
90
{
1383
90
  struct isl_sched_graph *graph = data;
1384
90
  const int *i1 = a;
1385
90
  const int *i2 = b;
1386
90
1387
90
  return graph->node[*i1].scc - graph->node[*i2].scc;
1388
90
}
1389
1390
/* Sort the elements of graph->sorted according to the corresponding SCCs.
1391
 */
1392
static int sort_sccs(struct isl_sched_graph *graph)
1393
219
{
1394
219
  return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
1395
219
}
1396
1397
/* Given a dependence relation R from "node" to itself,
1398
 * construct the set of coefficients of valid constraints for elements
1399
 * in that dependence relation.
1400
 * In particular, the result contains tuples of coefficients
1401
 * c_0, c_n, c_x such that
1402
 *
1403
 *  c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
1404
 *
1405
 * or, equivalently,
1406
 *
1407
 *  c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
1408
 *
1409
 * We choose here to compute the dual of delta R.
1410
 * Alternatively, we could have computed the dual of R, resulting
1411
 * in a set of tuples c_0, c_n, c_x, c_y, and then
1412
 * plugged in (c_0, c_n, c_x, -c_x).
1413
 *
1414
 * If "node" has been compressed, then the dependence relation
1415
 * is also compressed before the set of coefficients is computed.
1416
 */
1417
static __isl_give isl_basic_set *intra_coefficients(
1418
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1419
  __isl_take isl_map *map)
1420
891
{
1421
891
  isl_set *delta;
1422
891
  isl_map *key;
1423
891
  isl_basic_set *coef;
1424
891
  isl_maybe_isl_basic_set m;
1425
891
1426
891
  m = isl_map_to_basic_set_try_get(graph->intra_hmap, map);
1427
891
  if (
m.valid < 0 || 891
m.valid891
)
{763
1428
763
    isl_map_free(map);
1429
763
    return m.value;
1430
763
  }
1431
891
1432
128
  key = isl_map_copy(map);
1433
128
  if (
node->compressed128
)
{0
1434
0
    map = isl_map_preimage_domain_multi_aff(map,
1435
0
            isl_multi_aff_copy(node->decompress));
1436
0
    map = isl_map_preimage_range_multi_aff(map,
1437
0
            isl_multi_aff_copy(node->decompress));
1438
0
  }
1439
128
  delta = isl_set_remove_divs(isl_map_deltas(map));
1440
128
  coef = isl_set_coefficients(delta);
1441
128
  graph->intra_hmap = isl_map_to_basic_set_set(graph->intra_hmap, key,
1442
128
          isl_basic_set_copy(coef));
1443
128
1444
128
  return coef;
1445
891
}
1446
1447
/* Given a dependence relation R, construct the set of coefficients
1448
 * of valid constraints for elements in that dependence relation.
1449
 * In particular, the result contains tuples of coefficients
1450
 * c_0, c_n, c_x, c_y such that
1451
 *
1452
 *  c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
1453
 *
1454
 * If the source or destination nodes of "edge" have been compressed,
1455
 * then the dependence relation is also compressed before
1456
 * the set of coefficients is computed.
1457
 */
1458
static __isl_give isl_basic_set *inter_coefficients(
1459
  struct isl_sched_graph *graph, struct isl_sched_edge *edge,
1460
  __isl_take isl_map *map)
1461
622
{
1462
622
  isl_set *set;
1463
622
  isl_map *key;
1464
622
  isl_basic_set *coef;
1465
622
  isl_maybe_isl_basic_set m;
1466
622
1467
622
  m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
1468
622
  if (
m.valid < 0 || 622
m.valid622
)
{532
1469
532
    isl_map_free(map);
1470
532
    return m.value;
1471
532
  }
1472
622
1473
90
  key = isl_map_copy(map);
1474
90
  if (edge->src->compressed)
1475
3
    map = isl_map_preimage_domain_multi_aff(map,
1476
3
            isl_multi_aff_copy(edge->src->decompress));
1477
90
  if (edge->dst->compressed)
1478
3
    map = isl_map_preimage_range_multi_aff(map,
1479
3
            isl_multi_aff_copy(edge->dst->decompress));
1480
90
  set = isl_map_wrap(isl_map_remove_divs(map));
1481
90
  coef = isl_set_coefficients(set);
1482
90
  graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
1483
90
          isl_basic_set_copy(coef));
1484
90
1485
90
  return coef;
1486
622
}
1487
1488
/* Return the position of the coefficients of the variables in
1489
 * the coefficients constraints "coef".
1490
 *
1491
 * The space of "coef" is of the form
1492
 *
1493
 *  { coefficients[[cst, params] -> S] }
1494
 *
1495
 * Return the position of S.
1496
 */
1497
static int coef_var_offset(__isl_keep isl_basic_set *coef)
1498
984
{
1499
984
  int offset;
1500
984
  isl_space *space;
1501
984
1502
984
  space = isl_space_unwrap(isl_basic_set_get_space(coef));
1503
984
  offset = isl_space_dim(space, isl_dim_in);
1504
984
  isl_space_free(space);
1505
984
1506
984
  return offset;
1507
984
}
1508
1509
/* Return the offset of the coefficients of the variables of "node"
1510
 * within the (I)LP.
1511
 *
1512
 * Within each node, the coefficients have the following order:
1513
 *  - c_i_0
1514
 *  - c_i_n (if parametric)
1515
 *  - positive and negative parts of c_i_x
1516
 */
1517
static int node_var_coef_offset(struct isl_sched_node *node)
1518
3.89k
{
1519
3.89k
  return node->start + 1 + node->nparam;
1520
3.89k
}
1521
1522
/* Construct an isl_dim_map for mapping constraints on coefficients
1523
 * for "node" to the corresponding positions in graph->lp.
1524
 * "offset" is the offset of the coefficients for the variables
1525
 * in the input constraints.
1526
 * "s" is the sign of the mapping.
1527
 *
1528
 * The input constraints are given in terms of the coefficients (c_0, c_n, c_x).
1529
 * The mapping produced by this function essentially plugs in
1530
 * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
1531
 * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
1532
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1533
 *
1534
 * The caller can extend the mapping to also map the other coefficients
1535
 * (and therefore not plug in 0).
1536
 */
1537
static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
1538
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1539
  int offset, int s)
1540
577
{
1541
577
  int pos;
1542
577
  unsigned total;
1543
577
  isl_dim_map *dim_map;
1544
577
1545
577
  total = isl_basic_set_total_dim(graph->lp);
1546
577
  pos = node_var_coef_offset(node);
1547
577
  dim_map = isl_dim_map_alloc(ctx, total);
1548
577
  isl_dim_map_range(dim_map, pos, 2, offset, 1, node->nvar, -s);
1549
577
  isl_dim_map_range(dim_map, pos + 1, 2, offset, 1, node->nvar, s);
1550
577
1551
577
  return dim_map;
1552
577
}
1553
1554
/* Construct an isl_dim_map for mapping constraints on coefficients
1555
 * for "src" (node i) and "dst" (node j) to the corresponding positions
1556
 * in graph->lp.
1557
 * "offset" is the offset of the coefficients for the variables of "src"
1558
 * in the input constraints.
1559
 * "s" is the sign of the mapping.
1560
 *
1561
 * The input constraints are given in terms of the coefficients
1562
 * (c_0, c_n, c_x, c_y).
1563
 * The mapping produced by this function essentially plugs in
1564
 * (c_j_0 - c_i_0, c_j_n - c_i_n,
1565
 *  c_j_x^+ - c_j_x^-, -(c_i_x^+ - c_i_x^-)) if s = 1 and
1566
 * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
1567
 *  - (c_j_x^+ - c_j_x^-), c_i_x^+ - c_i_x^-) if s = -1.
1568
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1569
 *
1570
 * The caller can further extend the mapping.
1571
 */
1572
static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
1573
  struct isl_sched_graph *graph, struct isl_sched_node *src,
1574
  struct isl_sched_node *dst, int offset, int s)
1575
407
{
1576
407
  int pos;
1577
407
  unsigned total;
1578
407
  isl_dim_map *dim_map;
1579
407
1580
407
  total = isl_basic_set_total_dim(graph->lp);
1581
407
  dim_map = isl_dim_map_alloc(ctx, total);
1582
407
1583
407
  isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, s);
1584
407
  isl_dim_map_range(dim_map, dst->start + 1, 1, 1, 1, dst->nparam, s);
1585
407
  pos = node_var_coef_offset(dst);
1586
407
  isl_dim_map_range(dim_map, pos, 2, offset + src->nvar, 1,
1587
407
        dst->nvar, -s);
1588
407
  isl_dim_map_range(dim_map, pos + 1, 2, offset + src->nvar, 1,
1589
407
        dst->nvar, s);
1590
407
1591
407
  isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -s);
1592
407
  isl_dim_map_range(dim_map, src->start + 1, 1, 1, 1, src->nparam, -s);
1593
407
  pos = node_var_coef_offset(src);
1594
407
  isl_dim_map_range(dim_map, pos, 2, offset, 1, src->nvar, s);
1595
407
  isl_dim_map_range(dim_map, pos + 1, 2, offset, 1, src->nvar, -s);
1596
407
1597
407
  return dim_map;
1598
407
}
1599
1600
/* Add constraints to graph->lp that force validity for the given
1601
 * dependence from a node i to itself.
1602
 * That is, add constraints that enforce
1603
 *
1604
 *  (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
1605
 *  = c_i_x (y - x) >= 0
1606
 *
1607
 * for each (x,y) in R.
1608
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
1609
 * of valid constraints for (y - x) and then plug in (0, 0, c_i_x^+ - c_i_x^-),
1610
 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
1611
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1612
 *
1613
 * Actually, we do not construct constraints for the c_i_x themselves,
1614
 * but for the coefficients of c_i_x written as a linear combination
1615
 * of the columns in node->cmap.
1616
 */
1617
static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
1618
  struct isl_sched_edge *edge)
1619
261
{
1620
261
  int offset;
1621
261
  isl_map *map = isl_map_copy(edge->map);
1622
261
  isl_ctx *ctx = isl_map_get_ctx(map);
1623
261
  isl_dim_map *dim_map;
1624
261
  isl_basic_set *coef;
1625
261
  struct isl_sched_node *node = edge->src;
1626
261
1627
261
  coef = intra_coefficients(graph, node, map);
1628
261
1629
261
  offset = coef_var_offset(coef);
1630
261
1631
261
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1632
261
              offset, isl_mat_copy(node->cmap));
1633
261
  if (!coef)
1634
0
    return isl_stat_error;
1635
261
1636
261
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
1637
261
  graph->lp = isl_basic_set_extend_constraints(graph->lp,
1638
261
      coef->n_eq, coef->n_ineq);
1639
261
  graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
1640
261
                 coef, dim_map);
1641
261
1642
261
  return isl_stat_ok;
1643
261
}
1644
1645
/* Add constraints to graph->lp that force validity for the given
1646
 * dependence from node i to node j.
1647
 * That is, add constraints that enforce
1648
 *
1649
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
1650
 *
1651
 * for each (x,y) in R.
1652
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1653
 * of valid constraints for R and then plug in
1654
 * (c_j_0 - c_i_0, c_j_n - c_i_n, c_j_x^+ - c_j_x^- - (c_i_x^+ - c_i_x^-)),
1655
 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
1656
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1657
 *
1658
 * Actually, we do not construct constraints for the c_*_x themselves,
1659
 * but for the coefficients of c_*_x written as a linear combination
1660
 * of the columns in node->cmap.
1661
 */
1662
static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
1663
  struct isl_sched_edge *edge)
1664
192
{
1665
192
  int offset;
1666
192
  isl_map *map = isl_map_copy(edge->map);
1667
192
  isl_ctx *ctx = isl_map_get_ctx(map);
1668
192
  isl_dim_map *dim_map;
1669
192
  isl_basic_set *coef;
1670
192
  struct isl_sched_node *src = edge->src;
1671
192
  struct isl_sched_node *dst = edge->dst;
1672
192
1673
192
  coef = inter_coefficients(graph, edge, map);
1674
192
1675
192
  offset = coef_var_offset(coef);
1676
192
1677
192
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1678
192
        offset, isl_mat_copy(src->cmap));
1679
192
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1680
192
        offset + src->nvar, isl_mat_copy(dst->cmap));
1681
192
  if (!coef)
1682
0
    return isl_stat_error;
1683
192
1684
192
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
1685
192
1686
192
  edge->start = graph->lp->n_ineq;
1687
192
  graph->lp = isl_basic_set_extend_constraints(graph->lp,
1688
192
      coef->n_eq, coef->n_ineq);
1689
192
  graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
1690
192
                 coef, dim_map);
1691
192
  if (!graph->lp)
1692
0
    return isl_stat_error;
1693
192
  edge->end = graph->lp->n_ineq;
1694
192
1695
192
  return isl_stat_ok;
1696
192
}
1697
1698
/* Add constraints to graph->lp that bound the dependence distance for the given
1699
 * dependence from a node i to itself.
1700
 * If s = 1, we add the constraint
1701
 *
1702
 *  c_i_x (y - x) <= m_0 + m_n n
1703
 *
1704
 * or
1705
 *
1706
 *  -c_i_x (y - x) + m_0 + m_n n >= 0
1707
 *
1708
 * for each (x,y) in R.
1709
 * If s = -1, we add the constraint
1710
 *
1711
 *  -c_i_x (y - x) <= m_0 + m_n n
1712
 *
1713
 * or
1714
 *
1715
 *  c_i_x (y - x) + m_0 + m_n n >= 0
1716
 *
1717
 * for each (x,y) in R.
1718
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
1719
 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
1720
 * with each coefficient (except m_0) represented as a pair of non-negative
1721
 * coefficients.
1722
 *
1723
 * Actually, we do not construct constraints for the c_i_x themselves,
1724
 * but for the coefficients of c_i_x written as a linear combination
1725
 * of the columns in node->cmap.
1726
 *
1727
 *
1728
 * If "local" is set, then we add constraints
1729
 *
1730
 *  c_i_x (y - x) <= 0
1731
 *
1732
 * or
1733
 *
1734
 *  -c_i_x (y - x) <= 0
1735
 *
1736
 * instead, forcing the dependence distance to be (less than or) equal to 0.
1737
 * That is, we plug in (0, 0, -s * c_i_x),
1738
 * Note that dependences marked local are treated as validity constraints
1739
 * by add_all_validity_constraints and therefore also have
1740
 * their distances bounded by 0 from below.
1741
 */
1742
static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
1743
  struct isl_sched_edge *edge, int s, int local)
1744
271
{
1745
271
  int offset;
1746
271
  unsigned nparam;
1747
271
  isl_map *map = isl_map_copy(edge->map);
1748
271
  isl_ctx *ctx = isl_map_get_ctx(map);
1749
271
  isl_dim_map *dim_map;
1750
271
  isl_basic_set *coef;
1751
271
  struct isl_sched_node *node = edge->src;
1752
271
1753
271
  coef = intra_coefficients(graph, node, map);
1754
271
1755
271
  offset = coef_var_offset(coef);
1756
271
1757
271
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1758
271
              offset, isl_mat_copy(node->cmap));
1759
271
  if (!coef)
1760
0
    return isl_stat_error;
1761
271
1762
271
  nparam = isl_space_dim(node->space, isl_dim_param);
1763
271
  dim_map = intra_dim_map(ctx, graph, node, offset, -s);
1764
271
1765
271
  if (
!local271
)
{140
1766
140
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
1767
140
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
1768
140
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
1769
140
  }
1770
271
  graph->lp = isl_basic_set_extend_constraints(graph->lp,
1771
271
      coef->n_eq, coef->n_ineq);
1772
271
  graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
1773
271
                 coef, dim_map);
1774
271
1775
271
  return isl_stat_ok;
1776
271
}
1777
1778
/* Add constraints to graph->lp that bound the dependence distance for the given
1779
 * dependence from node i to node j.
1780
 * If s = 1, we add the constraint
1781
 *
1782
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
1783
 *    <= m_0 + m_n n
1784
 *
1785
 * or
1786
 *
1787
 *  -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
1788
 *    m_0 + m_n n >= 0
1789
 *
1790
 * for each (x,y) in R.
1791
 * If s = -1, we add the constraint
1792
 *
1793
 *  -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
1794
 *    <= m_0 + m_n n
1795
 *
1796
 * or
1797
 *
1798
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
1799
 *    m_0 + m_n n >= 0
1800
 *
1801
 * for each (x,y) in R.
1802
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1803
 * of valid constraints for R and then plug in
1804
 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
1805
 *  -s*c_j_x+s*c_i_x)
1806
 * with each coefficient (except m_0, c_*_0 and c_*_n)
1807
 * represented as a pair of non-negative coefficients.
1808
 *
1809
 * Actually, we do not construct constraints for the c_*_x themselves,
1810
 * but for the coefficients of c_*_x written as a linear combination
1811
 * of the columns in node->cmap.
1812
 *
1813
 *
1814
 * If "local" is set, then we add constraints
1815
 *
1816
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
1817
 *
1818
 * or
1819
 *
1820
 *  -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)) <= 0
1821
 *
1822
 * instead, forcing the dependence distance to be (less than or) equal to 0.
1823
 * That is, we plug in
1824
 * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, -s*c_j_x+s*c_i_x).
1825
 * Note that dependences marked local are treated as validity constraints
1826
 * by add_all_validity_constraints and therefore also have
1827
 * their distances bounded by 0 from below.
1828
 */
1829
static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
1830
  struct isl_sched_edge *edge, int s, int local)
1831
201
{
1832
201
  int offset;
1833
201
  unsigned nparam;
1834
201
  isl_map *map = isl_map_copy(edge->map);
1835
201
  isl_ctx *ctx = isl_map_get_ctx(map);
1836
201
  isl_dim_map *dim_map;
1837
201
  isl_basic_set *coef;
1838
201
  struct isl_sched_node *src = edge->src;
1839
201
  struct isl_sched_node *dst = edge->dst;
1840
201
1841
201
  coef = inter_coefficients(graph, edge, map);
1842
201
1843
201
  offset = coef_var_offset(coef);
1844
201
1845
201
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1846
201
        offset, isl_mat_copy(src->cmap));
1847
201
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1848
201
        offset + src->nvar, isl_mat_copy(dst->cmap));
1849
201
  if (!coef)
1850
0
    return isl_stat_error;
1851
201
1852
201
  nparam = isl_space_dim(src->space, isl_dim_param);
1853
201
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
1854
201
1855
201
  if (
!local201
)
{119
1856
119
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
1857
119
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
1858
119
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
1859
119
  }
1860
201
1861
201
  graph->lp = isl_basic_set_extend_constraints(graph->lp,
1862
201
      coef->n_eq, coef->n_ineq);
1863
201
  graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
1864
201
                 coef, dim_map);
1865
201
1866
201
  return isl_stat_ok;
1867
201
}
1868
1869
/* Add all validity constraints to graph->lp.
1870
 *
1871
 * An edge that is forced to be local needs to have its dependence
1872
 * distances equal to zero.  We take care of bounding them by 0 from below
1873
 * here.  add_all_proximity_constraints takes care of bounding them by 0
1874
 * from above.
1875
 *
1876
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
1877
 * Otherwise, we ignore them.
1878
 */
1879
static int add_all_validity_constraints(struct isl_sched_graph *graph,
1880
  int use_coincidence)
1881
376
{
1882
376
  int i;
1883
376
1884
938
  for (i = 0; 
i < graph->n_edge938
;
++i562
)
{562
1885
562
    struct isl_sched_edge *edge= &graph->edge[i];
1886
562
    int local;
1887
562
1888
562
    local = is_local(edge) ||
1889
538
      
(is_coincidence(edge) && 538
use_coincidence322
);
1890
562
    if (
!is_validity(edge) && 562
!local113
)
1891
109
      continue;
1892
453
    
if (453
edge->src != edge->dst453
)
1893
192
      continue;
1894
261
    
if (261
add_intra_validity_constraints(graph, edge) < 0261
)
1895
0
      return -1;
1896
261
  }
1897
376
1898
938
  
for (i = 0; 376
i < graph->n_edge938
;
++i562
)
{562
1899
562
    struct isl_sched_edge *edge = &graph->edge[i];
1900
562
    int local;
1901
562
1902
562
    local = is_local(edge) ||
1903
538
      
(is_coincidence(edge) && 538
use_coincidence322
);
1904
562
    if (
!is_validity(edge) && 562
!local113
)
1905
109
      continue;
1906
453
    
if (453
edge->src == edge->dst453
)
1907
261
      continue;
1908
192
    
if (192
add_inter_validity_constraints(graph, edge) < 0192
)
1909
0
      return -1;
1910
192
  }
1911
376
1912
376
  return 0;
1913
376
}
1914
1915
/* Add constraints to graph->lp that bound the dependence distance
1916
 * for all dependence relations.
1917
 * If a given proximity dependence is identical to a validity
1918
 * dependence, then the dependence distance is already bounded
1919
 * from below (by zero), so we only need to bound the distance
1920
 * from above.  (This includes the case of "local" dependences
1921
 * which are treated as validity dependence by add_all_validity_constraints.)
1922
 * Otherwise, we need to bound the distance both from above and from below.
1923
 *
1924
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
1925
 * Otherwise, we ignore them.
1926
 */
1927
static int add_all_proximity_constraints(struct isl_sched_graph *graph,
1928
  int use_coincidence)
1929
376
{
1930
376
  int i;
1931
376
1932
938
  for (i = 0; 
i < graph->n_edge938
;
++i562
)
{562
1933
562
    struct isl_sched_edge *edge= &graph->edge[i];
1934
562
    int local;
1935
562
1936
562
    local = is_local(edge) ||
1937
538
      
(is_coincidence(edge) && 538
use_coincidence322
);
1938
562
    if (
!is_proximity(edge) && 562
!local115
)
1939
107
      continue;
1940
455
    
if (455
edge->src == edge->dst &&455
1941
263
        add_intra_proximity_constraints(graph, edge, 1, local) < 0)
1942
0
      return -1;
1943
455
    
if (455
edge->src != edge->dst &&455
1944
192
        add_inter_proximity_constraints(graph, edge, 1, local) < 0)
1945
0
      return -1;
1946
455
    
if (455
is_validity(edge) || 455
local21
)
1947
438
      continue;
1948
17
    
if (17
edge->src == edge->dst &&17
1949
8
        add_intra_proximity_constraints(graph, edge, -1, 0) < 0)
1950
0
      return -1;
1951
17
    
if (17
edge->src != edge->dst &&17
1952
9
        add_inter_proximity_constraints(graph, edge, -1, 0) < 0)
1953
0
      return -1;
1954
17
  }
1955
376
1956
376
  return 0;
1957
376
}
1958
1959
/* Compute a basis for the rows in the linear part of the schedule
1960
 * and extend this basis to a full basis.  The remaining rows
1961
 * can then be used to force linear independence from the rows
1962
 * in the schedule.
1963
 *
1964
 * In particular, given the schedule rows S, we compute
1965
 *
1966
 *  S   = H Q
1967
 *  S U = H
1968
 *
1969
 * with H the Hermite normal form of S.  That is, all but the
1970
 * first rank columns of H are zero and so each row in S is
1971
 * a linear combination of the first rank rows of Q.
1972
 * The matrix Q is then transposed because we will write the
1973
 * coefficients of the next schedule row as a column vector s
1974
 * and express this s as a linear combination s = Q c of the
1975
 * computed basis.
1976
 * Similarly, the matrix U is transposed such that we can
1977
 * compute the coefficients c = U s from a schedule row s.
1978
 */
1979
static int node_update_cmap(struct isl_sched_node *node)
1980
952
{
1981
952
  isl_mat *H, *U, *Q;
1982
952
  int n_row = isl_mat_rows(node->sched);
1983
952
1984
952
  H = isl_mat_sub_alloc(node->sched, 0, n_row,
1985
952
            1 + node->nparam, node->nvar);
1986
952
1987
952
  H = isl_mat_left_hermite(H, 0, &U, &Q);
1988
952
  isl_mat_free(node->cmap);
1989
952
  isl_mat_free(node->cinv);
1990
952
  isl_mat_free(node->ctrans);
1991
952
  node->ctrans = isl_mat_copy(Q);
1992
952
  node->cmap = isl_mat_transpose(Q);
1993
952
  node->cinv = isl_mat_transpose(U);
1994
952
  node->rank = isl_mat_initial_non_zero_cols(H);
1995
952
  isl_mat_free(H);
1996
952
1997
952
  if (
!node->cmap || 952
!node->cinv952
||
!node->ctrans952
||
node->rank < 0952
)
1998
0
    return -1;
1999
952
  return 0;
2000
952
}
2001
2002
/* Is "edge" marked as a validity or a conditional validity edge?
2003
 */
2004
static int is_any_validity(struct isl_sched_edge *edge)
2005
132
{
2006
3
  return is_validity(edge) || is_conditional_validity(edge);
2007
132
}
2008
2009
/* How many times should we count the constraints in "edge"?
2010
 *
2011
 * If carry is set, then we are counting the number of
2012
 * (validity or conditional validity) constraints that will be added
2013
 * in setup_carry_lp and we count each edge exactly once.
2014
 *
2015
 * Otherwise, we count as follows
2016
 * validity   -> 1 (>= 0)
2017
 * validity+proximity -> 2 (>= 0 and upper bound)
2018
 * proximity    -> 2 (lower and upper bound)
2019
 * local(+any)    -> 2 (>= 0 and <= 0)
2020
 *
2021
 * If an edge is only marked conditional_validity then it counts
2022
 * as zero since it is only checked afterwards.
2023
 *
2024
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2025
 * Otherwise, we ignore them.
2026
 */
2027
static int edge_multiplicity(struct isl_sched_edge *edge, int carry,
2028
  int use_coincidence)
2029
621
{
2030
621
  if (carry)
2031
59
    return 1;
2032
562
  
if (562
is_proximity(edge) || 562
is_local(edge)115
)
2033
451
    return 2;
2034
111
  
if (111
use_coincidence && 111
is_coincidence(edge)4
)
2035
4
    return 2;
2036
107
  
if (107
is_validity(edge)107
)
2037
15
    return 1;
2038
92
  return 0;
2039
107
}
2040
2041
/* Count the number of equality and inequality constraints
2042
 * that will be added for the given map.
2043
 *
2044
 * "use_coincidence" is set if we should take into account coincidence edges.
2045
 */
2046
static isl_stat count_map_constraints(struct isl_sched_graph *graph,
2047
  struct isl_sched_edge *edge, __isl_take isl_map *map,
2048
  int *n_eq, int *n_ineq, int carry, int use_coincidence)
2049
621
{
2050
621
  isl_basic_set *coef;
2051
621
  int f = edge_multiplicity(edge, carry, use_coincidence);
2052
621
2053
621
  if (
f == 0621
)
{92
2054
92
    isl_map_free(map);
2055
92
    return isl_stat_ok;
2056
92
  }
2057
621
2058
529
  
if (529
edge->src == edge->dst529
)
2059
314
    coef = intra_coefficients(graph, edge->src, map);
2060
529
  else
2061
215
    coef = inter_coefficients(graph, edge, map);
2062
529
  if (!coef)
2063
0
    return isl_stat_error;
2064
529
  *n_eq += f * coef->n_eq;
2065
529
  *n_ineq += f * coef->n_ineq;
2066
529
  isl_basic_set_free(coef);
2067
529
2068
529
  return isl_stat_ok;
2069
529
}
2070
2071
/* Count the number of equality and inequality constraints
2072
 * that will be added to the main lp problem.
2073
 * We count as follows
2074
 * validity   -> 1 (>= 0)
2075
 * validity+proximity -> 2 (>= 0 and upper bound)
2076
 * proximity    -> 2 (lower and upper bound)
2077
 * local(+any)    -> 2 (>= 0 and <= 0)
2078
 *
2079
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2080
 * Otherwise, we ignore them.
2081
 */
2082
static int count_constraints(struct isl_sched_graph *graph,
2083
  int *n_eq, int *n_ineq, int use_coincidence)
2084
376
{
2085
376
  int i;
2086
376
2087
376
  *n_eq = *n_ineq = 0;
2088
938
  for (i = 0; 
i < graph->n_edge938
;
++i562
)
{562
2089
562
    struct isl_sched_edge *edge= &graph->edge[i];
2090
562
    isl_map *map = isl_map_copy(edge->map);
2091
562
2092
562
    if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
2093
562
              0, use_coincidence) < 0)
2094
0
      return -1;
2095
562
  }
2096
376
2097
376
  return 0;
2098
376
}
2099
2100
/* Count the number of constraints that will be added by
2101
 * add_bound_constant_constraints to bound the values of the constant terms
2102
 * and increment *n_eq and *n_ineq accordingly.
2103
 *
2104
 * In practice, add_bound_constant_constraints only adds inequalities.
2105
 */
2106
static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
2107
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2108
376
{
2109
376
  if (isl_options_get_schedule_max_constant_term(ctx) == -1)
2110
250
    return isl_stat_ok;
2111
376
2112
126
  *n_ineq += graph->n;
2113
126
2114
126
  return isl_stat_ok;
2115
376
}
2116
2117
/* Add constraints to bound the values of the constant terms in the schedule,
2118
 * if requested by the user.
2119
 *
2120
 * The maximal value of the constant terms is defined by the option
2121
 * "schedule_max_constant_term".
2122
 *
2123
 * Within each node, the coefficients have the following order:
2124
 *  - c_i_0
2125
 *  - c_i_n (if parametric)
2126
 *  - positive and negative parts of c_i_x
2127
 */
2128
static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
2129
  struct isl_sched_graph *graph)
2130
376
{
2131
376
  int i, k;
2132
376
  int max;
2133
376
  int total;
2134
376
2135
376
  max = isl_options_get_schedule_max_constant_term(ctx);
2136
376
  if (max == -1)
2137
250
    return isl_stat_ok;
2138
376
2139
126
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2140
126
2141
260
  for (i = 0; 
i < graph->n260
;
++i134
)
{134
2142
134
    struct isl_sched_node *node = &graph->node[i];
2143
134
    k = isl_basic_set_alloc_inequality(graph->lp);
2144
134
    if (k < 0)
2145
0
      return isl_stat_error;
2146
134
    isl_seq_clr(graph->lp->ineq[k], 1 +  total);
2147
134
    isl_int_set_si(graph->lp->ineq[k][1 + node->start], -1);
2148
134
    isl_int_set_si(graph->lp->ineq[k][0], max);
2149
134
  }
2150
126
2151
126
  return isl_stat_ok;
2152
126
}
2153
2154
/* Count the number of constraints that will be added by
2155
 * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
2156
 * accordingly.
2157
 *
2158
 * In practice, add_bound_coefficient_constraints only adds inequalities.
2159
 */
2160
static int count_bound_coefficient_constraints(isl_ctx *ctx,
2161
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2162
376
{
2163
376
  int i;
2164
376
2165
376
  if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
2166
249
      !isl_options_get_schedule_treat_coalescing(ctx))
2167
8
    return 0;
2168
376
2169
897
  
for (i = 0; 368
i < graph->n897
;
++i529
)
2170
529
    *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
2171
368
2172
368
  return 0;
2173
376
}
2174
2175
/* Add constraints to graph->lp that bound the values of
2176
 * the parameter schedule coefficients of "node" to "max" and
2177
 * the variable schedule coefficients to the corresponding entry
2178
 * in node->max.
2179
 * In either case, a negative value means that no bound needs to be imposed.
2180
 *
2181
 * For parameter coefficients, this amounts to adding a constraint
2182
 *
2183
 *  c_n <= max
2184
 *
2185
 * i.e.,
2186
 *
2187
 *  -c_n + max >= 0
2188
 *
2189
 * The variables coefficients are, however, not represented directly.
2190
 * Instead, the variables coefficients c_x are written as a linear
2191
 * combination c_x = cmap c_z of some other coefficients c_z,
2192
 * which are in turn encoded as c_z = c_z^+ - c_z^-.
2193
 * Let a_j be the elements of row i of node->cmap, then
2194
 *
2195
 *  -max_i <= c_x_i <= max_i
2196
 *
2197
 * is encoded as
2198
 *
2199
 *  -max_i <= \sum_j a_j (c_z_j^+ - c_z_j^-) <= max_i
2200
 *
2201
 * or
2202
 *
2203
 *  -\sum_j a_j (c_z_j^+ - c_z_j^-) + max_i >= 0
2204
 *  \sum_j a_j (c_z_j^+ - c_z_j^-) + max_i >= 0
2205
 */
2206
static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
2207
  struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
2208
529
{
2209
529
  int i, j, k;
2210
529
  int total;
2211
529
  isl_vec *ineq;
2212
529
2213
529
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2214
529
2215
824
  for (j = 0; 
j < node->nparam824
;
++j295
)
{295
2216
295
    int dim;
2217
295
2218
295
    if (max < 0)
2219
240
      continue;
2220
295
2221
55
    k = isl_basic_set_alloc_inequality(graph->lp);
2222
55
    if (k < 0)
2223
0
      return isl_stat_error;
2224
55
    dim = 1 + node->start + 1 + j;
2225
55
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
2226
55
    isl_int_set_si(graph->lp->ineq[k][dim], -1);
2227
55
    isl_int_set_si(graph->lp->ineq[k][0], max);
2228
55
  }
2229
529
2230
529
  ineq = isl_vec_alloc(ctx, 1 + total);
2231
529
  ineq = isl_vec_clr(ineq);
2232
529
  if (!ineq)
2233
0
    return isl_stat_error;
2234
1.42k
  
for (i = 0; 529
i < node->nvar1.42k
;
++i893
)
{893
2235
893
    int pos = 1 + node_var_coef_offset(node);
2236
893
2237
893
    if (isl_int_is_neg(node->max->el[i]))
2238
499
      continue;
2239
893
2240
1.39k
    
for (j = 0; 394
j < node->nvar1.39k
;
++j1.00k
)
{1.00k
2241
1.00k
      isl_int_set(ineq->el[pos + 2 * j],
2242
1.00k
          node->cmap->row[i][j]);
2243
1.00k
      isl_int_neg(ineq->el[pos + 2 * j + 1],
2244
1.00k
          node->cmap->row[i][j]);
2245
1.00k
    }
2246
394
    isl_int_set(ineq->el[0], node->max->el[i]);
2247
394
2248
394
    k = isl_basic_set_alloc_inequality(graph->lp);
2249
394
    if (k < 0)
2250
0
      goto error;
2251
394
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2252
394
2253
394
    isl_seq_neg(ineq->el + pos, ineq->el + pos, 2 * node->nvar);
2254
394
    k = isl_basic_set_alloc_inequality(graph->lp);
2255
394
    if (k < 0)
2256
0
      goto error;
2257
394
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2258
394
  }
2259
529
  isl_vec_free(ineq);
2260
529
2261
529
  return isl_stat_ok;
2262
0
error:
2263
0
  isl_vec_free(ineq);
2264
0
  return isl_stat_error;
2265
529
}
2266
2267
/* Add constraints that bound the values of the variable and parameter
2268
 * coefficients of the schedule.
2269
 *
2270
 * The maximal value of the coefficients is defined by the option
2271
 * 'schedule_max_coefficient' and the entries in node->max.
2272
 * These latter entries are only set if either the schedule_max_coefficient
2273
 * option or the schedule_treat_coalescing option is set.
2274
 */
2275
static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
2276
  struct isl_sched_graph *graph)
2277
376
{
2278
376
  int i;
2279
376
  int max;
2280
376
2281
376
  max = isl_options_get_schedule_max_coefficient(ctx);
2282
376
2283
376
  if (
max == -1 && 376
!isl_options_get_schedule_treat_coalescing(ctx)249
)
2284
8
    return isl_stat_ok;
2285
376
2286
897
  
for (i = 0; 368
i < graph->n897
;
++i529
)
{529
2287
529
    struct isl_sched_node *node = &graph->node[i];
2288
529
2289
529
    if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
2290
0
      return isl_stat_error;
2291
529
  }
2292
368
2293
368
  return isl_stat_ok;
2294
368
}
2295
2296
/* Add a constraint to graph->lp that equates the value at position
2297
 * "sum_pos" to the sum of the "n" values starting at "first".
2298
 */
2299
static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
2300
  int sum_pos, int first, int n)
2301
376
{
2302
376
  int i, k;
2303
376
  int total;
2304
376
2305
376
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2306
376
2307
376
  k = isl_basic_set_alloc_equality(graph->lp);
2308
376
  if (k < 0)
2309
0
    return isl_stat_error;
2310
376
  isl_seq_clr(graph->lp->eq[k], 1 +  total);
2311
376
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2312
860
  for (i = 0; 
i < n860
;
++i484
)
2313
484
    isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
2314
376
2315
376
  return isl_stat_ok;
2316
376
}
2317
2318
/* Add a constraint to graph->lp that equates the value at position
2319
 * "sum_pos" to the sum of the parameter coefficients of all nodes.
2320
 *
2321
 * Within each node, the coefficients have the following order:
2322
 *  - c_i_0
2323
 *  - c_i_n (if parametric)
2324
 *  - positive and negative parts of c_i_x
2325
 */
2326
static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
2327
  int sum_pos)
2328
373
{
2329
373
  int i, j, k;
2330
373
  int total;
2331
373
2332
373
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2333
373
2334
373
  k = isl_basic_set_alloc_equality(graph->lp);
2335
373
  if (k < 0)
2336
0
    return isl_stat_error;
2337
373
  isl_seq_clr(graph->lp->eq[k], 1 +  total);
2338
373
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2339
893
  for (i = 0; 
i < graph->n893
;
++i520
)
{520
2340
520
    int pos = 1 + graph->node[i].start + 1;
2341
520
2342
829
    for (j = 0; 
j < graph->node[i].nparam829
;
++j309
)
2343
309
      isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2344
520
  }
2345
373
2346
373
  return isl_stat_ok;
2347
373
}
2348
2349
/* Add a constraint to graph->lp that equates the value at position
2350
 * "sum_pos" to the sum of the variable coefficients of all nodes.
2351
 *
2352
 * Within each node, the coefficients have the following order:
2353
 *  - c_i_0
2354
 *  - c_i_n (if parametric)
2355
 *  - positive and negative parts of c_i_x
2356
 */
2357
static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
2358
  int sum_pos)
2359
405
{
2360
405
  int i, j, k;
2361
405
  int total;
2362
405
2363
405
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2364
405
2365
405
  k = isl_basic_set_alloc_equality(graph->lp);
2366
405
  if (k < 0)
2367
0
    return isl_stat_error;
2368
405
  isl_seq_clr(graph->lp->eq[k], 1 +  total);
2369
405
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2370
987
  for (i = 0; 
i < graph->n987
;
++i582
)
{582
2371
582
    struct isl_sched_node *node = &graph->node[i];
2372
582
    int pos = 1 + node_var_coef_offset(node);
2373
582
2374
2.59k
    for (j = 0; 
j < 2 * node->nvar2.59k
;
++j2.01k
)
2375
2.01k
      isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2376
582
  }
2377
405
2378
405
  return isl_stat_ok;
2379
405
}
2380
2381
/* Construct an ILP problem for finding schedule coefficients
2382
 * that result in non-negative, but small dependence distances
2383
 * over all dependences.
2384
 * In particular, the dependence distances over proximity edges
2385
 * are bounded by m_0 + m_n n and we compute schedule coefficients
2386
 * with small values (preferably zero) of m_n and m_0.
2387
 *
2388
 * All variables of the ILP are non-negative.  The actual coefficients
2389
 * may be negative, so each coefficient is represented as the difference
2390
 * of two non-negative variables.  The negative part always appears
2391
 * immediately before the positive part.
2392
 * Other than that, the variables have the following order
2393
 *
2394
 *  - sum of positive and negative parts of m_n coefficients
2395
 *  - m_0
2396
 *  - sum of all c_n coefficients
2397
 *    (unconstrained when computing non-parametric schedules)
2398
 *  - sum of positive and negative parts of all c_x coefficients
2399
 *  - positive and negative parts of m_n coefficients
2400
 *  - for each node
2401
 *    - c_i_0
2402
 *    - c_i_n (if parametric)
2403
 *    - positive and negative parts of c_i_x
2404
 *
2405
 * The c_i_x are not represented directly, but through the columns of
2406
 * node->cmap.  That is, the computed values are for variable t_i_x
2407
 * such that c_i_x = Q t_i_x with Q equal to node->cmap.
2408
 *
2409
 * The constraints are those from the edges plus two or three equalities
2410
 * to express the sums.
2411
 *
2412
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2413
 * Otherwise, we ignore them.
2414
 */
2415
static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
2416
  int use_coincidence)
2417
376
{
2418
376
  int i;
2419
376
  unsigned nparam;
2420
376
  unsigned total;
2421
376
  isl_space *space;
2422
376
  int parametric;
2423
376
  int param_pos;
2424
376
  int n_eq, n_ineq;
2425
376
2426
376
  parametric = ctx->opt->schedule_parametric;
2427
376
  nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
2428
376
  param_pos = 4;
2429
376
  total = param_pos + 2 * nparam;
2430
921
  for (i = 0; 
i < graph->n921
;
++i545
)
{545
2431
545
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2432
545
    if (node_update_cmap(node) < 0)
2433
0
      return isl_stat_error;
2434
545
    node->start = total;
2435
545
    total += 1 + node->nparam + 2 * node->nvar;
2436
545
  }
2437
376
2438
376
  
if (376
count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0376
)
2439
0
    return isl_stat_error;
2440
376
  
if (376
count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0376
)
2441
0
    return isl_stat_error;
2442
376
  
if (376
count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0376
)
2443
0
    return isl_stat_error;
2444
376
2445
376
  space = isl_space_set_alloc(ctx, 0, total);
2446
376
  isl_basic_set_free(graph->lp);
2447
376
  n_eq += 2 + parametric;
2448
376
2449
376
  graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
2450
376
2451
376
  if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
2452
0
    return isl_stat_error;
2453
376
  
if (376
parametric && 376
add_param_sum_constraint(graph, 2) < 0344
)
2454
0
    return isl_stat_error;
2455
376
  
if (376
add_var_sum_constraint(graph, 3) < 0376
)
2456
0
    return isl_stat_error;
2457
376
  
if (376
add_bound_constant_constraints(ctx, graph) < 0376
)
2458
0
    return isl_stat_error;
2459
376
  
if (376
add_bound_coefficient_constraints(ctx, graph) < 0376
)
2460
0
    return isl_stat_error;
2461
376
  
if (376
add_all_validity_constraints(graph, use_coincidence) < 0376
)
2462
0
    return isl_stat_error;
2463
376
  
if (376
add_all_proximity_constraints(graph, use_coincidence) < 0376
)
2464
0
    return isl_stat_error;
2465
376
2466
376
  return isl_stat_ok;
2467
376
}
2468
2469
/* Analyze the conflicting constraint found by
2470
 * isl_tab_basic_set_non_trivial_lexmin.  If it corresponds to the validity
2471
 * constraint of one of the edges between distinct nodes, living, moreover
2472
 * in distinct SCCs, then record the source and sink SCC as this may
2473
 * be a good place to cut between SCCs.
2474
 */
2475
static int check_conflict(int con, void *user)
2476
1.17k
{
2477
1.17k
  int i;
2478
1.17k
  struct isl_sched_graph *graph = user;
2479
1.17k
2480
1.17k
  if (graph->src_scc >= 0)
2481
43
    return 0;
2482
1.17k
2483
1.13k
  con -= graph->lp->n_eq;
2484
1.13k
2485
1.13k
  if (con >= graph->lp->n_ineq)
2486
536
    return 0;
2487
1.13k
2488
2.04k
  
for (i = 0; 594
i < graph->n_edge2.04k
;
++i1.45k
)
{1.45k
2489
1.45k
    if (!is_validity(&graph->edge[i]))
2490
175
      continue;
2491
1.27k
    
if (1.27k
graph->edge[i].src == graph->edge[i].dst1.27k
)
2492
682
      continue;
2493
597
    
if (597
graph->edge[i].src->scc == graph->edge[i].dst->scc597
)
2494
529
      continue;
2495
68
    
if (68
graph->edge[i].start > con68
)
2496
39
      continue;
2497
29
    
if (29
graph->edge[i].end <= con29
)
2498
21
      continue;
2499
8
    graph->src_scc = graph->edge[i].src->scc;
2500
8
    graph->dst_scc = graph->edge[i].dst->scc;
2501
8
  }
2502
594
2503
594
  return 0;
2504
1.13k
}
2505
2506
/* Check whether the next schedule row of the given node needs to be
2507
 * non-trivial.  Lower-dimensional domains may have some trivial rows,
2508
 * but as soon as the number of remaining required non-trivial rows
2509
 * is as large as the number or remaining rows to be computed,
2510
 * all remaining rows need to be non-trivial.
2511
 */
2512
static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
2513
578
{
2514
578
  return node->nvar - node->rank >= graph->maxvar - graph->n_row;
2515
578
}
2516
2517
/* Solve the ILP problem constructed in setup_lp.
2518
 * For each node such that all the remaining rows of its schedule
2519
 * need to be non-trivial, we construct a non-triviality region.
2520
 * This region imposes that the next row is independent of previous rows.
2521
 * In particular the coefficients c_i_x are represented by t_i_x
2522
 * variables with c_i_x = Q t_i_x and Q a unimodular matrix such that
2523
 * its first columns span the rows of the previously computed part
2524
 * of the schedule.  The non-triviality region enforces that at least
2525
 * one of the remaining components of t_i_x is non-zero, i.e.,
2526
 * that the new schedule row depends on at least one of the remaining
2527
 * columns of Q.
2528
 */
2529
static __isl_give isl_vec *solve_lp(struct isl_sched_graph *graph)
2530
376
{
2531
376
  int i;
2532
376
  isl_vec *sol;
2533
376
  isl_basic_set *lp;
2534
376
2535
921
  for (i = 0; 
i < graph->n921
;
++i545
)
{545
2536
545
    struct isl_sched_node *node = &graph->node[i];
2537
545
    int skip = node->rank;
2538
545
    graph->region[i].pos = node_var_coef_offset(node) + 2 * skip;
2539
545
    if (needs_row(graph, node))
2540
441
      graph->region[i].len = 2 * (node->nvar - skip);
2541
545
    else
2542
104
      graph->region[i].len = 0;
2543
545
  }
2544
376
  lp = isl_basic_set_copy(graph->lp);
2545
376
  sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
2546
376
               graph->region, &check_conflict, graph);
2547
376
  return sol;
2548
376
}
2549
2550
/* Extract the coefficients for the variables of "node" from "sol".
2551
 *
2552
 * Within each node, the coefficients have the following order:
2553
 *  - c_i_0
2554
 *  - c_i_n (if parametric)
2555
 *  - positive and negative parts of c_i_x
2556
 *
2557
 * The c_i_x^- appear before their c_i_x^+ counterpart.
2558
 *
2559
 * Return c_i_x = c_i_x^+ - c_i_x^-
2560
 */
2561
static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
2562
  __isl_keep isl_vec *sol)
2563
475
{
2564
475
  int i;
2565
475
  int pos;
2566
475
  isl_vec *csol;
2567
475
2568
475
  if (!sol)
2569
0
    return NULL;
2570
475
  csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
2571
475
  if (!csol)
2572
0
    return NULL;
2573
475
2574
475
  pos = 1 + node_var_coef_offset(node);
2575
1.34k
  for (i = 0; 
i < node->nvar1.34k
;
++i868
)
2576
868
    isl_int_sub(csol->el[i],
2577
475
          sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
2578
475
2579
475
  return csol;
2580
475
}
2581
2582
/* Update the schedules of all nodes based on the given solution
2583
 * of the LP problem.
2584
 * The new row is added to the current band.
2585
 * All possibly negative coefficients are encoded as a difference
2586
 * of two non-negative variables, so we need to perform the subtraction
2587
 * here.  Moreover, if use_cmap is set, then the solution does
2588
 * not refer to the actual coefficients c_i_x, but instead to variables
2589
 * t_i_x such that c_i_x = Q t_i_x and Q is equal to node->cmap.
2590
 * In this case, we then also need to perform this multiplication
2591
 * to obtain the values of c_i_x.
2592
 *
2593
 * If coincident is set, then the caller guarantees that the new
2594
 * row satisfies the coincidence constraints.
2595
 */
2596
static int update_schedule(struct isl_sched_graph *graph,
2597
  __isl_take isl_vec *sol, int use_cmap, int coincident)
2598
302
{
2599
302
  int i, j;
2600
302
  isl_vec *csol = NULL;
2601
302
2602
302
  if (!sol)
2603
0
    goto error;
2604
302
  
if (302
sol->size == 0302
)
2605
0
    isl_die(sol->ctx, isl_error_internal,
2606
302
      "no solution found", goto error);
2607
302
  
if (302
graph->n_total_row >= graph->max_row302
)
2608
0
    isl_die(sol->ctx, isl_error_internal,
2609
302
      "too many schedule rows", goto error);
2610
302
2611
719
  
for (i = 0; 302
i < graph->n719
;
++i417
)
{417
2612
417
    struct isl_sched_node *node = &graph->node[i];
2613
417
    int pos = node->start;
2614
417
    int row = isl_mat_rows(node->sched);
2615
417
2616
417
    isl_vec_free(csol);
2617
417
    csol = extract_var_coef(node, sol);
2618
417
    if (!csol)
2619
0
      goto error;
2620
417
2621
417
    isl_map_free(node->sched_map);
2622
417
    node->sched_map = NULL;
2623
417
    node->sched = isl_mat_add_rows(node->sched, 1);
2624
417
    if (!node->sched)
2625
0
      goto error;
2626
1.05k
    
for (j = 0; 417
j < 1 + node->nparam1.05k
;
++j635
)
2627
635
      node->sched = isl_mat_set_element(node->sched,
2628
635
            row, j, sol->el[1 + pos + j]);
2629
417
    if (use_cmap)
2630
380
      csol = isl_mat_vec_product(isl_mat_copy(node->cmap),
2631
380
               csol);
2632
417
    if (!csol)
2633
0
      goto error;
2634
1.17k
    
for (j = 0; 417
j < node->nvar1.17k
;
++j762
)
2635
762
      node->sched = isl_mat_set_element(node->sched,
2636
762
          row, 1 + node->nparam + j, csol->el[j]);
2637
417
    node->coincident[graph->n_total_row] = coincident;
2638
417
  }
2639
302
  isl_vec_free(sol);
2640
302
  isl_vec_free(csol);
2641
302
2642
302
  graph->n_row++;
2643
302
  graph->n_total_row++;
2644
302
2645
302
  return 0;
2646
0
error:
2647
0
  isl_vec_free(sol);
2648
0
  isl_vec_free(csol);
2649
0
  return -1;
2650
302
}
2651
2652
/* Convert row "row" of node->sched into an isl_aff living in "ls"
2653
 * and return this isl_aff.
2654
 */
2655
static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
2656
  struct isl_sched_node *node, int row)
2657
904
{
2658
904
  int j;
2659
904
  isl_int v;
2660
904
  isl_aff *aff;
2661
904
2662
904
  isl_int_init(v);
2663
904
2664
904
  aff = isl_aff_zero_on_domain(ls);
2665
904
  isl_mat_get_element(node->sched, row, 0, &v);
2666
904
  aff = isl_aff_set_constant(aff, v);
2667
1.37k
  for (j = 0; 
j < node->nparam1.37k
;
++j470
)
{470
2668
470
    isl_mat_get_element(node->sched, row, 1 + j, &v);
2669
470
    aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
2670
470
  }
2671
2.43k
  for (j = 0; 
j < node->nvar2.43k
;
++j1.53k
)
{1.53k
2672
1.53k
    isl_mat_get_element(node->sched, row, 1 + node->nparam + j, &v);
2673
1.53k
    aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
2674
1.53k
  }
2675
904
2676
904
  isl_int_clear(v);
2677
904
2678
904
  return aff;
2679
904
}
2680
2681
/* Convert the "n" rows starting at "first" of node->sched into a multi_aff
2682
 * and return this multi_aff.
2683
 *
2684
 * The result is defined over the uncompressed node domain.
2685
 */
2686
static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff(
2687
  struct isl_sched_node *node, int first, int n)
2688
676
{
2689
676
  int i;
2690
676
  isl_space *space;
2691
676
  isl_local_space *ls;
2692
676
  isl_aff *aff;
2693
676
  isl_multi_aff *ma;
2694
676
  int nrow;
2695
676
2696
676
  if (!node)
2697
0
    return NULL;
2698
676
  nrow = isl_mat_rows(node->sched);
2699
676
  if (node->compressed)
2700
65
    space = isl_multi_aff_get_domain_space(node->decompress);
2701
676
  else
2702
611
    space = isl_space_copy(node->space);
2703
676
  ls = isl_local_space_from_space(isl_space_copy(space));
2704
676
  space = isl_space_from_domain(space);
2705
676
  space = isl_space_add_dims(space, isl_dim_out, n);
2706
676
  ma = isl_multi_aff_zero(space);
2707
676
2708
1.58k
  for (i = first; 
i < first + n1.58k
;
++i904
)
{904
2709
904
    aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
2710
904
    ma = isl_multi_aff_set_aff(ma, i - first, aff);
2711
904
  }
2712
676
2713
676
  isl_local_space_free(ls);
2714
676
2715
676
  if (node->compressed)
2716
65
    ma = isl_multi_aff_pullback_multi_aff(ma,
2717
65
          isl_multi_aff_copy(node->compress));
2718
676
2719
676
  return ma;
2720
676
}
2721
2722
/* Convert node->sched into a multi_aff and return this multi_aff.
2723
 *
2724
 * The result is defined over the uncompressed node domain.
2725
 */
2726
static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
2727
  struct isl_sched_node *node)
2728
171
{
2729
171
  int nrow;
2730
171
2731
171
  nrow = isl_mat_rows(node->sched);
2732
171
  return node_extract_partial_schedule_multi_aff(node, 0, nrow);
2733
171
}
2734
2735
/* Convert node->sched into a map and return this map.
2736
 *
2737
 * The result is cached in node->sched_map, which needs to be released
2738
 * whenever node->sched is updated.
2739
 * It is defined over the uncompressed node domain.
2740
 */
2741
static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
2742
454
{
2743
454
  if (
!node->sched_map454
)
{171
2744
171
    isl_multi_aff *ma;
2745
171
2746
171
    ma = node_extract_schedule_multi_aff(node);
2747
171
    node->sched_map = isl_map_from_multi_aff(ma);
2748
171
  }
2749
454
2750
454
  return isl_map_copy(node->sched_map);
2751
454
}
2752
2753
/* Construct a map that can be used to update a dependence relation
2754
 * based on the current schedule.
2755
 * That is, construct a map expressing that source and sink
2756
 * are executed within the same iteration of the current schedule.
2757
 * This map can then be intersected with the dependence relation.
2758
 * This is not the most efficient way, but this shouldn't be a critical
2759
 * operation.
2760
 */
2761
static __isl_give isl_map *specializer(struct isl_sched_node *src,
2762
  struct isl_sched_node *dst)
2763
197
{
2764
197
  isl_map *src_sched, *dst_sched;
2765
197
2766
197
  src_sched = node_extract_schedule(src);
2767
197
  dst_sched = node_extract_schedule(dst);
2768
197
  return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
2769
197
}
2770
2771
/* Intersect the domains of the nested relations in domain and range
2772
 * of "umap" with "map".
2773
 */
2774
static __isl_give isl_union_map *intersect_domains(
2775
  __isl_take isl_union_map *umap, __isl_keep isl_map *map)
2776
70
{
2777
70
  isl_union_set *uset;
2778
70
2779
70
  umap = isl_union_map_zip(umap);
2780
70
  uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
2781
70
  umap = isl_union_map_intersect_domain(umap, uset);
2782
70
  umap = isl_union_map_zip(umap);
2783
70
  return umap;
2784
70
}
2785
2786
/* Update the dependence relation of the given edge based
2787
 * on the current schedule.
2788
 * If the dependence is carried completely by the current schedule, then
2789
 * it is removed from the edge_tables.  It is kept in the list of edges
2790
 * as otherwise all edge_tables would have to be recomputed.
2791
 */
2792
static int update_edge(struct isl_sched_graph *graph,
2793
  struct isl_sched_edge *edge)
2794
197
{
2795
197
  int empty;
2796
197
  isl_map *id;
2797
197
2798
197
  id = specializer(edge->src, edge->dst);
2799
197
  edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
2800
197
  if (!edge->map)
2801
0
    goto error;
2802
197
2803
197
  
if (197
edge->tagged_condition197
)
{32
2804
32
    edge->tagged_condition =
2805
32
      intersect_domains(edge->tagged_condition, id);
2806
32
    if (!edge->tagged_condition)
2807
0
      goto error;
2808
32
  }
2809
197
  
if (197
edge->tagged_validity197
)
{38
2810
38
    edge->tagged_validity =
2811
38
      intersect_domains(edge->tagged_validity, id);
2812
38
    if (!edge->tagged_validity)
2813
0
      goto error;
2814
38
  }
2815
197
2816
197
  empty = isl_map_plain_is_empty(edge->map);
2817
197
  if (empty < 0)
2818
0
    goto error;
2819
197
  
if (197
empty197
)
2820
108
    graph_remove_edge(graph, edge);
2821
197
2822
197
  isl_map_free(id);
2823
197
  return 0;
2824
0
error:
2825
0
  isl_map_free(id);
2826
0
  return -1;
2827
197
}
2828
2829
/* Does the domain of "umap" intersect "uset"?
2830
 */
2831
static int domain_intersects(__isl_keep isl_union_map *umap,
2832
  __isl_keep isl_union_set *uset)
2833
39
{
2834
39
  int empty;
2835
39
2836
39
  umap = isl_union_map_copy(umap);
2837
39
  umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
2838
39
  empty = isl_union_map_is_empty(umap);
2839
39
  isl_union_map_free(umap);
2840
39
2841
39
  return empty < 0 ? 
-10
:
!empty39
;
2842
39
}
2843
2844
/* Does the range of "umap" intersect "uset"?
2845
 */
2846
static int range_intersects(__isl_keep isl_union_map *umap,
2847
  __isl_keep isl_union_set *uset)
2848
26
{
2849
26
  int empty;
2850
26
2851
26
  umap = isl_union_map_copy(umap);
2852
26
  umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
2853
26
  empty = isl_union_map_is_empty(umap);
2854
26
  isl_union_map_free(umap);
2855
26
2856
26
  return empty < 0 ? 
-10
:
!empty26
;
2857
26
}
2858
2859
/* Are the condition dependences of "edge" local with respect to
2860
 * the current schedule?
2861
 *
2862
 * That is, are domain and range of the condition dependences mapped
2863
 * to the same point?
2864
 *
2865
 * In other words, is the condition false?
2866
 */
2867
static int is_condition_false(struct isl_sched_edge *edge)
2868
30
{
2869
30
  isl_union_map *umap;
2870
30
  isl_map *map, *sched, *test;
2871
30
  int empty, local;
2872
30
2873
30
  empty = isl_union_map_is_empty(edge->tagged_condition);
2874
30
  if (
empty < 0 || 30
empty30
)
2875
0
    return empty;
2876
30
2877
30
  umap = isl_union_map_copy(edge->tagged_condition);
2878
30
  umap = isl_union_map_zip(umap);
2879
30
  umap = isl_union_set_unwrap(isl_union_map_domain(umap));
2880
30
  map = isl_map_from_union_map(umap);
2881
30
2882
30
  sched = node_extract_schedule(edge->src);
2883
30
  map = isl_map_apply_domain(map, sched);
2884
30
  sched = node_extract_schedule(edge->dst);
2885
30
  map = isl_map_apply_range(map, sched);
2886
30
2887
30
  test = isl_map_identity(isl_map_get_space(map));
2888
30
  local = isl_map_is_subset(map, test);
2889
30
  isl_map_free(map);
2890
30
  isl_map_free(test);
2891
30
2892
30
  return local;
2893
30
}
2894
2895
/* For each conditional validity constraint that is adjacent
2896
 * to a condition with domain in condition_source or range in condition_sink,
2897
 * turn it into an unconditional validity constraint.
2898
 */
2899
static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
2900
  __isl_take isl_union_set *condition_source,
2901
  __isl_take isl_union_set *condition_sink)
2902
10
{
2903
10
  int i;
2904
10
2905
10
  condition_source = isl_union_set_coalesce(condition_source);
2906
10
  condition_sink = isl_union_set_coalesce(condition_sink);
2907
10
2908
52
  for (i = 0; 
i < graph->n_edge52
;
++i42
)
{42
2909
42
    int adjacent;
2910
42
    isl_union_map *validity;
2911
42
2912
42
    if (!is_conditional_validity(&graph->edge[i]))
2913
19
      continue;
2914
23
    
if (23
is_validity(&graph->edge[i])23
)
2915
4
      continue;
2916
23
2917
19
    validity = graph->edge[i].tagged_validity;
2918
19
    adjacent = domain_intersects(validity, condition_sink);
2919
19
    if (
adjacent >= 0 && 19
!adjacent19
)
2920
18
      adjacent = range_intersects(validity, condition_source);
2921
19
    if (adjacent < 0)
2922
0
      goto error;
2923
19
    
if (19
!adjacent19
)
2924
17
      continue;
2925
19
2926
2
    set_validity(&graph->edge[i]);
2927
2
  }
2928
10
2929
10
  isl_union_set_free(condition_source);
2930
10
  isl_union_set_free(condition_sink);
2931
10
  return 0;
2932
0
error:
2933
0
  isl_union_set_free(condition_source);
2934
0
  isl_union_set_free(condition_sink);
2935
0
  return -1;
2936
10
}
2937
2938
/* Update the dependence relations of all edges based on the current schedule
2939
 * and enforce conditional validity constraints that are adjacent
2940
 * to satisfied condition constraints.
2941
 *
2942
 * First check if any of the condition constraints are satisfied
2943
 * (i.e., not local to the outer schedule) and keep track of
2944
 * their domain and range.
2945
 * Then update all dependence relations (which removes the non-local
2946
 * constraints).
2947
 * Finally, if any condition constraints turned out to be satisfied,
2948
 * then turn all adjacent conditional validity constraints into
2949
 * unconditional validity constraints.
2950
 */
2951
static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
2952
81
{
2953
81
  int i;
2954
81
  int any = 0;
2955
81
  isl_union_set *source, *sink;
2956
81
2957
81
  source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
2958
81
  sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
2959
278
  for (i = 0; 
i < graph->n_edge278
;
++i197
)
{197
2960
197
    int local;
2961
197
    isl_union_set *uset;
2962
197
    isl_union_map *umap;
2963
197
2964
197
    if (!is_condition(&graph->edge[i]))
2965
165
      continue;
2966
32
    
if (32
is_local(&graph->edge[i])32
)
2967
17
      continue;
2968
15
    local = is_condition_false(&graph->edge[i]);
2969
15
    if (local < 0)
2970
0
      goto error;
2971
15
    
if (15
local15
)
2972
4
      continue;
2973
15
2974
11
    any = 1;
2975
11
2976
11
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
2977
11
    uset = isl_union_map_domain(umap);
2978
11
    source = isl_union_set_union(source, uset);
2979
11
2980
11
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
2981
11
    uset = isl_union_map_range(umap);
2982
11
    sink = isl_union_set_union(sink, uset);
2983
11
  }
2984
81
2985
278
  
for (i = graph->n_edge - 1; 81
i >= 0278
;
--i197
)
{197
2986
197
    if (update_edge(graph, &graph->edge[i]) < 0)
2987
0
      goto error;
2988
197
  }
2989
81
2990
81
  
if (81
any81
)
2991
10
    return unconditionalize_adjacent_validity(graph, source, sink);
2992
81
2993
71
  isl_union_set_free(source);
2994
71
  isl_union_set_free(sink);
2995
71
  return 0;
2996
0
error:
2997
0
  isl_union_set_free(source);
2998
0
  isl_union_set_free(sink);
2999
0
  return -1;
3000
81
}
3001
3002
static void next_band(struct isl_sched_graph *graph)
3003
83
{
3004
83
  graph->band_start = graph->n_total_row;
3005
83
}
3006
3007
/* Return the union of the universe domains of the nodes in "graph"
3008
 * that satisfy "pred".
3009
 */
3010
static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
3011
  struct isl_sched_graph *graph,
3012
  int (*pred)(struct isl_sched_node *node, int data), int data)
3013
155
{
3014
155
  int i;
3015
155
  isl_set *set;
3016
155
  isl_union_set *dom;
3017
155
3018
262
  for (i = 0; 
i < graph->n262
;
++i107
)
3019
262
    
if (262
pred(&graph->node[i], data)262
)
3020
155
      break;
3021
155
3022
155
  if (i >= graph->n)
3023
0
    isl_die(ctx, isl_error_internal,
3024
155
      "empty component", return NULL);
3025
155
3026
155
  set = isl_set_universe(isl_space_copy(graph->node[i].space));
3027
155
  dom = isl_union_set_from_set(set);
3028
155
3029
267
  for (i = i + 1; 
i < graph->n267
;
++i112
)
{112
3030
112
    if (!pred(&graph->node[i], data))
3031
105
      continue;
3032
7
    set = isl_set_universe(isl_space_copy(graph->node[i].space));
3033
7
    dom = isl_union_set_union(dom, isl_union_set_from_set(set));
3034
7
  }
3035
155
3036
155
  return dom;
3037
155
}
3038
3039
/* Return a list of unions of universe domains, where each element
3040
 * in the list corresponds to an SCC (or WCC) indexed by node->scc.
3041
 */
3042
static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx,
3043
  struct isl_sched_graph *graph)
3044
68
{
3045
68
  int i;
3046
68
  isl_union_set_list *filters;
3047
68
3048
68
  filters = isl_union_set_list_alloc(ctx, graph->scc);
3049
219
  for (i = 0; 
i < graph->scc219
;
++i151
)
{151
3050
151
    isl_union_set *dom;
3051
151
3052
151
    dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i);
3053
151
    filters = isl_union_set_list_add(filters, dom);
3054
151
  }
3055
68
3056
68
  return filters;
3057
68
}
3058
3059
/* Return a list of two unions of universe domains, one for the SCCs up
3060
 * to and including graph->src_scc and another for the other SCCs.
3061
 */
3062
static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
3063
  struct isl_sched_graph *graph)
3064
2
{
3065
2
  isl_union_set *dom;
3066
2
  isl_union_set_list *filters;
3067
2
3068
2
  filters = isl_union_set_list_alloc(ctx, 2);
3069
2
  dom = isl_sched_graph_domain(ctx, graph,
3070
2
          &node_scc_at_most, graph->src_scc);
3071
2
  filters = isl_union_set_list_add(filters, dom);
3072
2
  dom = isl_sched_graph_domain(ctx, graph,
3073
2
          &node_scc_at_least, graph->src_scc + 1);
3074
2
  filters = isl_union_set_list_add(filters, dom);
3075
2
3076
2
  return filters;
3077
2
}
3078
3079
/* Copy nodes that satisfy node_pred from the src dependence graph
3080
 * to the dst dependence graph.
3081
 */
3082
static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
3083
  int (*node_pred)(struct isl_sched_node *node, int data), int data)
3084
121
{
3085
121
  int i;
3086
121
3087
121
  dst->n = 0;
3088
425
  for (i = 0; 
i < src->n425
;
++i304
)
{304
3089
304
    int j;
3090
304
3091
304
    if (!node_pred(&src->node[i], data))
3092
158
      continue;
3093
304
3094
146
    j = dst->n;
3095
146
    dst->node[j].space = isl_space_copy(src->node[i].space);
3096
146
    dst->node[j].compressed = src->node[i].compressed;
3097
146
    dst->node[j].hull = isl_set_copy(src->node[i].hull);
3098
146
    dst->node[j].compress =
3099
146
      isl_multi_aff_copy(src->node[i].compress);
3100
146
    dst->node[j].decompress =
3101
146
      isl_multi_aff_copy(src->node[i].decompress);
3102
146
    dst->node[j].nvar = src->node[i].nvar;
3103
146
    dst->node[j].nparam = src->node[i].nparam;
3104
146
    dst->node[j].sched = isl_mat_copy(src->node[i].sched);
3105
146
    dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
3106
146
    dst->node[j].coincident = src->node[i].coincident;
3107
146
    dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
3108
146
    dst->node[j].max = isl_vec_copy(src->node[i].max);
3109
146
    dst->n++;
3110
146
3111
146
    if (
!dst->node[j].space || 146
!dst->node[j].sched146
)
3112
0
      return -1;
3113
146
    
if (146
dst->node[j].compressed &&146
3114
8
        
(!dst->node[j].hull || 8
!dst->node[j].compress8
||
3115
8
         !dst->node[j].decompress))
3116
0
      return -1;
3117
146
  }
3118
121
3119
121
  return 0;
3120
121
}
3121
3122
/* Copy non-empty edges that satisfy edge_pred from the src dependence graph
3123
 * to the dst dependence graph.
3124
 * If the source or destination node of the edge is not in the destination
3125
 * graph, then it must be a backward proximity edge and it should simply
3126
 * be ignored.
3127
 */
3128
static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
3129
  struct isl_sched_graph *src,
3130
  int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
3131
121
{
3132
121
  int i;
3133
121
  enum isl_edge_type t;
3134
121
3135
121
  dst->n_edge = 0;
3136
457
  for (i = 0; 
i < src->n_edge457
;
++i336
)
{336
3137
336
    struct isl_sched_edge *edge = &src->edge[i];
3138
336
    isl_map *map;
3139
336
    isl_union_map *tagged_condition;
3140
336
    isl_union_map *tagged_validity;
3141
336
    struct isl_sched_node *dst_src, *dst_dst;
3142
336
3143
336
    if (!edge_pred(edge, data))
3144
252
      continue;
3145
336
3146
84
    
if (84
isl_map_plain_is_empty(edge->map)84
)
3147
11
      continue;
3148
84
3149
73
    dst_src = graph_find_node(ctx, dst, edge->src->space);
3150
73
    dst_dst = graph_find_node(ctx, dst, edge->dst->space);
3151
73
    if (
!dst_src || 73
!dst_dst73
)
{0
3152
0
      if (
is_validity(edge) || 0
is_conditional_validity(edge)0
)
3153
0
        isl_die(ctx, isl_error_internal,
3154
0
          "backward (conditional) validity edge",
3155
0
          return -1);
3156
0
      continue;
3157
0
    }
3158
73
3159
73
    map = isl_map_copy(edge->map);
3160
73
    tagged_condition = isl_union_map_copy(edge->tagged_condition);
3161
73
    tagged_validity = isl_union_map_copy(edge->tagged_validity);
3162
73
3163
73
    dst->edge[dst->n_edge].src = dst_src;
3164
73
    dst->edge[dst->n_edge].dst = dst_dst;
3165
73
    dst->edge[dst->n_edge].map = map;
3166
73
    dst->edge[dst->n_edge].tagged_condition = tagged_condition;
3167
73
    dst->edge[dst->n_edge].tagged_validity = tagged_validity;
3168
73
    dst->edge[dst->n_edge].types = edge->types;
3169
73
    dst->n_edge++;
3170
73
3171
73
    if (
edge->tagged_condition && 73
!tagged_condition10
)
3172
0
      return -1;
3173
73
    
if (73
edge->tagged_validity && 73
!tagged_validity6
)
3174
0
      return -1;
3175
73
3176
438
    
for (t = isl_edge_first; 73
t <= isl_edge_last438
;
++t365
)
{365
3177
365
      if (edge !=
3178
365
          graph_find_edge(src, t, edge->src, edge->dst))
3179
182
        continue;
3180
183
      
if (183
graph_edge_table_add(ctx, dst, t,183
3181
183
              &dst->edge[dst->n_edge - 1]) < 0)
3182
0
        return -1;
3183
183
    }
3184
73
  }
3185
121
3186
121
  return 0;
3187
121
}
3188
3189
/* Compute the maximal number of variables over all nodes.
3190
 * This is the maximal number of linearly independent schedule
3191
 * rows that we need to compute.
3192
 * Just in case we end up in a part of the dependence graph
3193
 * with only lower-dimensional domains, we make sure we will
3194
 * compute the required amount of extra linearly independent rows.
3195
 */
3196
static int compute_maxvar(struct isl_sched_graph *graph)
3197
247
{
3198
247
  int i;
3199
247
3200
247
  graph->maxvar = 0;
3201
602
  for (i = 0; 
i < graph->n602
;
++i355
)
{355
3202
355
    struct isl_sched_node *node = &graph->node[i];
3203
355
    int nvar;
3204
355
3205
355
    if (node_update_cmap(node) < 0)
3206
0
      return -1;
3207
355
    nvar = node->nvar + graph->n_row - node->rank;
3208
355
    if (nvar > graph->maxvar)
3209
246
      graph->maxvar = nvar;
3210
355
  }
3211
247
3212
247
  return 0;
3213
247
}
3214
3215
/* Extract the subgraph of "graph" that consists of the node satisfying
3216
 * "node_pred" and the edges satisfying "edge_pred" and store
3217
 * the result in "sub".
3218
 */
3219
static int extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
3220
  int (*node_pred)(struct isl_sched_node *node, int data),
3221
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3222
  int data, struct isl_sched_graph *sub)
3223
121
{
3224
121
  int i, n = 0, n_edge = 0;
3225
121
  int t;
3226
121
3227
425
  for (i = 0; 
i < graph->n425
;
++i304
)
3228
304
    
if (304
node_pred(&graph->node[i], data)304
)
3229
146
      ++n;
3230
457
  for (i = 0; 
i < graph->n_edge457
;
++i336
)
3231
336
    
if (336
edge_pred(&graph->edge[i], data)336
)
3232
84
      ++n_edge;
3233
121
  if (graph_alloc(ctx, sub, n, n_edge) < 0)
3234
0
    return -1;
3235
121
  
if (121
copy_nodes(sub, graph, node_pred, data) < 0121
)
3236
0
    return -1;
3237
121
  
if (121
graph_init_table(ctx, sub) < 0121
)
3238
0
    return -1;
3239
726
  
for (t = 0; 121
t <= isl_edge_last726
;
++t605
)
3240
605
    sub->max_edge[t] = graph->max_edge[t];
3241
121
  if (graph_init_edge_tables(ctx, sub) < 0)
3242
0
    return -1;
3243
121
  
if (121
copy_edges(ctx, sub, graph, edge_pred, data) < 0121
)
3244
0
    return -1;
3245
121
  sub->n_row = graph->n_row;
3246
121
  sub->max_row = graph->max_row;
3247
121
  sub->n_total_row = graph->n_total_row;
3248
121
  sub->band_start = graph->band_start;
3249
121
3250
121
  return 0;
3251
121
}
3252
3253
static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
3254
  struct isl_sched_graph *graph);
3255
static __isl_give isl_schedule_node *compute_schedule_wcc(
3256
  isl_schedule_node *node, struct isl_sched_graph *graph);
3257
3258
/* Compute a schedule for a subgraph of "graph".  In particular, for
3259
 * the graph composed of nodes that satisfy node_pred and edges that
3260
 * that satisfy edge_pred.
3261
 * If the subgraph is known to consist of a single component, then wcc should
3262
 * be set and then we call compute_schedule_wcc on the constructed subgraph.
3263
 * Otherwise, we call compute_schedule, which will check whether the subgraph
3264
 * is connected.
3265
 *
3266
 * The schedule is inserted at "node" and the updated schedule node
3267
 * is returned.
3268
 */
3269
static __isl_give isl_schedule_node *compute_sub_schedule(
3270
  __isl_take isl_schedule_node *node, isl_ctx *ctx,
3271
  struct isl_sched_graph *graph,
3272
  int (*node_pred)(struct isl_sched_node *node, int data),
3273
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3274
  int data, int wcc)
3275
58
{
3276
58
  struct isl_sched_graph split = { 0 };
3277
58
3278
58
  if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data,
3279
58
        &split) < 0)
3280
0
    goto error;
3281
58
3282
58
  
if (58
wcc58
)
3283
40
    node = compute_schedule_wcc(node, &split);
3284
58
  else
3285
18
    node = compute_schedule(node, &split);
3286
58
3287
58
  graph_free(ctx, &split);
3288
58
  return node;
3289
0
error:
3290
0
  graph_free(ctx, &split);
3291
0
  return isl_schedule_node_free(node);
3292
58
}
3293
3294
static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
3295
498
{
3296
204
  return edge->src->scc == scc && edge->dst->scc == scc;
3297
498
}
3298
3299
static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
3300
14
{
3301
14
  return edge->dst->scc <= scc;
3302
14
}
3303
3304
static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
3305
14
{
3306
14
  return edge->src->scc >= scc;
3307
14
}
3308
3309
/* Reset the current band by dropping all its schedule rows.
3310
 */
3311
static int reset_band(struct isl_sched_graph *graph)
3312
8
{
3313
8
  int i;
3314
8
  int drop;
3315
8
3316
8
  drop = graph->n_total_row - graph->band_start;
3317
8
  graph->n_total_row -= drop;
3318
8
  graph->n_row -= drop;
3319
8
3320
28
  for (i = 0; 
i < graph->n28
;
++i20
)
{20
3321
20
    struct isl_sched_node *node = &graph->node[i];
3322
20
3323
20
    isl_map_free(node->sched_map);
3324
20
    node->sched_map = NULL;
3325
20
3326
20
    node->sched = isl_mat_drop_rows(node->sched,
3327
20
            graph->band_start, drop);
3328
20
3329
20
    if (!node->sched)
3330
0
      return -1;
3331
20
  }
3332
8
3333
8
  return 0;
3334
8
}
3335
3336
/* Split the current graph into two parts and compute a schedule for each
3337
 * part individually.  In particular, one part consists of all SCCs up
3338
 * to and including graph->src_scc, while the other part contains the other
3339
 * SCCs.  The split is enforced by a sequence node inserted at position "node"
3340
 * in the schedule tree.  Return the updated schedule node.
3341
 * If either of these two parts consists of a sequence, then it is spliced
3342
 * into the sequence containing the two parts.
3343
 *
3344
 * The current band is reset. It would be possible to reuse
3345
 * the previously computed rows as the first rows in the next
3346
 * band, but recomputing them may result in better rows as we are looking
3347
 * at a smaller part of the dependence graph.
3348
 */
3349
static __isl_give isl_schedule_node *compute_split_schedule(
3350
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3351
2
{
3352
2
  int is_seq;
3353
2
  isl_ctx *ctx;
3354
2
  isl_union_set_list *filters;
3355
2
3356
2
  if (!node)
3357
0
    return NULL;
3358
2
3359
2
  
if (2
reset_band(graph) < 02
)
3360
0
    return isl_schedule_node_free(node);
3361
2
3362
2
  next_band(graph);
3363
2
3364
2
  ctx = isl_schedule_node_get_ctx(node);
3365
2
  filters = extract_split(ctx, graph);
3366
2
  node = isl_schedule_node_insert_sequence(node, filters);
3367
2
  node = isl_schedule_node_child(node, 1);
3368
2
  node = isl_schedule_node_child(node, 0);
3369
2
3370
2
  node = compute_sub_schedule(node, ctx, graph,
3371
2
        &node_scc_at_least, &edge_src_scc_at_least,
3372
2
        graph->src_scc + 1, 0);
3373
2
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3374
2
  node = isl_schedule_node_parent(node);
3375
2
  node = isl_schedule_node_parent(node);
3376
2
  if (is_seq)
3377
0
    node = isl_schedule_node_sequence_splice_child(node, 1);
3378
2
  node = isl_schedule_node_child(node, 0);
3379
2
  node = isl_schedule_node_child(node, 0);
3380
2
  node = compute_sub_schedule(node, ctx, graph,
3381
2
        &node_scc_at_most, &edge_dst_scc_at_most,
3382
2
        graph->src_scc, 0);
3383
2
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3384
2
  node = isl_schedule_node_parent(node);
3385
2
  node = isl_schedule_node_parent(node);
3386
2
  if (is_seq)
3387
0
    node = isl_schedule_node_sequence_splice_child(node, 0);
3388
2
3389
2
  return node;
3390
2
}
3391
3392
/* Insert a band node at position "node" in the schedule tree corresponding
3393
 * to the current band in "graph".  Mark the band node permutable
3394
 * if "permutable" is set.
3395
 * The partial schedules and the coincidence property are extracted
3396
 * from the graph nodes.
3397
 * Return the updated schedule node.
3398
 */
3399
static __isl_give isl_schedule_node *insert_current_band(
3400
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3401
  int permutable)
3402
159
{
3403
159
  int i;
3404
159
  int start, end, n;
3405
159
  isl_multi_aff *ma;
3406
159
  isl_multi_pw_aff *mpa;
3407
159
  isl_multi_union_pw_aff *mupa;
3408
159
3409
159
  if (!node)
3410
0
    return NULL;
3411
159
3412
159
  
if (159
graph->n < 1159
)
3413
0
    isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
3414
159
      "graph should have at least one node",
3415
159
      return isl_schedule_node_free(node));
3416
159
3417
159
  start = graph->band_start;
3418
159
  end = graph->n_total_row;
3419
159
  n = end - start;
3420
159
3421
159
  ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n);
3422
159
  mpa = isl_multi_pw_aff_from_multi_aff(ma);
3423
159
  mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3424
159
3425
235
  for (i = 1; 
i < graph->n235
;
++i76
)
{76
3426
76
    isl_multi_union_pw_aff *mupa_i;
3427
76
3428
76
    ma = node_extract_partial_schedule_multi_aff(&graph->node[i],
3429
76
                start, n);
3430
76
    mpa = isl_multi_pw_aff_from_multi_aff(ma);
3431
76
    mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3432
76
    mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
3433
76
  }
3434
159
  node = isl_schedule_node_insert_partial_schedule(node, mupa);
3435
159
3436
410
  for (i = 0; 
i < n410
;
++i251
)
3437
251
    node = isl_schedule_node_band_member_set_coincident(node, i,
3438
251
          graph->node[0].coincident[start + i]);
3439
159
  node = isl_schedule_node_band_set_permutable(node, permutable);
3440
159
3441
159
  return node;
3442
159
}
3443
3444
/* Update the dependence relations based on the current schedule,
3445
 * add the current band to "node" and then continue with the computation
3446
 * of the next band.
3447
 * Return the updated schedule node.
3448
 */
3449
static __isl_give isl_schedule_node *compute_next_band(
3450
  __isl_take isl_schedule_node *node,
3451
  struct isl_sched_graph *graph, int permutable)
3452
36
{
3453
36
  isl_ctx *ctx;
3454
36
3455
36
  if (!node)
3456
0
    return NULL;
3457
36
3458
36
  ctx = isl_schedule_node_get_ctx(node);
3459
36
  if (update_edges(ctx, graph) < 0)
3460
0
    return isl_schedule_node_free(node);
3461
36
  node = insert_current_band(node, graph, permutable);
3462
36
  next_band(graph);
3463
36
3464
36
  node = isl_schedule_node_child(node, 0);
3465
36
  node = compute_schedule(node, graph);
3466
36
  node = isl_schedule_node_parent(node);
3467
36
3468
36
  return node;
3469
36
}
3470
3471
/* Add constraints to graph->lp that force the dependence "map" (which
3472
 * is part of the dependence relation of "edge")
3473
 * to be respected and attempt to carry it, where the edge is one from
3474
 * a node j to itself.  "pos" is the sequence number of the given map.
3475
 * That is, add constraints that enforce
3476
 *
3477
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_j_0 + c_j_n n + c_j_x x)
3478
 *  = c_j_x (y - x) >= e_i
3479
 *
3480
 * for each (x,y) in R.
3481
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
3482
 * of valid constraints for (y - x) and then plug in (-e_i, 0, c_j_x),
3483
 * with each coefficient in c_j_x represented as a pair of non-negative
3484
 * coefficients.
3485
 */
3486
static int add_intra_constraints(struct isl_sched_graph *graph,
3487
  struct isl_sched_edge *edge, __isl_take isl_map *map, int pos)
3488
45
{
3489
45
  int offset;
3490
45
  isl_ctx *ctx = isl_map_get_ctx(map);
3491
45
  isl_dim_map *dim_map;
3492
45
  isl_basic_set *coef;
3493
45
  struct isl_sched_node *node = edge->src;
3494
45
3495
45
  coef = intra_coefficients(graph, node, map);
3496
45
  if (!coef)
3497
0
    return -1;
3498
45
3499
45
  offset = coef_var_offset(coef);
3500
45
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
3501
45
  isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3502
45
  graph->lp = isl_basic_set_extend_constraints(graph->lp,
3503
45
      coef->n_eq, coef->n_ineq);
3504
45
  graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
3505
45
                 coef, dim_map);
3506
45
3507
45
  return 0;
3508
45
}
3509
3510
/* Add constraints to graph->lp that force the dependence "map" (which
3511
 * is part of the dependence relation of "edge")
3512
 * to be respected and attempt to carry it, where the edge is one from
3513
 * node j to node k.  "pos" is the sequence number of the given map.
3514
 * That is, add constraints that enforce
3515
 *
3516
 *  (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
3517
 *
3518
 * for each (x,y) in R.
3519
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
3520
 * of valid constraints for R and then plug in
3521
 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, c_k_x - c_j_x)
3522
 * with each coefficient (except e_i, c_*_0 and c_*_n)
3523
 * represented as a pair of non-negative coefficients.
3524
 */
3525
static int add_inter_constraints(struct isl_sched_graph *graph,
3526
  struct isl_sched_edge *edge, __isl_take isl_map *map, int pos)
3527
14
{
3528
14
  int offset;
3529
14
  isl_ctx *ctx = isl_map_get_ctx(map);
3530
14
  isl_dim_map *dim_map;
3531
14
  isl_basic_set *coef;
3532
14
  struct isl_sched_node *src = edge->src;
3533
14
  struct isl_sched_node *dst = edge->dst;
3534
14
3535
14
  coef = inter_coefficients(graph, edge, map);
3536
14
  if (!coef)
3537
0
    return -1;
3538
14
3539
14
  offset = coef_var_offset(coef);
3540
14
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
3541
14
  isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3542
14
  graph->lp = isl_basic_set_extend_constraints(graph->lp,
3543
14
      coef->n_eq, coef->n_ineq);
3544
14
  graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
3545
14
                 coef, dim_map);
3546
14
3547
14
  return 0;
3548
14
}
3549
3550
/* Add constraints to graph->lp that force all (conditional) validity
3551
 * dependences to be respected and attempt to carry them.
3552
 */
3553
static isl_stat add_all_constraints(struct isl_sched_graph *graph)
3554
29
{
3555
29
  int i, j;
3556
29
  int pos;
3557
29
3558
29
  pos = 0;
3559
68
  for (i = 0; 
i < graph->n_edge68
;
++i39
)
{39
3560
39
    struct isl_sched_edge *edge= &graph->edge[i];
3561
39
3562
39
    if (!is_any_validity(edge))
3563
0
      continue;
3564
39
3565
98
    
for (j = 0; 39
j < edge->map->n98
;
++j59
)
{59
3566
59
      isl_basic_map *bmap;
3567
59
      isl_map *map;
3568
59
3569
59
      bmap = isl_basic_map_copy(edge->map->p[j]);
3570
59
      map = isl_map_from_basic_map(bmap);
3571
59
3572
59
      if (edge->src == edge->dst &&
3573
45
          add_intra_constraints(graph, edge, map, pos) < 0)
3574
0
        return isl_stat_error;
3575
59
      
if (59
edge->src != edge->dst &&59
3576
14
          add_inter_constraints(graph, edge, map, pos) < 0)
3577
0
        return isl_stat_error;
3578
59
      ++pos;
3579
59
    }
3580
39
  }
3581
29
3582
29
  return isl_stat_ok;
3583
29
}
3584
3585
/* Count the number of equality and inequality constraints
3586
 * that will be added to the carry_lp problem.
3587
 * We count each edge exactly once.
3588
 */
3589
static isl_stat count_all_constraints(struct isl_sched_graph *graph,
3590
  int *n_eq, int *n_ineq)
3591
29
{
3592
29
  int i, j;
3593
29
3594
29
  *n_eq = *n_ineq = 0;
3595
68
  for (i = 0; 
i < graph->n_edge68
;
++i39
)
{39
3596
39
    struct isl_sched_edge *edge= &graph->edge[i];
3597
39
3598
39
    if (!is_any_validity(edge))
3599
0
      continue;
3600
39
3601
98
    
for (j = 0; 39
j < edge->map->n98
;
++j59
)
{59
3602
59
      isl_basic_map *bmap;
3603
59
      isl_map *map;
3604
59
3605
59
      bmap = isl_basic_map_copy(edge->map->p[j]);
3606
59
      map = isl_map_from_basic_map(bmap);
3607
59
3608
59
      if (count_map_constraints(graph, edge, map,
3609
59
              n_eq, n_ineq, 1, 0) < 0)
3610
0
            return isl_stat_error;
3611
59
    }
3612
39
  }
3613
29
3614
29
  return isl_stat_ok;
3615
29
}
3616
3617
/* Return the total number of (validity) edges that carry_dependences will
3618
 * attempt to carry.
3619
 */
3620
static int count_carry_edges(struct isl_sched_graph *graph)
3621
30
{
3622
30
  int i;
3623
30
  int n_edge;
3624
30
3625
30
  n_edge = 0;
3626
70
  for (i = 0; 
i < graph->n_edge70
;
++i40
)
{40
3627
40
    struct isl_sched_edge *edge = &graph->edge[i];
3628
40
3629
40
    if (!is_any_validity(edge))
3630
1
      continue;
3631
40
3632
39
    n_edge += isl_map_n_basic_map(edge->map);
3633
39
  }
3634
30
3635
30
  return n_edge;
3636
30
}
3637
3638
/* Construct an LP problem for finding schedule coefficients
3639
 * such that the schedule carries as many validity dependences as possible.
3640
 * In particular, for each dependence i, we bound the dependence distance
3641
 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
3642
 * of all e_i's.  Dependences with e_i = 0 in the solution are simply
3643
 * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
3644
 * Note that if the dependence relation is a union of basic maps,
3645
 * then we have to consider each basic map individually as it may only
3646
 * be possible to carry the dependences expressed by some of those
3647
 * basic maps and not all of them.
3648
 * Below, we consider each of those basic maps as a separate "edge".
3649
 * "n_edge" is the number of these edges.
3650
 *
3651
 * All variables of the LP are non-negative.  The actual coefficients
3652
 * may be negative, so each coefficient is represented as the difference
3653
 * of two non-negative variables.  The negative part always appears
3654
 * immediately before the positive part.
3655
 * Other than that, the variables have the following order
3656
 *
3657
 *  - sum of (1 - e_i) over all edges
3658
 *  - sum of all c_n coefficients
3659
 *    (unconstrained when computing non-parametric schedules)
3660
 *  - sum of positive and negative parts of all c_x coefficients
3661
 *  - for each edge
3662
 *    - e_i
3663
 *  - for each node
3664
 *    - c_i_0
3665
 *    - c_i_n (if parametric)
3666
 *    - positive and negative parts of c_i_x
3667
 *
3668
 * The constraints are those from the (validity) edges plus three equalities
3669
 * to express the sums and n_edge inequalities to express e_i <= 1.
3670
 */
3671
static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
3672
  int n_edge)
3673
29
{
3674
29
  int i;
3675
29
  int k;
3676
29
  isl_space *dim;
3677
29
  unsigned total;
3678
29
  int n_eq, n_ineq;
3679
29
3680
29
  total = 3 + n_edge;
3681
66
  for (i = 0; 
i < graph->n66
;
++i37
)
{37
3682
37
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
3683
37
    node->start = total;
3684
37
    total += 1 + node->nparam + 2 * node->nvar;
3685
37
  }
3686
29
3687
29
  if (count_all_constraints(graph, &n_eq, &n_ineq) < 0)
3688
0
    return isl_stat_error;
3689
29
3690
29
  dim = isl_space_set_alloc(ctx, 0, total);
3691
29
  isl_basic_set_free(graph->lp);
3692
29
  n_eq += 3;
3693
29
  n_ineq += n_edge;
3694
29
  graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
3695
29
  graph->lp = isl_basic_set_set_rational(graph->lp);
3696
29
3697
29
  k = isl_basic_set_alloc_equality(graph->lp);
3698
29
  if (k < 0)
3699
0
    return isl_stat_error;
3700
29
  isl_seq_clr(graph->lp->eq[k], 1 +  total);
3701
29
  isl_int_set_si(graph->lp->eq[k][0], -n_edge);
3702
29
  isl_int_set_si(graph->lp->eq[k][1], 1);
3703
88
  for (i = 0; 
i < n_edge88
;
++i59
)
3704
59
    isl_int_set_si(graph->lp->eq[k][4 + i], 1);
3705
29
3706
29
  if (add_param_sum_constraint(graph, 1) < 0)
3707
0
    return isl_stat_error;
3708
29
  
if (29
add_var_sum_constraint(graph, 2) < 029
)
3709
0
    return isl_stat_error;
3710
29
3711
88
  
for (i = 0; 29
i < n_edge88
;
++i59
)
{59
3712
59
    k = isl_basic_set_alloc_inequality(graph->lp);
3713
59
    if (k < 0)
3714
0
      return isl_stat_error;
3715
59
    isl_seq_clr(graph->lp->ineq[k], 1 +  total);
3716
59
    isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
3717
59
    isl_int_set_si(graph->lp->ineq[k][0], 1);
3718
59
  }
3719
29
3720
29
  
if (29
add_all_constraints(graph) < 029
)
3721
0
    return isl_stat_error;
3722
29
3723
29
  return isl_stat_ok;
3724
29
}
3725
3726
static __isl_give isl_schedule_node *compute_component_schedule(
3727
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3728
  int wcc);
3729
3730
/* Comparison function for sorting the statements based on
3731
 * the corresponding value in "r".
3732
 */
3733
static int smaller_value(const void *a, const void *b, void *data)
3734
10
{
3735
10
  isl_vec *r = data;
3736
10
  const int *i1 = a;
3737
10
  const int *i2 = b;
3738
10
3739
10
  return isl_int_cmp(r->el[*i1], r->el[*i2]);
3740
10
}
3741
3742
/* If the schedule_split_scaled option is set and if the linear
3743
 * parts of the scheduling rows for all nodes in the graphs have
3744
 * a non-trivial common divisor, then split off the remainder of the
3745
 * constant term modulo this common divisor from the linear part.
3746
 * Otherwise, insert a band node directly and continue with
3747
 * the construction of the schedule.
3748
 *
3749
 * If a non-trivial common divisor is found, then
3750
 * the linear part is reduced and the remainder is enforced
3751
 * by a sequence node with the children placed in the order
3752
 * of this remainder.
3753
 * In particular, we assign an scc index based on the remainder and
3754
 * then rely on compute_component_schedule to insert the sequence and
3755
 * to continue the schedule construction on each part.
3756
 */
3757
static __isl_give isl_schedule_node *split_scaled(
3758
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3759
29
{
3760
29
  int i;
3761
29
  int row;
3762
29
  int scc;
3763
29
  isl_ctx *ctx;
3764
29
  isl_int gcd, gcd_i;
3765
29
  isl_vec *r;
3766
29
  int *order;
3767
29
3768
29
  if (!node)
3769
0
    return NULL;
3770
29
3771
29
  ctx = isl_schedule_node_get_ctx(node);
3772
29
  if (!ctx->opt->schedule_split_scaled)
3773
0
    return compute_next_band(node, graph, 0);
3774
29
  
if (29
graph->n <= 129
)
3775
23
    return compute_next_band(node, graph, 0);
3776
29
3777
6
  
isl_int_init6
(gcd);6
3778
6
  isl_int_init(gcd_i);
3779
6
3780
6
  isl_int_set_si(gcd, 0);
3781
6
3782
6
  row = isl_mat_rows(graph->node[0].sched) - 1;
3783
6
3784
20
  for (i = 0; 
i < graph->n20
;
++i14
)
{14
3785
14
    struct isl_sched_node *node = &graph->node[i];
3786
14
    int cols = isl_mat_cols(node->sched);
3787
14
3788
14
    isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
3789
14
    isl_int_gcd(gcd, gcd, gcd_i);
3790
14
  }
3791
6
3792
6
  isl_int_clear(gcd_i);
3793
6
3794
6
  if (
isl_int_cmp_si6
(gcd, 1) <= 06
)
{0
3795
0
    isl_int_clear(gcd);
3796
0
    return compute_next_band(node, graph, 0);
3797
0
  }
3798
6
3799
6
  r = isl_vec_alloc(ctx, graph->n);
3800
6
  order = isl_calloc_array(ctx, int, graph->n);
3801
6
  if (
!r || 6
!order6
)
3802
0
    goto error;
3803
6
3804
20
  
for (i = 0; 6
i < graph->n20
;
++i14
)
{14
3805
14
    struct isl_sched_node *node = &graph->node[i];
3806
14
3807
14
    order[i] = i;
3808
14
    isl_int_fdiv_r(r->el[i], node->sched->row[row][0], gcd);
3809
14
    isl_int_fdiv_q(node->sched->row[row][0],
3810
14
             node->sched->row[row][0], gcd);
3811
14
    isl_int_mul(node->sched->row[row][0],
3812
14
          node->sched->row[row][0], gcd);
3813
14
    node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
3814
14
    if (!node->sched)
3815
0
      goto error;
3816
14
  }
3817
6
3818
6
  
if (6
isl_sort(order, graph->n, sizeof(order[0]), &smaller_value, r) < 06
)
3819
0
    goto error;
3820
6
3821
6
  scc = 0;
3822
20
  for (i = 0; 
i < graph->n20
;
++i14
)
{14
3823
14
    if (
i > 0 && 14
isl_int_ne8
(r->el[order[i - 1]], r->el[order[i]]))
3824
8
      ++scc;
3825
14
    graph->node[order[i]].scc = scc;
3826
14
  }
3827
6
  graph->scc = ++scc;
3828
6
  graph->weak = 0;
3829
6
3830
6
  isl_int_clear(gcd);
3831
6
  isl_vec_free(r);
3832
6
  free(order);
3833
6
3834
6
  if (update_edges(ctx, graph) < 0)
3835
0
    return isl_schedule_node_free(node);
3836
6
  node = insert_current_band(node, graph, 0);
3837
6
  next_band(graph);
3838
6
3839
6
  node = isl_schedule_node_child(node, 0);
3840
6
  node = compute_component_schedule(node, graph, 0);
3841
6
  node = isl_schedule_node_parent(node);
3842
6
3843
6
  return node;
3844
0
error:
3845
0
  isl_vec_free(r);
3846
0
  free(order);
3847
0
  isl_int_clear(gcd);
3848
0
  return isl_schedule_node_free(node);
3849
6
}
3850
3851
/* Is the schedule row "sol" trivial on node "node"?
3852
 * That is, is the solution zero on the dimensions orthogonal to
3853
 * the previously found solutions?
3854
 * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
3855
 *
3856
 * Each coefficient is represented as the difference between
3857
 * two non-negative values in "sol".  "sol" has been computed
3858
 * in terms of the original iterators (i.e., without use of cmap).
3859
 * We construct the schedule row s and write it as a linear
3860
 * combination of (linear combinations of) previously computed schedule rows.
3861
 * s = Q c or c = U s.
3862
 * If the final entries of c are all zero, then the solution is trivial.
3863
 */
3864
static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
3865
33
{
3866
33
  int trivial;
3867
33
  isl_vec *node_sol;
3868
33
3869
33
  if (!sol)
3870
0
    return -1;
3871
33
  
if (33
node->nvar == node->rank33
)
3872
0
    return 0;
3873
33
3874
33
  node_sol = extract_var_coef(node, sol);
3875
33
  node_sol = isl_mat_vec_product(isl_mat_copy(node->cinv), node_sol);
3876
33
  if (!node_sol)
3877
0
    return -1;
3878
33
3879
33
  trivial = isl_seq_first_non_zero(node_sol->el + node->rank,
3880
33
          node->nvar - node->rank) == -1;
3881
33
3882
33
  isl_vec_free(node_sol);
3883
33
3884
33
  return trivial;
3885
33
}
3886
3887
/* Is the schedule row "sol" trivial on any node where it should
3888
 * not be trivial?
3889
 * "sol" has been computed in terms of the original iterators
3890
 * (i.e., without use of cmap).
3891
 * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
3892
 */
3893
static int is_any_trivial(struct isl_sched_graph *graph,
3894
  __isl_keep isl_vec *sol)
3895
29
{
3896
29
  int i;
3897
29
3898
58
  for (i = 0; 
i < graph->n58
;
++i29
)
{33
3899
33
    struct isl_sched_node *node = &graph->node[i];
3900
33
    int trivial;
3901
33
3902
33
    if (!needs_row(graph, node))
3903
0
      continue;
3904
33
    trivial = is_trivial(node, sol);
3905
33
    if (
trivial < 0 || 33
trivial33
)
3906
4
      return trivial;
3907
33
  }
3908
29
3909
25
  return 0;
3910
29
}
3911
3912
/* Does the schedule represented by "sol" perform loop coalescing on "node"?
3913
 * If so, return the position of the coalesced dimension.
3914
 * Otherwise, return node->nvar or -1 on error.
3915
 *
3916
 * In particular, look for pairs of coefficients c_i and c_j such that
3917
 * |c_j/c_i| >= size_i, i.e., |c_j| >= |c_i * size_i|.
3918
 * If any such pair is found, then return i.
3919
 * If size_i is infinity, then no check on c_i needs to be performed.
3920
 */
3921
static int find_node_coalescing(struct isl_sched_node *node,
3922
  __isl_keep isl_vec *sol)
3923
39
{
3924
39
  int i, j;
3925
39
  isl_int max;
3926
39
  isl_vec *csol;
3927
39
3928
39
  if (node->nvar <= 1)
3929
14
    return node->nvar;
3930
39
3931
25
  csol = extract_var_coef(node, sol);
3932
25
  if (!csol)
3933
0
    return -1;
3934
25
  
isl_int_init25
(max);25
3935
71
  for (i = 0; 
i < node->nvar71
;
++i46
)
{50
3936
50
    isl_val *v;
3937
50
3938
50
    if (isl_int_is_zero(csol->el[i]))
3939
18
      continue;
3940
32
    v = isl_multi_val_get_val(node->sizes, i);
3941
32
    if (!v)
3942
0
      goto error;
3943
32
    
if (32
!isl_val_is_int(v)32
)
{14
3944
14
      isl_val_free(v);
3945
14
      continue;
3946
14
    }
3947
18
    
isl_int_mul18
(max, v->n, csol->el[i]);18
3948
18
    isl_val_free(v);
3949
18
3950
46
    for (j = 0; 
j < node->nvar46
;
++j28
)
{32
3951
32
      if (j == i)
3952
14
        continue;
3953
18
      
if (18
isl_int_abs_ge18
(csol->el[j], max))
3954
4
        break;
3955
18
    }
3956
18
    if (j < node->nvar)
3957
4
      break;
3958
18
  }
3959
25
3960
25
  
isl_int_clear25
(max);25
3961
25
  isl_vec_free(csol);
3962
25
  return i;
3963
0
error:
3964
0
  isl_int_clear(max);
3965
0
  isl_vec_free(csol);
3966
0
  return -1;
3967
25
}
3968
3969
/* Force the schedule coefficient at position "pos" of "node" to be zero
3970
 * in "tl".
3971
 * The coefficient is encoded as the difference between two non-negative
3972
 * variables.  Force these two variables to have the same value.
3973
 */
3974
static __isl_give isl_tab_lexmin *zero_out_node_coef(
3975
  __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
3976
4
{
3977
4
  int dim;
3978
4
  isl_ctx *ctx;
3979
4
  isl_vec *eq;
3980
4
3981
4
  ctx = isl_space_get_ctx(node->space);
3982
4
  dim = isl_tab_lexmin_dim(tl);
3983
4
  if (dim < 0)
3984
0
    return isl_tab_lexmin_free(tl);
3985
4
  eq = isl_vec_alloc(ctx, 1 + dim);
3986
4
  eq = isl_vec_clr(eq);
3987
4
  if (!eq)
3988
0
    return isl_tab_lexmin_free(tl);
3989
4
3990
4
  pos = 1 + node_var_coef_offset(node) + 2 * pos;
3991
4
  isl_int_set_si(eq->el[pos], 1);
3992
4
  isl_int_set_si(eq->el[pos + 1], -1);
3993
4
  tl = isl_tab_lexmin_add_eq(tl, eq->el);
3994
4
  isl_vec_free(eq);
3995
4
3996
4
  return tl;
3997
4
}
3998
3999
/* Return the lexicographically smallest rational point in the basic set
4000
 * from which "tl" was constructed, double checking that this input set
4001
 * was not empty.
4002
 */
4003
static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
4004
33
{
4005
33
  isl_vec *sol;
4006
33
4007
33
  sol = isl_tab_lexmin_get_solution(tl);
4008
33
  if (!sol)
4009
0
    return NULL;
4010
33
  
if (33
sol->size == 033
)
4011
0
    isl_die(isl_vec_get_ctx(sol), isl_error_internal,
4012
33
      "error in schedule construction",
4013
33
      return isl_vec_free(sol));
4014
33
  return sol;
4015
33
}
4016
4017
/* Does the solution "sol" of the LP problem constructed by setup_carry_lp
4018
 * carry any of the "n_edge" groups of dependences?
4019
 * The value in the first position is the sum of (1 - e_i) over all "n_edge"
4020
 * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
4021
 * by the edge are carried by the solution.
4022
 * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
4023
 * one of those is carried.
4024
 *
4025
 * Note that despite the fact that the problem is solved using a rational
4026
 * solver, the solution is guaranteed to be integral.
4027
 * Specifically, the dependence distance lower bounds e_i (and therefore
4028
 * also their sum) are integers.  See Lemma 5 of [1].
4029
 *
4030
 * Any potential denominator of the sum is cleared by this function.
4031
 * The denominator is not relevant for any of the other elements
4032
 * in the solution.
4033
 *
4034
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4035
 *     Problem, Part II: Multi-Dimensional Time.
4036
 *     In Intl. Journal of Parallel Programming, 1992.
4037
 */
4038
static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
4039
33
{
4040
33
  isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
4041
33
  isl_int_set_si(sol->el[0], 1);
4042
33
  return isl_int_cmp_si(sol->el[1], n_edge) < 0;
4043
33
}
4044
4045
/* Return the lexicographically smallest rational point in "lp",
4046
 * assuming that all variables are non-negative and performing some
4047
 * additional sanity checks.
4048
 * In particular, "lp" should not be empty by construction.
4049
 * Double check that this is the case.
4050
 * Also, check that dependences are carried for at least one of
4051
 * the "n_edge" edges.
4052
 *
4053
 * If the computed schedule performs loop coalescing on a given node,
4054
 * i.e., if it is of the form
4055
 *
4056
 *  c_i i + c_j j + ...
4057
 *
4058
 * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
4059
 * to cut out this solution.  Repeat this process until no more loop
4060
 * coalescing occurs or until no more dependences can be carried.
4061
 * In the latter case, revert to the previously computed solution.
4062
 */
4063
static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
4064
  __isl_take isl_basic_set *lp, int n_edge)
4065
29
{
4066
29
  int i, pos;
4067
29
  isl_ctx *ctx;
4068
29
  isl_tab_lexmin *tl;
4069
29
  isl_vec *sol, *prev = NULL;
4070
29
  int treat_coalescing;
4071
29
4072
29
  if (!lp)
4073
0
    return NULL;
4074
29
  ctx = isl_basic_set_get_ctx(lp);
4075
29
  treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
4076
29
  tl = isl_tab_lexmin_from_basic_set(lp);
4077
29
4078
33
  do {
4079
33
    sol = non_empty_solution(tl);
4080
33
    if (!sol)
4081
0
      goto error;
4082
33
4083
33
    
if (33
!carries_dependences(sol, n_edge)33
)
{2
4084
2
      if (!prev)
4085
0
        isl_die(ctx, isl_error_unknown,
4086
2
          "unable to carry dependences",
4087
2
          goto error);
4088
2
      isl_vec_free(sol);
4089
2
      sol = prev;
4090
2
      break;
4091
2
    }
4092
31
    prev = isl_vec_free(prev);
4093
31
    if (!treat_coalescing)
4094
0
      break;
4095
66
    
for (i = 0; 31
i < graph->n66
;
++i35
)
{39
4096
39
      struct isl_sched_node *node = &graph->node[i];
4097
39
4098
39
      pos = find_node_coalescing(node, sol);
4099
39
      if (pos < 0)
4100
0
        goto error;
4101
39
      
if (39
pos < node->nvar39
)
4102
4
        break;
4103
39
    }
4104
31
    
if (31
i < graph->n31
)
{4
4105
4
      prev = sol;
4106
4
      tl = zero_out_node_coef(tl, &graph->node[i], pos);
4107
4
    }
4108
31
  } while (i < graph->n);
4109
29
4110
29
  isl_tab_lexmin_free(tl);
4111
29
4112
29
  return sol;
4113
0
error:
4114
0
  isl_tab_lexmin_free(tl);
4115
0
  isl_vec_free(prev);
4116
0
  isl_vec_free(sol);
4117
0
  return NULL;
4118
29
}
4119
4120
/* Construct a schedule row for each node such that as many validity dependences
4121
 * as possible are carried and then continue with the next band.
4122
 *
4123
 * If there are no validity dependences, then no dependence can be carried and
4124
 * the procedure is guaranteed to fail.  If there is more than one component,
4125
 * then try computing a schedule on each component separately
4126
 * to prevent or at least postpone this failure.
4127
 *
4128
 * If the computed schedule row turns out to be trivial on one or
4129
 * more nodes where it should not be trivial, then we throw it away
4130
 * and try again on each component separately.
4131
 *
4132
 * If there is only one component, then we accept the schedule row anyway,
4133
 * but we do not consider it as a complete row and therefore do not
4134
 * increment graph->n_row.  Note that the ranks of the nodes that
4135
 * do get a non-trivial schedule part will get updated regardless and
4136
 * graph->maxvar is computed based on these ranks.  The test for
4137
 * whether more schedule rows are required in compute_schedule_wcc
4138
 * is therefore not affected.
4139
 *
4140
 * Insert a band corresponding to the schedule row at position "node"
4141
 * of the schedule tree and continue with the construction of the schedule.
4142
 * This insertion and the continued construction is performed by split_scaled
4143
 * after optionally checking for non-trivial common divisors.
4144
 */
4145
static __isl_give isl_schedule_node *carry_dependences(
4146
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4147
30
{
4148
30
  int n_edge;
4149
30
  int trivial;
4150
30
  isl_ctx *ctx;
4151
30
  isl_vec *sol;
4152
30
  isl_basic_set *lp;
4153
30
4154
30
  if (!node)
4155
0
    return NULL;
4156
30
4157
30
  n_edge = count_carry_edges(graph);
4158
30
  if (
n_edge == 0 && 30
graph->scc > 11
)
4159
1
    return compute_component_schedule(node, graph, 1);
4160
30
4161
29
  ctx = isl_schedule_node_get_ctx(node);
4162
29
  if (setup_carry_lp(ctx, graph, n_edge) < 0)
4163
0
    return isl_schedule_node_free(node);
4164
29
4165
29
  lp = isl_basic_set_copy(graph->lp);
4166
29
  sol = non_neg_lexmin(graph, lp, n_edge);
4167
29
  if (!sol)
4168
0
    return isl_schedule_node_free(node);
4169
29
4170
29
  trivial = is_any_trivial(graph, sol);
4171
29
  if (
trivial < 029
)
{0
4172
0
    sol = isl_vec_free(sol);
4173
29
  } else 
if (29
trivial && 29
graph->scc > 14
)
{0
4174
0
    isl_vec_free(sol);
4175
0
    return compute_component_schedule(node, graph, 1);
4176
0
  }
4177
29
4178
29
  
if (29
update_schedule(graph, sol, 0, 0) < 029
)
4179
0
    return isl_schedule_node_free(node);
4180
29
  
if (29
trivial29
)
4181
4
    graph->n_row--;
4182
29
4183
29
  return split_scaled(node, graph);
4184
29
}
4185
4186
/* Topologically sort statements mapped to the same schedule iteration
4187
 * and add insert a sequence node in front of "node"
4188
 * corresponding to this order.
4189
 * If "initialized" is set, then it may be assumed that compute_maxvar
4190
 * has been called on the current band.  Otherwise, call
4191
 * compute_maxvar if and before carry_dependences gets called.
4192
 *
4193
 * If it turns out to be impossible to sort the statements apart,
4194
 * because different dependences impose different orderings
4195
 * on the statements, then we extend the schedule such that
4196
 * it carries at least one more dependence.
4197
 */
4198
static __isl_give isl_schedule_node *sort_statements(
4199
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4200
  int initialized)
4201
147
{
4202
147
  isl_ctx *ctx;
4203
147
  isl_union_set_list *filters;
4204
147
4205
147
  if (!node)
4206
0
    return NULL;
4207
147
4208
147
  ctx = isl_schedule_node_get_ctx(node);
4209
147
  if (graph->n < 1)
4210
0
    isl_die(ctx, isl_error_internal,
4211
147
      "graph should have at least one node",
4212
147
      return isl_schedule_node_free(node));
4213
147
4214
147
  
if (147
graph->n == 1147
)
4215
108
    return node;
4216
147
4217
39
  
if (39
update_edges(ctx, graph) < 039
)
4218
0
    return isl_schedule_node_free(node);
4219
39
4220
39
  
if (39
graph->n_edge == 039
)
4221
0
    return node;
4222
39
4223
39
  
if (39
detect_sccs(ctx, graph) < 039
)
4224
0
    return isl_schedule_node_free(node);
4225
39
4226
39
  next_band(graph);
4227
39
  if (
graph->scc < graph->n39
)
{2
4228
2
    if (
!initialized && 2
compute_maxvar(graph) < 00
)
4229
0
      return isl_schedule_node_free(node);
4230
2
    return carry_dependences(node, graph);
4231
2
  }
4232
39
4233
37
  filters = extract_sccs(ctx, graph);
4234
37
  node = isl_schedule_node_insert_sequence(node, filters);
4235
37
4236
37
  return node;
4237
39
}
4238
4239
/* Are there any (non-empty) (conditional) validity edges in the graph?
4240
 */
4241
static int has_validity_edges(struct isl_sched_graph *graph)
4242
26
{
4243
26
  int i;
4244
26
4245
34
  for (i = 0; 
i < graph->n_edge34
;
++i8
)
{20
4246
20
    int empty;
4247
20
4248
20
    empty = isl_map_plain_is_empty(graph->edge[i].map);
4249
20
    if (empty < 0)
4250
0
      return -1;
4251
20
    
if (20
empty20
)
4252
6
      continue;
4253
14
    
if (14
is_any_validity(&graph->edge[i])14
)
4254
12
      return 1;
4255
14
  }
4256
26
4257
14
  return 0;
4258
26
}
4259
4260
/* Should we apply a Feautrier step?
4261
 * That is, did the user request the Feautrier algorithm and are
4262
 * there any validity dependences (left)?
4263
 */
4264
static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
4265
184
{
4266
184
  if (
ctx->opt->schedule_algorithm != 184
ISL_SCHEDULE_ALGORITHM_FEAUTRIER184
)
4267
158
    return 0;
4268
184
4269
26
  return has_validity_edges(graph);
4270
184
}
4271
4272
/* Compute a schedule for a connected dependence graph using Feautrier's
4273
 * multi-dimensional scheduling algorithm and return the updated schedule node.
4274
 *
4275
 * The original algorithm is described in [1].
4276
 * The main idea is to minimize the number of scheduling dimensions, by
4277
 * trying to satisfy as many dependences as possible per scheduling dimension.
4278
 *
4279
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4280
 *     Problem, Part II: Multi-Dimensional Time.
4281
 *     In Intl. Journal of Parallel Programming, 1992.
4282
 */
4283
static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier(
4284
  isl_schedule_node *node, struct isl_sched_graph *graph)
4285
12
{
4286
12
  return carry_dependences(node, graph);
4287
12
}
4288
4289
/* Turn off the "local" bit on all (condition) edges.
4290
 */
4291
static void clear_local_edges(struct isl_sched_graph *graph)
4292
219
{
4293
219
  int i;
4294
219
4295
470
  for (i = 0; 
i < graph->n_edge470
;
++i251
)
4296
251
    
if (251
is_condition(&graph->edge[i])251
)
4297
35
      clear_local(&graph->edge[i]);
4298
219
}
4299
4300
/* Does "graph" have both condition and conditional validity edges?
4301
 */
4302
static int need_condition_check(struct isl_sched_graph *graph)
4303
219
{
4304
219
  int i;
4305
219
  int any_condition = 0;
4306
219
  int any_conditional_validity = 0;
4307
219
4308
470
  for (i = 0; 
i < graph->n_edge470
;
++i251
)
{251
4309
251
    if (is_condition(&graph->edge[i]))
4310
35
      any_condition = 1;
4311
251
    if (is_conditional_validity(&graph->edge[i]))
4312
40
      any_conditional_validity = 1;
4313
251
  }
4314
219
4315
20
  return any_condition && any_conditional_validity;
4316
219
}
4317
4318
/* Does "graph" contain any coincidence edge?
4319
 */
4320
static int has_any_coincidence(struct isl_sched_graph *graph)
4321
219
{
4322
219
  int i;
4323
219
4324
333
  for (i = 0; 
i < graph->n_edge333
;
++i114
)
4325
208
    
if (208
is_coincidence(&graph->edge[i])208
)
4326
94
      return 1;
4327
219
4328
125
  return 0;
4329
219
}
4330
4331
/* Extract the final schedule row as a map with the iteration domain
4332
 * of "node" as domain.
4333
 */
4334
static __isl_give isl_map *final_row(struct isl_sched_node *node)
4335
142
{
4336
142
  isl_multi_aff *ma;
4337
142
  int row;
4338
142
4339
142
  row = isl_mat_rows(node->sched) - 1;
4340
142
  ma = node_extract_partial_schedule_multi_aff(node, row, 1);
4341
142
  return isl_map_from_multi_aff(ma);
4342
142
}
4343
4344
/* Is the conditional validity dependence in the edge with index "edge_index"
4345
 * violated by the latest (i.e., final) row of the schedule?
4346
 * That is, is i scheduled after j
4347
 * for any conditional validity dependence i -> j?
4348
 */
4349
static int is_violated(struct isl_sched_graph *graph, int edge_index)
4350
71
{
4351
71
  isl_map *src_sched, *dst_sched, *map;
4352
71
  struct isl_sched_edge *edge = &graph->edge[edge_index];
4353
71
  int empty;
4354
71
4355
71
  src_sched = final_row(edge->src);
4356
71
  dst_sched = final_row(edge->dst);
4357
71
  map = isl_map_copy(edge->map);
4358
71
  map = isl_map_apply_domain(map, src_sched);
4359
71
  map = isl_map_apply_range(map, dst_sched);
4360
71
  map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0);
4361
71
  empty = isl_map_is_empty(map);
4362
71
  isl_map_free(map);
4363
71
4364
71
  if (empty < 0)
4365
0
    return -1;
4366
71
4367
71
  return !empty;
4368
71
}
4369
4370
/* Does "graph" have any satisfied condition edges that
4371
 * are adjacent to the conditional validity constraint with
4372
 * domain "conditional_source" and range "conditional_sink"?
4373
 *
4374
 * A satisfied condition is one that is not local.
4375
 * If a condition was forced to be local already (i.e., marked as local)
4376
 * then there is no need to check if it is in fact local.
4377
 *
4378
 * Additionally, mark all adjacent condition edges found as local.
4379
 */
4380
static int has_adjacent_true_conditions(struct isl_sched_graph *graph,
4381
  __isl_keep isl_union_set *conditional_source,
4382
  __isl_keep isl_union_set *conditional_sink)
4383
11
{
4384
11
  int i;
4385
11
  int any = 0;
4386
11
4387
60
  for (i = 0; 
i < graph->n_edge60
;
++i49
)
{49
4388
49
    int adjacent, local;
4389
49
    isl_union_map *condition;
4390
49
4391
49
    if (!is_condition(&graph->edge[i]))
4392
29
      continue;
4393
20
    
if (20
is_local(&graph->edge[i])20
)
4394
0
      continue;
4395
20
4396
20
    condition = graph->edge[i].tagged_condition;
4397
20
    adjacent = domain_intersects(condition, conditional_sink);
4398
20
    if (
adjacent >= 0 && 20
!adjacent20
)
4399
8
      adjacent = range_intersects(condition,
4400
8
              conditional_source);
4401
20
    if (adjacent < 0)
4402
0
      return -1;
4403
20
    
if (20
!adjacent20
)
4404
5
      continue;
4405
20
4406
15
    set_local(&graph->edge[i]);
4407
15
4408
15
    local = is_condition_false(&graph->edge[i]);
4409
15
    if (local < 0)
4410
0
      return -1;
4411
15
    
if (15
!local15
)
4412
8
      any = 1;
4413
15
  }
4414
11
4415
11
  return any;
4416
11
}
4417
4418
/* Are there any violated conditional validity dependences with
4419
 * adjacent condition dependences that are not local with respect
4420
 * to the current schedule?
4421
 * That is, is the conditional validity constraint violated?
4422
 *
4423
 * Additionally, mark all those adjacent condition dependences as local.
4424
 * We also mark those adjacent condition dependences that were not marked
4425
 * as local before, but just happened to be local already.  This ensures
4426
 * that they remain local if the schedule is recomputed.
4427
 *
4428
 * We first collect domain and range of all violated conditional validity
4429
 * dependences and then check if there are any adjacent non-local
4430
 * condition dependences.
4431
 */
4432
static int has_violated_conditional_constraint(isl_ctx *ctx,
4433
  struct isl_sched_graph *graph)
4434
31
{
4435
31
  int i;
4436
31
  int any = 0;
4437
31
  isl_union_set *source, *sink;
4438
31
4439
31
  source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4440
31
  sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4441
168
  for (i = 0; 
i < graph->n_edge168
;
++i137
)
{137
4442
137
    isl_union_set *uset;
4443
137
    isl_union_map *umap;
4444
137
    int violated;
4445
137
4446
137
    if (!is_conditional_validity(&graph->edge[i]))
4447
66
      continue;
4448
137
4449
71
    violated = is_violated(graph, i);
4450
71
    if (violated < 0)
4451
0
      goto error;
4452
71
    
if (71
!violated71
)
4453
52
      continue;
4454
71
4455
19
    any = 1;
4456
19
4457
19
    umap = isl_union_map_copy(graph->edge[i].tagged_validity);
4458
19
    uset = isl_union_map_domain(umap);
4459
19
    source = isl_union_set_union(source, uset);
4460
19
    source = isl_union_set_coalesce(source);
4461
19
4462
19
    umap = isl_union_map_copy(graph->edge[i].tagged_validity);
4463
19
    uset = isl_union_map_range(umap);
4464
19
    sink = isl_union_set_union(sink, uset);
4465
19
    sink = isl_union_set_coalesce(sink);
4466
19
  }
4467
31
4468
31
  
if (31
any31
)
4469
11
    any = has_adjacent_true_conditions(graph, source, sink);
4470
31
4471
31
  isl_union_set_free(source);
4472
31
  isl_union_set_free(sink);
4473
31
  return any;
4474
0
error:
4475
0
  isl_union_set_free(source);
4476
0
  isl_union_set_free(sink);
4477
0
  return -1;
4478
31
}
4479
4480
/* Examine the current band (the rows between graph->band_start and
4481
 * graph->n_total_row), deciding whether to drop it or add it to "node"
4482
 * and then continue with the computation of the next band, if any.
4483
 * If "initialized" is set, then it may be assumed that compute_maxvar
4484
 * has been called on the current band.  Otherwise, call
4485
 * compute_maxvar if and before carry_dependences gets called.
4486
 *
4487
 * The caller keeps looking for a new row as long as
4488
 * graph->n_row < graph->maxvar.  If the latest attempt to find
4489
 * such a row failed (i.e., we still have graph->n_row < graph->maxvar),
4490
 * then we either
4491
 * - split between SCCs and start over (assuming we found an interesting
4492
 *  pair of SCCs between which to split)
4493
 * - continue with the next band (assuming the current band has at least
4494
 *  one row)
4495
 * - try to carry as many dependences as possible and continue with the next
4496
 *  band
4497
 * In each case, we first insert a band node in the schedule tree
4498
 * if any rows have been computed.
4499
 *
4500
 * If the caller managed to complete the schedule, we insert a band node
4501
 * (if any schedule rows were computed) and we finish off by topologically
4502
 * sorting the statements based on the remaining dependences.
4503
 */
4504
static __isl_give isl_schedule_node *compute_schedule_finish_band(
4505
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4506
  int initialized)
4507
178
{
4508
178
  int insert;
4509
178
4510
178
  if (!node)
4511
0
    return NULL;
4512
178
4513
178
  
if (178
graph->n_row < graph->maxvar178
)
{31
4514
31
    isl_ctx *ctx;
4515
31
    int empty = graph->n_total_row == graph->band_start;
4516
31
4517
31
    ctx = isl_schedule_node_get_ctx(node);
4518
31
    if (
!ctx->opt->schedule_maximize_band_depth && 31
!empty29
)
4519
12
      return compute_next_band(node, graph, 1);
4520
19
    
if (19
graph->src_scc >= 019
)
4521
2
      return compute_split_schedule(node, graph);
4522
17
    
if (17
!empty17
)
4523
1
      return compute_next_band(node, graph, 1);
4524
16
    
if (16
!initialized && 16
compute_maxvar(graph) < 00
)
4525
0
      return isl_schedule_node_free(node);
4526
16
    return carry_dependences(node, graph);
4527
16
  }
4528
178
4529
147
  insert = graph->n_total_row > graph->band_start;
4530
147
  if (
insert147
)
{117
4531
117
    node = insert_current_band(node, graph, 1);
4532
117
    node = isl_schedule_node_child(node, 0);
4533
117
  }
4534
147
  node = sort_statements(node, graph, initialized);
4535
147
  if (insert)
4536
117
    node = isl_schedule_node_parent(node);
4537
147
4538
147
  return node;
4539
178
}
4540
4541
/* Construct a band of schedule rows for a connected dependence graph.
4542
 * The caller is responsible for determining the strongly connected
4543
 * components and calling compute_maxvar first.
4544
 *
4545
 * We try to find a sequence of as many schedule rows as possible that result
4546
 * in non-negative dependence distances (independent of the previous rows
4547
 * in the sequence, i.e., such that the sequence is tilable), with as
4548
 * many of the initial rows as possible satisfying the coincidence constraints.
4549
 * The computation stops if we can't find any more rows or if we have found
4550
 * all the rows we wanted to find.
4551
 *
4552
 * If ctx->opt->schedule_outer_coincidence is set, then we force the
4553
 * outermost dimension to satisfy the coincidence constraints.  If this
4554
 * turns out to be impossible, we fall back on the general scheme above
4555
 * and try to carry as many dependences as possible.
4556
 *
4557
 * If "graph" contains both condition and conditional validity dependences,
4558
 * then we need to check that that the conditional schedule constraint
4559
 * is satisfied, i.e., there are no violated conditional validity dependences
4560
 * that are adjacent to any non-local condition dependences.
4561
 * If there are, then we mark all those adjacent condition dependences
4562
 * as local and recompute the current band.  Those dependences that
4563
 * are marked local will then be forced to be local.
4564
 * The initial computation is performed with no dependences marked as local.
4565
 * If we are lucky, then there will be no violated conditional validity
4566
 * dependences adjacent to any non-local condition dependences.
4567
 * Otherwise, we mark some additional condition dependences as local and
4568
 * recompute.  We continue this process until there are no violations left or
4569
 * until we are no longer able to compute a schedule.
4570
 * Since there are only a finite number of dependences,
4571
 * there will only be a finite number of iterations.
4572
 */
4573
static isl_stat compute_schedule_wcc_band(isl_ctx *ctx,
4574
  struct isl_sched_graph *graph)
4575
219
{
4576
219
  int has_coincidence;
4577
219
  int use_coincidence;
4578
219
  int force_coincidence = 0;
4579
219
  int check_conditional;
4580
219
4581
219
  if (sort_sccs(graph) < 0)
4582
0
    return isl_stat_error;
4583
219
4584
219
  clear_local_edges(graph);
4585
219
  check_conditional = need_condition_check(graph);
4586
219
  has_coincidence = has_any_coincidence(graph);
4587
219
4588
219
  if (ctx->opt->schedule_outer_coincidence)
4589
14
    force_coincidence = 1;
4590
219
4591
219
  use_coincidence = has_coincidence;
4592
561
  while (
graph->n_row < graph->maxvar561
)
{376
4593
376
    isl_vec *sol;
4594
376
    int violated;
4595
376
    int coincident;
4596
376
4597
376
    graph->src_scc = -1;
4598
376
    graph->dst_scc = -1;
4599
376
4600
376
    if (setup_lp(ctx, graph, use_coincidence) < 0)
4601
0
      return isl_stat_error;
4602
376
    sol = solve_lp(graph);
4603
376
    if (!sol)
4604
0
      return isl_stat_error;
4605
376
    
if (376
sol->size == 0376
)
{103
4606
103
      int empty = graph->n_total_row == graph->band_start;
4607
103
4608
103
      isl_vec_free(sol);
4609
103
      if (
use_coincidence && 103
(!force_coincidence || 78
!empty9
))
{69
4610
69
        use_coincidence = 0;
4611
69
        continue;
4612
69
      }
4613
34
      return isl_stat_ok;
4614
103
    }
4615
273
    
coincident = !has_coincidence || 273
use_coincidence128
;
4616
273
    if (update_schedule(graph, sol, 1, coincident) < 0)
4617
0
      return isl_stat_error;
4618
273
4619
273
    
if (273
!check_conditional273
)
4620
242
      continue;
4621
31
    violated = has_violated_conditional_constraint(ctx, graph);
4622
31
    if (violated < 0)
4623
0
      return isl_stat_error;
4624
31
    
if (31
!violated31
)
4625
25
      continue;
4626
6
    
if (6
reset_band(graph) < 06
)
4627
0
      return isl_stat_error;
4628
6
    use_coincidence = has_coincidence;
4629
6
  }
4630
219
4631
185
  return isl_stat_ok;
4632
219
}
4633
4634
/* Compute a schedule for a connected dependence graph by considering
4635
 * the graph as a whole and return the updated schedule node.
4636
 *
4637
 * The actual schedule rows of the current band are computed by
4638
 * compute_schedule_wcc_band.  compute_schedule_finish_band takes
4639
 * care of integrating the band into "node" and continuing
4640
 * the computation.
4641
 */
4642
static __isl_give isl_schedule_node *compute_schedule_wcc_whole(
4643
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4644
156
{
4645
156
  isl_ctx *ctx;
4646
156
4647
156
  if (!node)
4648
0
    return NULL;
4649
156
4650
156
  ctx = isl_schedule_node_get_ctx(node);
4651
156
  if (compute_schedule_wcc_band(ctx, graph) < 0)
4652
0
    return isl_schedule_node_free(node);
4653
156
4654
156
  return compute_schedule_finish_band(node, graph, 1);
4655
156
}
4656
4657
/* Clustering information used by compute_schedule_wcc_clustering.
4658
 *
4659
 * "n" is the number of SCCs in the original dependence graph
4660
 * "scc" is an array of "n" elements, each representing an SCC
4661
 * of the original dependence graph.  All entries in the same cluster
4662
 * have the same number of schedule rows.
4663
 * "scc_cluster" maps each SCC index to the cluster to which it belongs,
4664
 * where each cluster is represented by the index of the first SCC
4665
 * in the cluster.  Initially, each SCC belongs to a cluster containing
4666
 * only that SCC.
4667
 *
4668
 * "scc_in_merge" is used by merge_clusters_along_edge to keep
4669
 * track of which SCCs need to be merged.
4670
 *
4671
 * "cluster" contains the merged clusters of SCCs after the clustering
4672
 * has completed.
4673
 *
4674
 * "scc_node" is a temporary data structure used inside copy_partial.
4675
 * For each SCC, it keeps track of the number of nodes in the SCC
4676
 * that have already been copied.
4677
 */
4678
struct isl_clustering {
4679
  int n;
4680
  struct isl_sched_graph *scc;
4681
  struct isl_sched_graph *cluster;
4682
  int *scc_cluster;
4683
  int *scc_node;
4684
  int *scc_in_merge;
4685
};
4686
4687
/* Initialize the clustering data structure "c" from "graph".
4688
 *
4689
 * In particular, allocate memory, extract the SCCs from "graph"
4690
 * into c->scc, initialize scc_cluster and construct
4691
 * a band of schedule rows for each SCC.
4692
 * Within each SCC, there is only one SCC by definition.
4693
 * Each SCC initially belongs to a cluster containing only that SCC.
4694
 */
4695
static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
4696
  struct isl_sched_graph *graph)
4697
16
{
4698
16
  int i;
4699
16
4700
16
  c->n = graph->scc;
4701
16
  c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
4702
16
  c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
4703
16
  c->scc_cluster = isl_calloc_array(ctx, int, c->n);
4704
16
  c->scc_node = isl_calloc_array(ctx, int, c->n);
4705
16
  c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
4706
16
  if (
!c->scc || 16
!c->cluster16
||
4707
16
      
!c->scc_cluster16
||
!c->scc_node16
||
!c->scc_in_merge16
)
4708
0
    return isl_stat_error;
4709
16
4710
57
  
for (i = 0; 16
i < c->n57
;
++i41
)
{41
4711
41
    if (extract_sub_graph(ctx, graph, &node_scc_exactly,
4712
41
          &edge_scc_exactly, i, &c->scc[i]) < 0)
4713
0
      return isl_stat_error;
4714
41
    c->scc[i].scc = 1;
4715
41
    if (compute_maxvar(&c->scc[i]) < 0)
4716
0
      return isl_stat_error;
4717
41
    
if (41
compute_schedule_wcc_band(ctx, &c->scc[i]) < 041
)
4718
0
      return isl_stat_error;
4719
41
    c->scc_cluster[i] = i;
4720
41
  }
4721
16
4722
16
  return isl_stat_ok;
4723
16
}
4724
4725
/* Free all memory allocated for "c".
4726
 */
4727
static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
4728
16
{
4729
16
  int i;
4730
16
4731
16
  if (c->scc)
4732
57
    
for (i = 0; 16
i < c->n57
;
++i41
)
4733
41
      graph_free(ctx, &c->scc[i]);
4734
16
  free(c->scc);
4735
16
  if (c->cluster)
4736
57
    
for (i = 0; 16
i < c->n57
;
++i41
)
4737
41
      graph_free(ctx, &c->cluster[i]);
4738
16
  free(c->cluster);
4739
16
  free(c->scc_cluster);
4740
16
  free(c->scc_node);
4741
16
  free(c->scc_in_merge);
4742
16
}
4743
4744
/* Should we refrain from merging the cluster in "graph" with
4745
 * any other cluster?
4746
 * In particular, is its current schedule band empty and incomplete.
4747
 */
4748
static int bad_cluster(struct isl_sched_graph *graph)
4749
254
{
4750
254
  return graph->n_row < graph->maxvar &&
4751
12
    graph->n_total_row == graph->band_start;
4752
254
}
4753
4754
/* Return the index of an edge in "graph" that can be used to merge
4755
 * two clusters in "c".
4756
 * Return graph->n_edge if no such edge can be found.
4757
 * Return -1 on error.
4758
 *
4759
 * In particular, return a proximity edge between two clusters
4760
 * that is not marked "no_merge" and such that neither of the
4761
 * two clusters has an incomplete, empty band.
4762
 *
4763
 * If there are multiple such edges, then try and find the most
4764
 * appropriate edge to use for merging.  In particular, pick the edge
4765
 * with the greatest weight.  If there are multiple of those,
4766
 * then pick one with the shortest distance between
4767
 * the two cluster representatives.
4768
 */
4769
static int find_proximity(struct isl_sched_graph *graph,
4770
  struct isl_clustering *c)
4771
39
{
4772
39
  int i, best = graph->n_edge, best_dist, best_weight;
4773
39
4774
164
  for (i = 0; 
i < graph->n_edge164
;
++i125
)
{125
4775
125
    struct isl_sched_edge *edge = &graph->edge[i];
4776
125
    int dist, weight;
4777
125
4778
125
    if (!is_proximity(edge))
4779
26
      continue;
4780
99
    
if (99
edge->no_merge99
)
4781
6
      continue;
4782
93
    
if (93
bad_cluster(&c->scc[edge->src->scc]) ||93
4783
93
        bad_cluster(&c->scc[edge->dst->scc]))
4784
0
      continue;
4785
93
    dist = c->scc_cluster[edge->dst->scc] -
4786
93
      c->scc_cluster[edge->src->scc];
4787
93
    if (dist == 0)
4788
57
      continue;
4789
36
    weight = edge->weight;
4790
36
    if (
best < graph->n_edge36
)
{13
4791
13
      if (best_weight > weight)
4792
0
        continue;
4793
13
      
if (13
best_weight == weight && 13
best_dist <= dist13
)
4794
11
        continue;
4795
13
    }
4796
25
    best = i;
4797
25
    best_dist = dist;
4798
25
    best_weight = weight;
4799
25
  }
4800
39
4801
39
  return best;
4802
39
}
4803
4804
/* Internal data structure used in mark_merge_sccs.
4805
 *
4806
 * "graph" is the dependence graph in which a strongly connected
4807
 * component is constructed.
4808
 * "scc_cluster" maps each SCC index to the cluster to which it belongs.
4809
 * "src" and "dst" are the indices of the nodes that are being merged.
4810
 */
4811
struct isl_mark_merge_sccs_data {
4812
  struct isl_sched_graph *graph;
4813
  int *scc_cluster;
4814
  int src;
4815
  int dst;
4816
};
4817
4818
/* Check whether the cluster containing node "i" depends on the cluster
4819
 * containing node "j".  If "i" and "j" belong to the same cluster,
4820
 * then they are taken to depend on each other to ensure that
4821
 * the resulting strongly connected component consists of complete
4822
 * clusters.  Furthermore, if "i" and "j" are the two nodes that
4823
 * are being merged, then they are taken to depend on each other as well.
4824
 * Otherwise, check if there is a (conditional) validity dependence
4825
 * from node[j] to node[i], forcing node[i] to follow node[j].
4826
 */
4827
static isl_bool cluster_follows(int i, int j, void *user)
4828
108
{
4829
108
  struct isl_mark_merge_sccs_data *data = user;
4830
108
  struct isl_sched_graph *graph = data->graph;
4831
108
  int *scc_cluster = data->scc_cluster;
4832
108
4833
108
  if (
data->src == i && 108
data->dst == j39
)
4834
23
    return isl_bool_true;
4835
85
  
if (85
data->src == j && 85
data->dst == i35
)
4836
21
    return isl_bool_true;
4837
64
  
if (64
scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc]64
)
4838
16
    return isl_bool_true;
4839
64
4840
48
  return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
4841
64
}
4842
4843
/* Mark all SCCs that belong to either of the two clusters in "c"
4844
 * connected by the edge in "graph" with index "edge", or to any
4845
 * of the intermediate clusters.
4846
 * The marking is recorded in c->scc_in_merge.
4847
 *
4848
 * The given edge has been selected for merging two clusters,
4849
 * meaning that there is at least a proximity edge between the two nodes.
4850
 * However, there may also be (indirect) validity dependences
4851
 * between the two nodes.  When merging the two clusters, all clusters
4852
 * containing one or more of the intermediate nodes along the
4853
 * indirect validity dependences need to be merged in as well.
4854
 *
4855
 * First collect all such nodes by computing the strongly connected
4856
 * component (SCC) containing the two nodes connected by the edge, where
4857
 * the two nodes are considered to depend on each other to make
4858
 * sure they end up in the same SCC.  Similarly, each node is considered
4859
 * to depend on every other node in the same cluster to ensure
4860
 * that the SCC consists of complete clusters.
4861
 *
4862
 * Then the original SCCs that contain any of these nodes are marked
4863
 * in c->scc_in_merge.
4864
 */
4865
static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
4866
  int edge, struct isl_clustering *c)
4867
23
{
4868
23
  struct isl_mark_merge_sccs_data data;
4869
23
  struct isl_tarjan_graph *g;
4870
23
  int i;
4871
23
4872
88
  for (i = 0; 
i < c->n88
;
++i65
)
4873
65
    c->scc_in_merge[i] = 0;
4874
23
4875
23
  data.graph = graph;
4876
23
  data.scc_cluster = c->scc_cluster;
4877
23
  data.src = graph->edge[edge].src - graph->node;
4878
23
  data.dst = graph->edge[edge].dst - graph->node;
4879
23
4880
23
  g = isl_tarjan_graph_component(ctx, graph->n, data.dst,
4881
23
          &cluster_follows, &data);
4882
23
  if (!g)
4883
0
    goto error;
4884
23
4885
23
  i = g->op;
4886
23
  if (i < 3)
4887
0
    isl_die(ctx, isl_error_internal,
4888
23
      "expecting at least two nodes in component",
4889
23
      goto error);
4890
23
  
if (23
g->order[--i] != -123
)
4891
0
    isl_die(ctx, isl_error_internal,
4892
23
      "expecting end of component marker", goto error);
4893
23
4894
78
  
for (--i; 23
i >= 0 && 78
g->order[i] != -160
;
--i55
)
{55
4895
55
    int scc = graph->node[g->order[i]].scc;
4896
55
    c->scc_in_merge[scc] = 1;
4897
55
  }
4898
23
4899
23
  isl_tarjan_graph_free(g);
4900
23
  return isl_stat_ok;
4901
0
error:
4902
0
  isl_tarjan_graph_free(g);
4903
0
  return isl_stat_error;
4904
23
}
4905
4906
/* Construct the identifier "cluster_i".
4907
 */
4908
static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
4909
179
{
4910
179
  char name[40];
4911
179
4912
179
  snprintf(name, sizeof(name), "cluster_%d", i);
4913
179
  return isl_id_alloc(ctx, name, NULL);
4914
179
}
4915
4916
/* Construct the space of the cluster with index "i" containing
4917
 * the strongly connected component "scc".
4918
 *
4919
 * In particular, construct a space called cluster_i with dimension equal
4920
 * to the number of schedule rows in the current band of "scc".
4921
 */
4922
static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
4923
127
{
4924
127
  int nvar;
4925
127
  isl_space *space;
4926
127
  isl_id *id;
4927
127
4928
127
  nvar = scc->n_total_row - scc->band_start;
4929
127
  space = isl_space_copy(scc->node[0].space);
4930
127
  space = isl_space_params(space);
4931
127
  space = isl_space_set_from_params(space);
4932
127
  space = isl_space_add_dims(space, isl_dim_set, nvar);
4933
127
  id = cluster_id(isl_space_get_ctx(space), i);
4934
127
  space = isl_space_set_tuple_id(space, isl_dim_set, id);
4935
127
4936
127
  return space;
4937
127
}
4938
4939
/* Collect the domain of the graph for merging clusters.
4940
 *
4941
 * In particular, for each cluster with first SCC "i", construct
4942
 * a set in the space called cluster_i with dimension equal
4943
 * to the number of schedule rows in the current band of the cluster.
4944
 */
4945
static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
4946
  struct isl_sched_graph *graph, struct isl_clustering *c)
4947
22
{
4948
22
  int i;
4949
22
  isl_space *space;
4950
22
  isl_union_set *domain;
4951
22
4952
22
  space = isl_space_params_alloc(ctx, 0);
4953
22
  domain = isl_union_set_empty(space);
4954
22
4955
84
  for (i = 0; 
i < graph->scc84
;
++i62
)
{62
4956
62
    isl_space *space;
4957
62
4958
62
    if (!c->scc_in_merge[i])
4959
10
      continue;
4960
52
    
if (52
c->scc_cluster[i] != i52
)
4961
8
      continue;
4962
44
    space = cluster_space(&c->scc[i], i);
4963
44
    domain = isl_union_set_add_set(domain, isl_set_universe(space));
4964
44
  }
4965
22
4966
22
  return domain;
4967
22
}
4968
4969
/* Construct a map from the original instances to the corresponding
4970
 * cluster instance in the current bands of the clusters in "c".
4971
 */
4972
static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
4973
  struct isl_sched_graph *graph, struct isl_clustering *c)
4974
22
{
4975
22
  int i, j;
4976
22
  isl_space *space;
4977
22
  isl_union_map *cluster_map;
4978
22
4979
22
  space = isl_space_params_alloc(ctx, 0);
4980
22
  cluster_map = isl_union_map_empty(space);
4981
84
  for (i = 0; 
i < graph->scc84
;
++i62
)
{62
4982
62
    int start, n;
4983
62
    isl_id *id;
4984
62
4985
62
    if (!c->scc_in_merge[i])
4986
10
      continue;
4987
62
4988
52
    id = cluster_id(ctx, c->scc_cluster[i]);
4989
52
    start = c->scc[i].band_start;
4990
52
    n = c->scc[i].n_total_row - start;
4991
104
    for (j = 0; 
j < c->scc[i].n104
;
++j52
)
{52
4992
52
      isl_multi_aff *ma;
4993
52
      isl_map *map;
4994
52
      struct isl_sched_node *node = &c->scc[i].node[j];
4995
52
4996
52
      ma = node_extract_partial_schedule_multi_aff(node,
4997
52
                    start, n);
4998
52
      ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out,
4999
52
                  isl_id_copy(id));
5000
52
      map = isl_map_from_multi_aff(ma);
5001
52
      cluster_map = isl_union_map_add_map(cluster_map, map);
5002
52
    }
5003
52
    isl_id_free(id);
5004
52
  }
5005
22
5006
22
  return cluster_map;
5007
22
}
5008
5009
/* Add "umap" to the schedule constraints "sc" of all types of "edge"
5010
 * that are not isl_edge_condition or isl_edge_conditional_validity.
5011
 */
5012
static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
5013
  struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
5014
  __isl_take isl_schedule_constraints *sc)
5015
58
{
5016
58
  enum isl_edge_type t;
5017
58
5018
58
  if (!sc)
5019
0
    return NULL;
5020
58
5021
348
  
for (t = isl_edge_first; 58
t <= isl_edge_last348
;
++t290
)
{290
5022
290
    if (t == isl_edge_condition ||
5023
232
        t == isl_edge_conditional_validity)
5024
116
      continue;
5025
174
    
if (174
!is_type(edge, t)174
)
5026
35
      continue;
5027
139
    sc = isl_schedule_constraints_add(sc, t,
5028
139
                isl_union_map_copy(umap));
5029
139
  }
5030
58
5031
58
  return sc;
5032
58
}
5033
5034
/* Add schedule constraints of types isl_edge_condition and
5035
 * isl_edge_conditional_validity to "sc" by applying "umap" to
5036
 * the domains of the wrapped relations in domain and range
5037
 * of the corresponding tagged constraints of "edge".
5038
 */
5039
static __isl_give isl_schedule_constraints *add_conditional_constraints(
5040
  struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
5041
  __isl_take isl_schedule_constraints *sc)
5042
13
{
5043
13
  enum isl_edge_type t;
5044
13
  isl_union_map *tagged;
50