Coverage Report

Created: 2017-11-21 16:49

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/External/isl/isl_scheduler.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2011      INRIA Saclay
3
 * Copyright 2012-2014 Ecole Normale Superieure
4
 * Copyright 2015-2016 Sven Verdoolaege
5
 * Copyright 2016      INRIA Paris
6
 * Copyright 2017      Sven Verdoolaege
7
 *
8
 * Use of this software is governed by the MIT license
9
 *
10
 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
11
 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
12
 * 91893 Orsay, France
13
 * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
14
 * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
15
 * CS 42112, 75589 Paris Cedex 12, France
16
 */
17
18
#include <isl_ctx_private.h>
19
#include <isl_map_private.h>
20
#include <isl_space_private.h>
21
#include <isl_aff_private.h>
22
#include <isl/hash.h>
23
#include <isl/constraint.h>
24
#include <isl/schedule.h>
25
#include <isl_schedule_constraints.h>
26
#include <isl/schedule_node.h>
27
#include <isl_mat_private.h>
28
#include <isl_vec_private.h>
29
#include <isl/set.h>
30
#include <isl_union_set_private.h>
31
#include <isl_seq.h>
32
#include <isl_tab.h>
33
#include <isl_dim_map.h>
34
#include <isl/map_to_basic_set.h>
35
#include <isl_sort.h>
36
#include <isl_options_private.h>
37
#include <isl_tarjan.h>
38
#include <isl_morph.h>
39
#include <isl/ilp.h>
40
#include <isl_val_private.h>
41
42
/*
43
 * The scheduling algorithm implemented in this file was inspired by
44
 * Bondhugula et al., "Automatic Transformations for Communication-Minimized
45
 * Parallelization and Locality Optimization in the Polyhedral Model".
46
 */
47
48
49
/* Internal information about a node that is used during the construction
50
 * of a schedule.
51
 * space represents the original space in which the domain lives;
52
 *  that is, the space is not affected by compression
53
 * sched is a matrix representation of the schedule being constructed
54
 *  for this node; if compressed is set, then this schedule is
55
 *  defined over the compressed domain space
56
 * sched_map is an isl_map representation of the same (partial) schedule
57
 *  sched_map may be NULL; if compressed is set, then this map
58
 *  is defined over the uncompressed domain space
59
 * rank is the number of linearly independent rows in the linear part
60
 *  of sched
61
 * the rows of "vmap" represent a change of basis for the node
62
 *  variables; the first rank rows span the linear part of
63
 *  the schedule rows; the remaining rows are linearly independent
64
 * the rows of "indep" represent linear combinations of the schedule
65
 * coefficients that are non-zero when the schedule coefficients are
66
 * linearly independent of previously computed schedule rows.
67
 * start is the first variable in the LP problem in the sequences that
68
 *  represents the schedule coefficients of this node
69
 * nvar is the dimension of the domain
70
 * nparam is the number of parameters or 0 if we are not constructing
71
 *  a parametric schedule
72
 *
73
 * If compressed is set, then hull represents the constraints
74
 * that were used to derive the compression, while compress and
75
 * decompress map the original space to the compressed space and
76
 * vice versa.
77
 *
78
 * scc is the index of SCC (or WCC) this node belongs to
79
 *
80
 * "cluster" is only used inside extract_clusters and identifies
81
 * the cluster of SCCs that the node belongs to.
82
 *
83
 * coincident contains a boolean for each of the rows of the schedule,
84
 * indicating whether the corresponding scheduling dimension satisfies
85
 * the coincidence constraints in the sense that the corresponding
86
 * dependence distances are zero.
87
 *
88
 * If the schedule_treat_coalescing option is set, then
89
 * "sizes" contains the sizes of the (compressed) instance set
90
 * in each direction.  If there is no fixed size in a given direction,
91
 * then the corresponding size value is set to infinity.
92
 * If the schedule_treat_coalescing option or the schedule_max_coefficient
93
 * option is set, then "max" contains the maximal values for
94
 * schedule coefficients of the (compressed) variables.  If no bound
95
 * needs to be imposed on a particular variable, then the corresponding
96
 * value is negative.
97
 * If not NULL, then "bounds" contains a non-parametric set
98
 * in the compressed space that is bounded by the size in each direction.
99
 */
100
struct isl_sched_node {
101
  isl_space *space;
102
  int compressed;
103
  isl_set *hull;
104
  isl_multi_aff *compress;
105
  isl_multi_aff *decompress;
106
  isl_mat *sched;
107
  isl_map *sched_map;
108
  int  rank;
109
  isl_mat *indep;
110
  isl_mat *vmap;
111
  int  start;
112
  int  nvar;
113
  int  nparam;
114
115
  int  scc;
116
  int  cluster;
117
118
  int *coincident;
119
120
  isl_multi_val *sizes;
121
  isl_basic_set *bounds;
122
  isl_vec *max;
123
};
124
125
static int node_has_tuples(const void *entry, const void *val)
126
98
{
127
98
  struct isl_sched_node *node = (struct isl_sched_node *)entry;
128
98
  isl_space *space = (isl_space *) val;
129
98
130
98
  return isl_space_has_equal_tuples(node->space, space);
131
98
}
132
133
static int node_scc_exactly(struct isl_sched_node *node, int scc)
134
64
{
135
64
  return node->scc == scc;
136
64
}
137
138
static int node_scc_at_most(struct isl_sched_node *node, int scc)
139
0
{
140
0
  return node->scc <= scc;
141
0
}
142
143
static int node_scc_at_least(struct isl_sched_node *node, int scc)
144
0
{
145
0
  return node->scc >= scc;
146
0
}
147
148
/* An edge in the dependence graph.  An edge may be used to
149
 * ensure validity of the generated schedule, to minimize the dependence
150
 * distance or both
151
 *
152
 * map is the dependence relation, with i -> j in the map if j depends on i
153
 * tagged_condition and tagged_validity contain the union of all tagged
154
 *  condition or conditional validity dependence relations that
155
 *  specialize the dependence relation "map"; that is,
156
 *  if (i -> a) -> (j -> b) is an element of "tagged_condition"
157
 *  or "tagged_validity", then i -> j is an element of "map".
158
 *  If these fields are NULL, then they represent the empty relation.
159
 * src is the source node
160
 * dst is the sink node
161
 *
162
 * types is a bit vector containing the types of this edge.
163
 * validity is set if the edge is used to ensure correctness
164
 * coincidence is used to enforce zero dependence distances
165
 * proximity is set if the edge is used to minimize dependence distances
166
 * condition is set if the edge represents a condition
167
 *  for a conditional validity schedule constraint
168
 * local can only be set for condition edges and indicates that
169
 *  the dependence distance over the edge should be zero
170
 * conditional_validity is set if the edge is used to conditionally
171
 *  ensure correctness
172
 *
173
 * For validity edges, start and end mark the sequence of inequality
174
 * constraints in the LP problem that encode the validity constraint
175
 * corresponding to this edge.
176
 *
177
 * During clustering, an edge may be marked "no_merge" if it should
178
 * not be used to merge clusters.
179
 * The weight is also only used during clustering and it is
180
 * an indication of how many schedule dimensions on either side
181
 * of the schedule constraints can be aligned.
182
 * If the weight is negative, then this means that this edge was postponed
183
 * by has_bounded_distances or any_no_merge.  The original weight can
184
 * be retrieved by adding 1 + graph->max_weight, with "graph"
185
 * the graph containing this edge.
186
 */
187
struct isl_sched_edge {
188
  isl_map *map;
189
  isl_union_map *tagged_condition;
190
  isl_union_map *tagged_validity;
191
192
  struct isl_sched_node *src;
193
  struct isl_sched_node *dst;
194
195
  unsigned types;
196
197
  int start;
198
  int end;
199
200
  int no_merge;
201
  int weight;
202
};
203
204
/* Is "edge" marked as being of type "type"?
205
 */
206
static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type)
207
614
{
208
614
  return ISL_FL_ISSET(edge->types, 1 << type);
209
614
}
210
211
/* Mark "edge" as being of type "type".
212
 */
213
static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
214
45
{
215
45
  ISL_FL_SET(edge->types, 1 << type);
216
45
}
217
218
/* No longer mark "edge" as being of type "type"?
219
 */
220
static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
221
0
{
222
0
  ISL_FL_CLR(edge->types, 1 << type);
223
0
}
224
225
/* Is "edge" marked as a validity edge?
226
 */
227
static int is_validity(struct isl_sched_edge *edge)
228
140
{
229
140
  return is_type(edge, isl_edge_validity);
230
140
}
231
232
/* Mark "edge" as a validity edge.
233
 */
234
static void set_validity(struct isl_sched_edge *edge)
235
0
{
236
0
  set_type(edge, isl_edge_validity);
237
0
}
238
239
/* Is "edge" marked as a proximity edge?
240
 */
241
static int is_proximity(struct isl_sched_edge *edge)
242
95
{
243
95
  return is_type(edge, isl_edge_proximity);
244
95
}
245
246
/* Is "edge" marked as a local edge?
247
 */
248
static int is_local(struct isl_sched_edge *edge)
249
128
{
250
128
  return is_type(edge, isl_edge_local);
251
128
}
252
253
/* Mark "edge" as a local edge.
254
 */
255
static void set_local(struct isl_sched_edge *edge)
256
0
{
257
0
  set_type(edge, isl_edge_local);
258
0
}
259
260
/* No longer mark "edge" as a local edge.
261
 */
262
static void clear_local(struct isl_sched_edge *edge)
263
0
{
264
0
  clear_type(edge, isl_edge_local);
265
0
}
266
267
/* Is "edge" marked as a coincidence edge?
268
 */
269
static int is_coincidence(struct isl_sched_edge *edge)
270
98
{
271
98
  return is_type(edge, isl_edge_coincidence);
272
98
}
273
274
/* Is "edge" marked as a condition edge?
275
 */
276
static int is_condition(struct isl_sched_edge *edge)
277
83
{
278
83
  return is_type(edge, isl_edge_condition);
279
83
}
280
281
/* Is "edge" marked as a conditional validity edge?
282
 */
283
static int is_conditional_validity(struct isl_sched_edge *edge)
284
70
{
285
70
  return is_type(edge, isl_edge_conditional_validity);
286
70
}
287
288
/* Internal information about the dependence graph used during
289
 * the construction of the schedule.
290
 *
291
 * intra_hmap is a cache, mapping dependence relations to their dual,
292
 *  for dependences from a node to itself, possibly without
293
 *  coefficients for the parameters
294
 * intra_hmap_param is a cache, mapping dependence relations to their dual,
295
 *  for dependences from a node to itself, including coefficients
296
 *  for the parameters
297
 * inter_hmap is a cache, mapping dependence relations to their dual,
298
 *  for dependences between distinct nodes
299
 * if compression is involved then the key for these maps
300
 * is the original, uncompressed dependence relation, while
301
 * the value is the dual of the compressed dependence relation.
302
 *
303
 * n is the number of nodes
304
 * node is the list of nodes
305
 * maxvar is the maximal number of variables over all nodes
306
 * max_row is the allocated number of rows in the schedule
307
 * n_row is the current (maximal) number of linearly independent
308
 *  rows in the node schedules
309
 * n_total_row is the current number of rows in the node schedules
310
 * band_start is the starting row in the node schedules of the current band
311
 * root is set to the the original dependence graph from which this graph
312
 *  is derived through splitting.  If this graph is not the result of
313
 *  splitting, then the root field points to the graph itself.
314
 *
315
 * sorted contains a list of node indices sorted according to the
316
 *  SCC to which a node belongs
317
 *
318
 * n_edge is the number of edges
319
 * edge is the list of edges
320
 * max_edge contains the maximal number of edges of each type;
321
 *  in particular, it contains the number of edges in the inital graph.
322
 * edge_table contains pointers into the edge array, hashed on the source
323
 *  and sink spaces; there is one such table for each type;
324
 *  a given edge may be referenced from more than one table
325
 *  if the corresponding relation appears in more than one of the
326
 *  sets of dependences; however, for each type there is only
327
 *  a single edge between a given pair of source and sink space
328
 *  in the entire graph
329
 *
330
 * node_table contains pointers into the node array, hashed on the space tuples
331
 *
332
 * region contains a list of variable sequences that should be non-trivial
333
 *
334
 * lp contains the (I)LP problem used to obtain new schedule rows
335
 *
336
 * src_scc and dst_scc are the source and sink SCCs of an edge with
337
 *  conflicting constraints
338
 *
339
 * scc represents the number of components
340
 * weak is set if the components are weakly connected
341
 *
342
 * max_weight is used during clustering and represents the maximal
343
 * weight of the relevant proximity edges.
344
 */
345
struct isl_sched_graph {
346
  isl_map_to_basic_set *intra_hmap;
347
  isl_map_to_basic_set *intra_hmap_param;
348
  isl_map_to_basic_set *inter_hmap;
349
350
  struct isl_sched_node *node;
351
  int n;
352
  int maxvar;
353
  int max_row;
354
  int n_row;
355
356
  int *sorted;
357
358
  int n_total_row;
359
  int band_start;
360
361
  struct isl_sched_graph *root;
362
363
  struct isl_sched_edge *edge;
364
  int n_edge;
365
  int max_edge[isl_edge_last + 1];
366
  struct isl_hash_table *edge_table[isl_edge_last + 1];
367
368
  struct isl_hash_table *node_table;
369
  struct isl_trivial_region *region;
370
371
  isl_basic_set *lp;
372
373
  int src_scc;
374
  int dst_scc;
375
376
  int scc;
377
  int weak;
378
379
  int max_weight;
380
};
381
382
/* Initialize node_table based on the list of nodes.
383
 */
384
static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
385
22
{
386
22
  int i;
387
22
388
22
  graph->node_table = isl_hash_table_alloc(ctx, graph->n);
389
22
  if (!graph->node_table)
390
0
    return -1;
391
22
392
50
  
for (i = 0; 22
i < graph->n;
++i28
) {
393
28
    struct isl_hash_table_entry *entry;
394
28
    uint32_t hash;
395
28
396
28
    hash = isl_space_get_tuple_hash(graph->node[i].space);
397
28
    entry = isl_hash_table_find(ctx, graph->node_table, hash,
398
28
              &node_has_tuples,
399
28
              graph->node[i].space, 1);
400
28
    if (!entry)
401
0
      return -1;
402
28
    entry->data = &graph->node[i];
403
28
  }
404
22
405
22
  return 0;
406
22
}
407
408
/* Return a pointer to the node that lives within the given space,
409
 * or NULL if there is no such node.
410
 */
411
static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
412
  struct isl_sched_graph *graph, __isl_keep isl_space *space)
413
98
{
414
98
  struct isl_hash_table_entry *entry;
415
98
  uint32_t hash;
416
98
417
98
  hash = isl_space_get_tuple_hash(space);
418
98
  entry = isl_hash_table_find(ctx, graph->node_table, hash,
419
98
            &node_has_tuples, space, 0);
420
98
421
98
  return entry ? entry->data : NULL;
422
98
}
423
424
/* Is "node" a node in "graph"?
425
 */
426
static int is_node(struct isl_sched_graph *graph,
427
  struct isl_sched_node *node)
428
0
{
429
0
  return node && node >= &graph->node[0] && node < &graph->node[graph->n];
430
0
}
431
432
static int edge_has_src_and_dst(const void *entry, const void *val)
433
52
{
434
52
  const struct isl_sched_edge *edge = entry;
435
52
  const struct isl_sched_edge *temp = val;
436
52
437
52
  return edge->src == temp->src && edge->dst == temp->dst;
438
52
}
439
440
/* Add the given edge to graph->edge_table[type].
441
 */
442
static isl_stat graph_edge_table_add(isl_ctx *ctx,
443
  struct isl_sched_graph *graph, enum isl_edge_type type,
444
  struct isl_sched_edge *edge)
445
57
{
446
57
  struct isl_hash_table_entry *entry;
447
57
  uint32_t hash;
448
57
449
57
  hash = isl_hash_init();
450
57
  hash = isl_hash_builtin(hash, edge->src);
451
57
  hash = isl_hash_builtin(hash, edge->dst);
452
57
  entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
453
57
            &edge_has_src_and_dst, edge, 1);
454
57
  if (!entry)
455
0
    return isl_stat_error;
456
57
  entry->data = edge;
457
57
458
57
  return isl_stat_ok;
459
57
}
460
461
/* Allocate the edge_tables based on the maximal number of edges of
462
 * each type.
463
 */
464
static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
465
22
{
466
22
  int i;
467
22
468
132
  for (i = 0; i <= isl_edge_last; 
++i110
) {
469
110
    graph->edge_table[i] = isl_hash_table_alloc(ctx,
470
110
                  graph->max_edge[i]);
471
110
    if (!graph->edge_table[i])
472
0
      return -1;
473
110
  }
474
22
475
22
  return 0;
476
22
}
477
478
/* If graph->edge_table[type] contains an edge from the given source
479
 * to the given destination, then return the hash table entry of this edge.
480
 * Otherwise, return NULL.
481
 */
482
static struct isl_hash_table_entry *graph_find_edge_entry(
483
  struct isl_sched_graph *graph,
484
  enum isl_edge_type type,
485
  struct isl_sched_node *src, struct isl_sched_node *dst)
486
151
{
487
151
  isl_ctx *ctx = isl_space_get_ctx(src->space);
488
151
  uint32_t hash;
489
151
  struct isl_sched_edge temp = { .src = src, .dst = dst };
490
151
491
151
  hash = isl_hash_init();
492
151
  hash = isl_hash_builtin(hash, temp.src);
493
151
  hash = isl_hash_builtin(hash, temp.dst);
494
151
  return isl_hash_table_find(ctx, graph->edge_table[type], hash,
495
151
            &edge_has_src_and_dst, &temp, 0);
496
151
}
497
498
499
/* If graph->edge_table[type] contains an edge from the given source
500
 * to the given destination, then return this edge.
501
 * Otherwise, return NULL.
502
 */
503
static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
504
  enum isl_edge_type type,
505
  struct isl_sched_node *src, struct isl_sched_node *dst)
506
141
{
507
141
  struct isl_hash_table_entry *entry;
508
141
509
141
  entry = graph_find_edge_entry(graph, type, src, dst);
510
141
  if (!entry)
511
95
    return NULL;
512
46
513
46
  return entry->data;
514
46
}
515
516
/* Check whether the dependence graph has an edge of the given type
517
 * between the given two nodes.
518
 */
519
static isl_bool graph_has_edge(struct isl_sched_graph *graph,
520
  enum isl_edge_type type,
521
  struct isl_sched_node *src, struct isl_sched_node *dst)
522
16
{
523
16
  struct isl_sched_edge *edge;
524
16
  isl_bool empty;
525
16
526
16
  edge = graph_find_edge(graph, type, src, dst);
527
16
  if (!edge)
528
12
    return 0;
529
4
530
4
  empty = isl_map_plain_is_empty(edge->map);
531
4
  if (empty < 0)
532
0
    return isl_bool_error;
533
4
534
4
  return !empty;
535
4
}
536
537
/* Look for any edge with the same src, dst and map fields as "model".
538
 *
539
 * Return the matching edge if one can be found.
540
 * Return "model" if no matching edge is found.
541
 * Return NULL on error.
542
 */
543
static struct isl_sched_edge *graph_find_matching_edge(
544
  struct isl_sched_graph *graph, struct isl_sched_edge *model)
545
45
{
546
45
  enum isl_edge_type i;
547
45
  struct isl_sched_edge *edge;
548
45
549
120
  for (i = isl_edge_first; i <= isl_edge_last; 
++i75
) {
550
105
    int is_equal;
551
105
552
105
    edge = graph_find_edge(graph, i, model->src, model->dst);
553
105
    if (!edge)
554
75
      continue;
555
30
    is_equal = isl_map_plain_is_equal(model->map, edge->map);
556
30
    if (is_equal < 0)
557
0
      return NULL;
558
30
    if (is_equal)
559
30
      return edge;
560
105
  }
561
45
562
45
  
return model15
;
563
45
}
564
565
/* Remove the given edge from all the edge_tables that refer to it.
566
 */
567
static void graph_remove_edge(struct isl_sched_graph *graph,
568
  struct isl_sched_edge *edge)
569
2
{
570
2
  isl_ctx *ctx = isl_map_get_ctx(edge->map);
571
2
  enum isl_edge_type i;
572
2
573
12
  for (i = isl_edge_first; i <= isl_edge_last; 
++i10
) {
574
10
    struct isl_hash_table_entry *entry;
575
10
576
10
    entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
577
10
    if (!entry)
578
4
      continue;
579
6
    if (entry->data != edge)
580
0
      continue;
581
6
    isl_hash_table_remove(ctx, graph->edge_table[i], entry);
582
6
  }
583
2
}
584
585
/* Check whether the dependence graph has any edge
586
 * between the given two nodes.
587
 */
588
static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
589
  struct isl_sched_node *src, struct isl_sched_node *dst)
590
0
{
591
0
  enum isl_edge_type i;
592
0
  isl_bool r;
593
0
594
0
  for (i = isl_edge_first; i <= isl_edge_last; ++i) {
595
0
    r = graph_has_edge(graph, i, src, dst);
596
0
    if (r < 0 || r)
597
0
      return r;
598
0
  }
599
0
600
0
  return r;
601
0
}
602
603
/* Check whether the dependence graph has a validity edge
604
 * between the given two nodes.
605
 *
606
 * Conditional validity edges are essentially validity edges that
607
 * can be ignored if the corresponding condition edges are iteration private.
608
 * Here, we are only checking for the presence of validity
609
 * edges, so we need to consider the conditional validity edges too.
610
 * In particular, this function is used during the detection
611
 * of strongly connected components and we cannot ignore
612
 * conditional validity edges during this detection.
613
 */
614
static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph,
615
  struct isl_sched_node *src, struct isl_sched_node *dst)
616
10
{
617
10
  isl_bool r;
618
10
619
10
  r = graph_has_edge(graph, isl_edge_validity, src, dst);
620
10
  if (r < 0 || r)
621
4
    return r;
622
6
623
6
  return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
624
6
}
625
626
static int graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
627
  int n_node, int n_edge)
628
22
{
629
22
  int i;
630
22
631
22
  graph->n = n_node;
632
22
  graph->n_edge = n_edge;
633
22
  graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
634
22
  graph->sorted = isl_calloc_array(ctx, int, graph->n);
635
22
  graph->region = isl_alloc_array(ctx,
636
22
          struct isl_trivial_region, graph->n);
637
22
  graph->edge = isl_calloc_array(ctx,
638
22
          struct isl_sched_edge, graph->n_edge);
639
22
640
22
  graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
641
22
  graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
642
22
  graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
643
22
644
22
  if (!graph->node || !graph->region || (graph->n_edge && 
!graph->edge13
) ||
645
22
      !graph->sorted)
646
0
    return -1;
647
22
648
50
  
for(i = 0; 22
i < graph->n;
++i28
)
649
28
    graph->sorted[i] = i;
650
22
651
22
  return 0;
652
22
}
653
654
static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
655
22
{
656
22
  int i;
657
22
658
22
  isl_map_to_basic_set_free(graph->intra_hmap);
659
22
  isl_map_to_basic_set_free(graph->intra_hmap_param);
660
22
  isl_map_to_basic_set_free(graph->inter_hmap);
661
22
662
22
  if (graph->node)
663
50
    
for (i = 0; 22
i < graph->n;
++i28
) {
664
28
      isl_space_free(graph->node[i].space);
665
28
      isl_set_free(graph->node[i].hull);
666
28
      isl_multi_aff_free(graph->node[i].compress);
667
28
      isl_multi_aff_free(graph->node[i].decompress);
668
28
      isl_mat_free(graph->node[i].sched);
669
28
      isl_map_free(graph->node[i].sched_map);
670
28
      isl_mat_free(graph->node[i].indep);
671
28
      isl_mat_free(graph->node[i].vmap);
672
28
      if (graph->root == graph)
673
18
        free(graph->node[i].coincident);
674
22
      isl_multi_val_free(graph->node[i].sizes);
675
22
      isl_basic_set_free(graph->node[i].bounds);
676
22
      isl_vec_free(graph->node[i].max);
677
22
    }
678
22
  free(graph->node);
679
22
  free(graph->sorted);
680
22
  if (graph->edge)
681
41
    
for (i = 0; 22
i < graph->n_edge;
++i19
) {
682
19
      isl_map_free(graph->edge[i].map);
683
19
      isl_union_map_free(graph->edge[i].tagged_condition);
684
19
      isl_union_map_free(graph->edge[i].tagged_validity);
685
19
    }
686
22
  free(graph->edge);
687
22
  free(graph->region);
688
132
  for (i = 0; i <= isl_edge_last; 
++i110
)
689
110
    isl_hash_table_free(ctx, graph->edge_table[i]);
690
22
  isl_hash_table_free(ctx, graph->node_table);
691
22
  isl_basic_set_free(graph->lp);
692
22
}
693
694
/* For each "set" on which this function is called, increment
695
 * graph->n by one and update graph->maxvar.
696
 */
697
static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
698
18
{
699
18
  struct isl_sched_graph *graph = user;
700
18
  int nvar = isl_set_dim(set, isl_dim_set);
701
18
702
18
  graph->n++;
703
18
  if (nvar > graph->maxvar)
704
12
    graph->maxvar = nvar;
705
18
706
18
  isl_set_free(set);
707
18
708
18
  return isl_stat_ok;
709
18
}
710
711
/* Compute the number of rows that should be allocated for the schedule.
712
 * In particular, we need one row for each variable or one row
713
 * for each basic map in the dependences.
714
 * Note that it is practically impossible to exhaust both
715
 * the number of dependences and the number of variables.
716
 */
717
static isl_stat compute_max_row(struct isl_sched_graph *graph,
718
  __isl_keep isl_schedule_constraints *sc)
719
12
{
720
12
  int n_edge;
721
12
  isl_stat r;
722
12
  isl_union_set *domain;
723
12
724
12
  graph->n = 0;
725
12
  graph->maxvar = 0;
726
12
  domain = isl_schedule_constraints_get_domain(sc);
727
12
  r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
728
12
  isl_union_set_free(domain);
729
12
  if (r < 0)
730
0
    return isl_stat_error;
731
12
  n_edge = isl_schedule_constraints_n_basic_map(sc);
732
12
  if (n_edge < 0)
733
0
    return isl_stat_error;
734
12
  graph->max_row = n_edge + graph->maxvar;
735
12
736
12
  return isl_stat_ok;
737
12
}
738
739
/* Does "bset" have any defining equalities for its set variables?
740
 */
741
static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
742
18
{
743
18
  int i, n;
744
18
745
18
  if (!bset)
746
0
    return isl_bool_error;
747
18
748
18
  n = isl_basic_set_dim(bset, isl_dim_set);
749
60
  for (i = 0; i < n; 
++i42
) {
750
42
    isl_bool has;
751
42
752
42
    has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
753
42
              NULL);
754
42
    if (has < 0 || has)
755
0
      return has;
756
42
  }
757
18
758
18
  return isl_bool_false;
759
18
}
760
761
/* Set the entries of node->max to the value of the schedule_max_coefficient
762
 * option, if set.
763
 */
764
static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
765
0
{
766
0
  int max;
767
0
768
0
  max = isl_options_get_schedule_max_coefficient(ctx);
769
0
  if (max == -1)
770
0
    return isl_stat_ok;
771
0
772
0
  node->max = isl_vec_alloc(ctx, node->nvar);
773
0
  node->max = isl_vec_set_si(node->max, max);
774
0
  if (!node->max)
775
0
    return isl_stat_error;
776
0
777
0
  return isl_stat_ok;
778
0
}
779
780
/* Set the entries of node->max to the minimum of the schedule_max_coefficient
781
 * option (if set) and half of the minimum of the sizes in the other
782
 * dimensions.  Round up when computing the half such that
783
 * if the minimum of the sizes is one, half of the size is taken to be one
784
 * rather than zero.
785
 * If the global minimum is unbounded (i.e., if both
786
 * the schedule_max_coefficient is not set and the sizes in the other
787
 * dimensions are unbounded), then store a negative value.
788
 * If the schedule coefficient is close to the size of the instance set
789
 * in another dimension, then the schedule may represent a loop
790
 * coalescing transformation (especially if the coefficient
791
 * in that other dimension is one).  Forcing the coefficient to be
792
 * smaller than or equal to half the minimal size should avoid this
793
 * situation.
794
 */
795
static isl_stat compute_max_coefficient(isl_ctx *ctx,
796
  struct isl_sched_node *node)
797
18
{
798
18
  int max;
799
18
  int i, j;
800
18
  isl_vec *v;
801
18
802
18
  max = isl_options_get_schedule_max_coefficient(ctx);
803
18
  v = isl_vec_alloc(ctx, node->nvar);
804
18
  if (!v)
805
0
    return isl_stat_error;
806
18
807
60
  
for (i = 0; 18
i < node->nvar;
++i42
) {
808
42
    isl_int_set_si(v->el[i], max);
809
42
    isl_int_mul_si(v->el[i], v->el[i], 2);
810
42
  }
811
18
812
60
  for (i = 0; i < node->nvar; 
++i42
) {
813
42
    isl_val *size;
814
42
815
42
    size = isl_multi_val_get_val(node->sizes, i);
816
42
    if (!size)
817
0
      goto error;
818
42
    if (!isl_val_is_int(size)) {
819
4
      isl_val_free(size);
820
4
      continue;
821
4
    }
822
133
    
for (j = 0; 38
j < node->nvar;
++j95
) {
823
95
      if (j == i)
824
38
        continue;
825
57
      if (isl_int_is_neg(v->el[j]) ||
826
57
          isl_int_gt(v->el[j], size->n))
827
57
        
isl_int_set0
(v->el[j], size->n);
828
95
    }
829
42
    isl_val_free(size);
830
42
  }
831
18
832
60
  
for (i = 0; 18
i < node->nvar;
++i42
)
833
42
    isl_int_cdiv_q_ui(v->el[i], v->el[i], 2);
834
18
835
18
  node->max = v;
836
18
  return isl_stat_ok;
837
0
error:
838
0
  isl_vec_free(v);
839
0
  return isl_stat_error;
840
18
}
841
842
/* Compute and return the size of "set" in dimension "dim".
843
 * The size is taken to be the difference in values for that variable
844
 * for fixed values of the other variables.
845
 * In particular, the variable is first isolated from the other variables
846
 * in the range of a map
847
 *
848
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
849
 *
850
 * and then duplicated
851
 *
852
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
853
 *
854
 * The shared variables are then projected out and the maximal value
855
 * of i_dim' - i_dim is computed.
856
 */
857
static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
858
42
{
859
42
  isl_map *map;
860
42
  isl_local_space *ls;
861
42
  isl_aff *obj;
862
42
  isl_val *v;
863
42
864
42
  map = isl_set_project_onto_map(set, isl_dim_set, dim, 1);
865
42
  map = isl_map_project_out(map, isl_dim_in, dim, 1);
866
42
  map = isl_map_range_product(map, isl_map_copy(map));
867
42
  map = isl_set_unwrap(isl_map_range(map));
868
42
  set = isl_map_deltas(map);
869
42
  ls = isl_local_space_from_space(isl_set_get_space(set));
870
42
  obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
871
42
  v = isl_set_max_val(set, obj);
872
42
  isl_aff_free(obj);
873
42
  isl_set_free(set);
874
42
875
42
  return v;
876
42
}
877
878
/* Compute the size of the instance set "set" of "node", after compression,
879
 * as well as bounds on the corresponding coefficients, if needed.
880
 *
881
 * The sizes are needed when the schedule_treat_coalescing option is set.
882
 * The bounds are needed when the schedule_treat_coalescing option or
883
 * the schedule_max_coefficient option is set.
884
 *
885
 * If the schedule_treat_coalescing option is not set, then at most
886
 * the bounds need to be set and this is done in set_max_coefficient.
887
 * Otherwise, compress the domain if needed, compute the size
888
 * in each direction and store the results in node->size.
889
 * Finally, set the bounds on the coefficients based on the sizes
890
 * and the schedule_max_coefficient option in compute_max_coefficient.
891
 */
892
static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
893
  __isl_take isl_set *set)
894
18
{
895
18
  int j, n;
896
18
  isl_multi_val *mv;
897
18
898
18
  if (!isl_options_get_schedule_treat_coalescing(ctx)) {
899
0
    isl_set_free(set);
900
0
    return set_max_coefficient(ctx, node);
901
0
  }
902
18
903
18
  if (node->compressed)
904
0
    set = isl_set_preimage_multi_aff(set,
905
0
          isl_multi_aff_copy(node->decompress));
906
18
  mv = isl_multi_val_zero(isl_set_get_space(set));
907
18
  n = isl_set_dim(set, isl_dim_set);
908
60
  for (j = 0; j < n; 
++j42
) {
909
42
    isl_val *v;
910
42
911
42
    v = compute_size(isl_set_copy(set), j);
912
42
    mv = isl_multi_val_set_val(mv, j, v);
913
42
  }
914
18
  node->sizes = mv;
915
18
  isl_set_free(set);
916
18
  if (!node->sizes)
917
0
    return isl_stat_error;
918
18
  return compute_max_coefficient(ctx, node);
919
18
}
920
921
/* Add a new node to the graph representing the given instance set.
922
 * "nvar" is the (possibly compressed) number of variables and
923
 * may be smaller than then number of set variables in "set"
924
 * if "compressed" is set.
925
 * If "compressed" is set, then "hull" represents the constraints
926
 * that were used to derive the compression, while "compress" and
927
 * "decompress" map the original space to the compressed space and
928
 * vice versa.
929
 * If "compressed" is not set, then "hull", "compress" and "decompress"
930
 * should be NULL.
931
 *
932
 * Compute the size of the instance set and bounds on the coefficients,
933
 * if needed.
934
 */
935
static isl_stat add_node(struct isl_sched_graph *graph,
936
  __isl_take isl_set *set, int nvar, int compressed,
937
  __isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
938
  __isl_take isl_multi_aff *decompress)
939
18
{
940
18
  int nparam;
941
18
  isl_ctx *ctx;
942
18
  isl_mat *sched;
943
18
  isl_space *space;
944
18
  int *coincident;
945
18
  struct isl_sched_node *node;
946
18
947
18
  if (!set)
948
0
    return isl_stat_error;
949
18
950
18
  ctx = isl_set_get_ctx(set);
951
18
  nparam = isl_set_dim(set, isl_dim_param);
952
18
  if (!ctx->opt->schedule_parametric)
953
0
    nparam = 0;
954
18
  sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
955
18
  node = &graph->node[graph->n];
956
18
  graph->n++;
957
18
  space = isl_set_get_space(set);
958
18
  node->space = space;
959
18
  node->nvar = nvar;
960
18
  node->nparam = nparam;
961
18
  node->sched = sched;
962
18
  node->sched_map = NULL;
963
18
  coincident = isl_calloc_array(ctx, int, graph->max_row);
964
18
  node->coincident = coincident;
965
18
  node->compressed = compressed;
966
18
  node->hull = hull;
967
18
  node->compress = compress;
968
18
  node->decompress = decompress;
969
18
  if (compute_sizes_and_max(ctx, node, set) < 0)
970
0
    return isl_stat_error;
971
18
972
18
  if (!space || !sched || (graph->max_row && !coincident))
973
0
    return isl_stat_error;
974
18
  if (compressed && 
(0
!hull0
||
!compress0
||
!decompress0
))
975
0
    return isl_stat_error;
976
18
977
18
  return isl_stat_ok;
978
18
}
979
980
/* Construct an identifier for node "node", which will represent "set".
981
 * The name of the identifier is either "compressed" or
982
 * "compressed_<name>", with <name> the name of the space of "set".
983
 * The user pointer of the identifier points to "node".
984
 */
985
static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set,
986
  struct isl_sched_node *node)
987
0
{
988
0
  isl_bool has_name;
989
0
  isl_ctx *ctx;
990
0
  isl_id *id;
991
0
  isl_printer *p;
992
0
  const char *name;
993
0
  char *id_name;
994
0
995
0
  has_name = isl_set_has_tuple_name(set);
996
0
  if (has_name < 0)
997
0
    return NULL;
998
0
999
0
  ctx = isl_set_get_ctx(set);
1000
0
  if (!has_name)
1001
0
    return isl_id_alloc(ctx, "compressed", node);
1002
0
1003
0
  p = isl_printer_to_str(ctx);
1004
0
  name = isl_set_get_tuple_name(set);
1005
0
  p = isl_printer_print_str(p, "compressed_");
1006
0
  p = isl_printer_print_str(p, name);
1007
0
  id_name = isl_printer_get_str(p);
1008
0
  isl_printer_free(p);
1009
0
1010
0
  id = isl_id_alloc(ctx, id_name, node);
1011
0
  free(id_name);
1012
0
1013
0
  return id;
1014
0
}
1015
1016
/* Add a new node to the graph representing the given set.
1017
 *
1018
 * If any of the set variables is defined by an equality, then
1019
 * we perform variable compression such that we can perform
1020
 * the scheduling on the compressed domain.
1021
 * In this case, an identifier is used that references the new node
1022
 * such that each compressed space is unique and
1023
 * such that the node can be recovered from the compressed space.
1024
 */
1025
static isl_stat extract_node(__isl_take isl_set *set, void *user)
1026
18
{
1027
18
  int nvar;
1028
18
  isl_bool has_equality;
1029
18
  isl_id *id;
1030
18
  isl_basic_set *hull;
1031
18
  isl_set *hull_set;
1032
18
  isl_morph *morph;
1033
18
  isl_multi_aff *compress, *decompress;
1034
18
  struct isl_sched_graph *graph = user;
1035
18
1036
18
  hull = isl_set_affine_hull(isl_set_copy(set));
1037
18
  hull = isl_basic_set_remove_divs(hull);
1038
18
  nvar = isl_set_dim(set, isl_dim_set);
1039
18
  has_equality = has_any_defining_equality(hull);
1040
18
1041
18
  if (has_equality < 0)
1042
0
    goto error;
1043
18
  if (!has_equality) {
1044
18
    isl_basic_set_free(hull);
1045
18
    return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
1046
18
  }
1047
0
1048
0
  id = construct_compressed_id(set, &graph->node[graph->n]);
1049
0
  morph = isl_basic_set_variable_compression_with_id(hull,
1050
0
                  isl_dim_set, id);
1051
0
  isl_id_free(id);
1052
0
  nvar = isl_morph_ran_dim(morph, isl_dim_set);
1053
0
  compress = isl_morph_get_var_multi_aff(morph);
1054
0
  morph = isl_morph_inverse(morph);
1055
0
  decompress = isl_morph_get_var_multi_aff(morph);
1056
0
  isl_morph_free(morph);
1057
0
1058
0
  hull_set = isl_set_from_basic_set(hull);
1059
0
  return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
1060
0
error:
1061
0
  isl_basic_set_free(hull);
1062
0
  isl_set_free(set);
1063
0
  return isl_stat_error;
1064
18
}
1065
1066
struct isl_extract_edge_data {
1067
  enum isl_edge_type type;
1068
  struct isl_sched_graph *graph;
1069
};
1070
1071
/* Merge edge2 into edge1, freeing the contents of edge2.
1072
 * Return 0 on success and -1 on failure.
1073
 *
1074
 * edge1 and edge2 are assumed to have the same value for the map field.
1075
 */
1076
static int merge_edge(struct isl_sched_edge *edge1,
1077
  struct isl_sched_edge *edge2)
1078
30
{
1079
30
  edge1->types |= edge2->types;
1080
30
  isl_map_free(edge2->map);
1081
30
1082
30
  if (is_condition(edge2)) {
1083
0
    if (!edge1->tagged_condition)
1084
0
      edge1->tagged_condition = edge2->tagged_condition;
1085
0
    else
1086
0
      edge1->tagged_condition =
1087
0
        isl_union_map_union(edge1->tagged_condition,
1088
0
                edge2->tagged_condition);
1089
0
  }
1090
30
1091
30
  if (is_conditional_validity(edge2)) {
1092
0
    if (!edge1->tagged_validity)
1093
0
      edge1->tagged_validity = edge2->tagged_validity;
1094
0
    else
1095
0
      edge1->tagged_validity =
1096
0
        isl_union_map_union(edge1->tagged_validity,
1097
0
                edge2->tagged_validity);
1098
0
  }
1099
30
1100
30
  if (is_condition(edge2) && 
!edge1->tagged_condition0
)
1101
0
    return -1;
1102
30
  if (is_conditional_validity(edge2) && 
!edge1->tagged_validity0
)
1103
0
    return -1;
1104
30
1105
30
  return 0;
1106
30
}
1107
1108
/* Insert dummy tags in domain and range of "map".
1109
 *
1110
 * In particular, if "map" is of the form
1111
 *
1112
 *  A -> B
1113
 *
1114
 * then return
1115
 *
1116
 *  [A -> dummy_tag] -> [B -> dummy_tag]
1117
 *
1118
 * where the dummy_tags are identical and equal to any dummy tags
1119
 * introduced by any other call to this function.
1120
 */
1121
static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
1122
0
{
1123
0
  static char dummy;
1124
0
  isl_ctx *ctx;
1125
0
  isl_id *id;
1126
0
  isl_space *space;
1127
0
  isl_set *domain, *range;
1128
0
1129
0
  ctx = isl_map_get_ctx(map);
1130
0
1131
0
  id = isl_id_alloc(ctx, NULL, &dummy);
1132
0
  space = isl_space_params(isl_map_get_space(map));
1133
0
  space = isl_space_set_from_params(space);
1134
0
  space = isl_space_set_tuple_id(space, isl_dim_set, id);
1135
0
  space = isl_space_map_from_set(space);
1136
0
1137
0
  domain = isl_map_wrap(map);
1138
0
  range = isl_map_wrap(isl_map_universe(space));
1139
0
  map = isl_map_from_domain_and_range(domain, range);
1140
0
  map = isl_map_zip(map);
1141
0
1142
0
  return map;
1143
0
}
1144
1145
/* Given that at least one of "src" or "dst" is compressed, return
1146
 * a map between the spaces of these nodes restricted to the affine
1147
 * hull that was used in the compression.
1148
 */
1149
static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
1150
  struct isl_sched_node *dst)
1151
0
{
1152
0
  isl_set *dom, *ran;
1153
0
1154
0
  if (src->compressed)
1155
0
    dom = isl_set_copy(src->hull);
1156
0
  else
1157
0
    dom = isl_set_universe(isl_space_copy(src->space));
1158
0
  if (dst->compressed)
1159
0
    ran = isl_set_copy(dst->hull);
1160
0
  else
1161
0
    ran = isl_set_universe(isl_space_copy(dst->space));
1162
0
1163
0
  return isl_map_from_domain_and_range(dom, ran);
1164
0
}
1165
1166
/* Intersect the domains of the nested relations in domain and range
1167
 * of "tagged" with "map".
1168
 */
1169
static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
1170
  __isl_keep isl_map *map)
1171
0
{
1172
0
  isl_set *set;
1173
0
1174
0
  tagged = isl_map_zip(tagged);
1175
0
  set = isl_map_wrap(isl_map_copy(map));
1176
0
  tagged = isl_map_intersect_domain(tagged, set);
1177
0
  tagged = isl_map_zip(tagged);
1178
0
  return tagged;
1179
0
}
1180
1181
/* Return a pointer to the node that lives in the domain space of "map"
1182
 * or NULL if there is no such node.
1183
 */
1184
static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
1185
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1186
45
{
1187
45
  struct isl_sched_node *node;
1188
45
  isl_space *space;
1189
45
1190
45
  space = isl_space_domain(isl_map_get_space(map));
1191
45
  node = graph_find_node(ctx, graph, space);
1192
45
  isl_space_free(space);
1193
45
1194
45
  return node;
1195
45
}
1196
1197
/* Return a pointer to the node that lives in the range space of "map"
1198
 * or NULL if there is no such node.
1199
 */
1200
static struct isl_sched_node *find_range_node(isl_ctx *ctx,
1201
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1202
45
{
1203
45
  struct isl_sched_node *node;
1204
45
  isl_space *space;
1205
45
1206
45
  space = isl_space_range(isl_map_get_space(map));
1207
45
  node = graph_find_node(ctx, graph, space);
1208
45
  isl_space_free(space);
1209
45
1210
45
  return node;
1211
45
}
1212
1213
/* Add a new edge to the graph based on the given map
1214
 * and add it to data->graph->edge_table[data->type].
1215
 * If a dependence relation of a given type happens to be identical
1216
 * to one of the dependence relations of a type that was added before,
1217
 * then we don't create a new edge, but instead mark the original edge
1218
 * as also representing a dependence of the current type.
1219
 *
1220
 * Edges of type isl_edge_condition or isl_edge_conditional_validity
1221
 * may be specified as "tagged" dependence relations.  That is, "map"
1222
 * may contain elements (i -> a) -> (j -> b), where i -> j denotes
1223
 * the dependence on iterations and a and b are tags.
1224
 * edge->map is set to the relation containing the elements i -> j,
1225
 * while edge->tagged_condition and edge->tagged_validity contain
1226
 * the union of all the "map" relations
1227
 * for which extract_edge is called that result in the same edge->map.
1228
 *
1229
 * If the source or the destination node is compressed, then
1230
 * intersect both "map" and "tagged" with the constraints that
1231
 * were used to construct the compression.
1232
 * This ensures that there are no schedule constraints defined
1233
 * outside of these domains, while the scheduler no longer has
1234
 * any control over those outside parts.
1235
 */
1236
static isl_stat extract_edge(__isl_take isl_map *map, void *user)
1237
45
{
1238
45
  isl_ctx *ctx = isl_map_get_ctx(map);
1239
45
  struct isl_extract_edge_data *data = user;
1240
45
  struct isl_sched_graph *graph = data->graph;
1241
45
  struct isl_sched_node *src, *dst;
1242
45
  struct isl_sched_edge *edge;
1243
45
  isl_map *tagged = NULL;
1244
45
1245
45
  if (data->type == isl_edge_condition ||
1246
45
      data->type == isl_edge_conditional_validity) {
1247
0
    if (isl_map_can_zip(map)) {
1248
0
      tagged = isl_map_copy(map);
1249
0
      map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
1250
0
    } else {
1251
0
      tagged = insert_dummy_tags(isl_map_copy(map));
1252
0
    }
1253
0
  }
1254
45
1255
45
  src = find_domain_node(ctx, graph, map);
1256
45
  dst = find_range_node(ctx, graph, map);
1257
45
1258
45
  if (!src || !dst) {
1259
0
    isl_map_free(map);
1260
0
    isl_map_free(tagged);
1261
0
    return isl_stat_ok;
1262
0
  }
1263
45
1264
45
  if (src->compressed || dst->compressed) {
1265
0
    isl_map *hull;
1266
0
    hull = extract_hull(src, dst);
1267
0
    if (tagged)
1268
0
      tagged = map_intersect_domains(tagged, hull);
1269
0
    map = isl_map_intersect(map, hull);
1270
0
  }
1271
45
1272
45
  graph->edge[graph->n_edge].src = src;
1273
45
  graph->edge[graph->n_edge].dst = dst;
1274
45
  graph->edge[graph->n_edge].map = map;
1275
45
  graph->edge[graph->n_edge].types = 0;
1276
45
  graph->edge[graph->n_edge].tagged_condition = NULL;
1277
45
  graph->edge[graph->n_edge].tagged_validity = NULL;
1278
45
  set_type(&graph->edge[graph->n_edge], data->type);
1279
45
  if (data->type == isl_edge_condition)
1280
0
    graph->edge[graph->n_edge].tagged_condition =
1281
0
          isl_union_map_from_map(tagged);
1282
45
  if (data->type == isl_edge_conditional_validity)
1283
0
    graph->edge[graph->n_edge].tagged_validity =
1284
0
          isl_union_map_from_map(tagged);
1285
45
1286
45
  edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
1287
45
  if (!edge) {
1288
0
    graph->n_edge++;
1289
0
    return isl_stat_error;
1290
0
  }
1291
45
  if (edge == &graph->edge[graph->n_edge])
1292
15
    return graph_edge_table_add(ctx, graph, data->type,
1293
15
            &graph->edge[graph->n_edge++]);
1294
30
1295
30
  if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0)
1296
0
    return -1;
1297
30
1298
30
  return graph_edge_table_add(ctx, graph, data->type, edge);
1299
30
}
1300
1301
/* Initialize the schedule graph "graph" from the schedule constraints "sc".
1302
 *
1303
 * The context is included in the domain before the nodes of
1304
 * the graphs are extracted in order to be able to exploit
1305
 * any possible additional equalities.
1306
 * Note that this intersection is only performed locally here.
1307
 */
1308
static isl_stat graph_init(struct isl_sched_graph *graph,
1309
  __isl_keep isl_schedule_constraints *sc)
1310
12
{
1311
12
  isl_ctx *ctx;
1312
12
  isl_union_set *domain;
1313
12
  isl_union_map *c;
1314
12
  struct isl_extract_edge_data data;
1315
12
  enum isl_edge_type i;
1316
12
  isl_stat r;
1317
12
1318
12
  if (!sc)
1319
0
    return isl_stat_error;
1320
12
1321
12
  ctx = isl_schedule_constraints_get_ctx(sc);
1322
12
1323
12
  domain = isl_schedule_constraints_get_domain(sc);
1324
12
  graph->n = isl_union_set_n_set(domain);
1325
12
  isl_union_set_free(domain);
1326
12
1327
12
  if (graph_alloc(ctx, graph, graph->n,
1328
12
      isl_schedule_constraints_n_map(sc)) < 0)
1329
0
    return isl_stat_error;
1330
12
1331
12
  if (compute_max_row(graph, sc) < 0)
1332
0
    return isl_stat_error;
1333
12
  graph->root = graph;
1334
12
  graph->n = 0;
1335
12
  domain = isl_schedule_constraints_get_domain(sc);
1336
12
  domain = isl_union_set_intersect_params(domain,
1337
12
            isl_schedule_constraints_get_context(sc));
1338
12
  r = isl_union_set_foreach_set(domain, &extract_node, graph);
1339
12
  isl_union_set_free(domain);
1340
12
  if (r < 0)
1341
0
    return isl_stat_error;
1342
12
  if (graph_init_table(ctx, graph) < 0)
1343
0
    return isl_stat_error;
1344
72
  
for (i = isl_edge_first; 12
i <= isl_edge_last;
++i60
) {
1345
60
    c = isl_schedule_constraints_get(sc, i);
1346
60
    graph->max_edge[i] = isl_union_map_n_map(c);
1347
60
    isl_union_map_free(c);
1348
60
    if (!c)
1349
0
      return isl_stat_error;
1350
60
  }
1351
12
  if (graph_init_edge_tables(ctx, graph) < 0)
1352
0
    return isl_stat_error;
1353
12
  graph->n_edge = 0;
1354
12
  data.graph = graph;
1355
72
  for (i = isl_edge_first; i <= isl_edge_last; 
++i60
) {
1356
60
    isl_stat r;
1357
60
1358
60
    data.type = i;
1359
60
    c = isl_schedule_constraints_get(sc, i);
1360
60
    r = isl_union_map_foreach_map(c, &extract_edge, &data);
1361
60
    isl_union_map_free(c);
1362
60
    if (r < 0)
1363
0
      return isl_stat_error;
1364
60
  }
1365
12
1366
12
  return isl_stat_ok;
1367
12
}
1368
1369
/* Check whether there is any dependence from node[j] to node[i]
1370
 * or from node[i] to node[j].
1371
 */
1372
static isl_bool node_follows_weak(int i, int j, void *user)
1373
0
{
1374
0
  isl_bool f;
1375
0
  struct isl_sched_graph *graph = user;
1376
0
1377
0
  f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
1378
0
  if (f < 0 || f)
1379
0
    return f;
1380
0
  return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
1381
0
}
1382
1383
/* Check whether there is a (conditional) validity dependence from node[j]
1384
 * to node[i], forcing node[i] to follow node[j].
1385
 */
1386
static isl_bool node_follows_strong(int i, int j, void *user)
1387
10
{
1388
10
  struct isl_sched_graph *graph = user;
1389
10
1390
10
  return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
1391
10
}
1392
1393
/* Use Tarjan's algorithm for computing the strongly connected components
1394
 * in the dependence graph only considering those edges defined by "follows".
1395
 */
1396
static int detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph,
1397
  isl_bool (*follows)(int i, int j, void *user))
1398
30
{
1399
30
  int i, n;
1400
30
  struct isl_tarjan_graph *g = NULL;
1401
30
1402
30
  g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
1403
30
  if (!g)
1404
0
    return -1;
1405
30
1406
30
  graph->scc = 0;
1407
30
  i = 0;
1408
30
  n = graph->n;
1409
66
  while (n) {
1410
74
    while (g->order[i] != -1) {
1411
38
      graph->node[g->order[i]].scc = graph->scc;
1412
38
      --n;
1413
38
      ++i;
1414
38
    }
1415
36
    ++i;
1416
36
    graph->scc++;
1417
36
  }
1418
30
1419
30
  isl_tarjan_graph_free(g);
1420
30
1421
30
  return 0;
1422
30
}
1423
1424
/* Apply Tarjan's algorithm to detect the strongly connected components
1425
 * in the dependence graph.
1426
 * Only consider the (conditional) validity dependences and clear "weak".
1427
 */
1428
static int detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1429
30
{
1430
30
  graph->weak = 0;
1431
30
  return detect_ccs(ctx, graph, &node_follows_strong);
1432
30
}
1433
1434
/* Apply Tarjan's algorithm to detect the (weakly) connected components
1435
 * in the dependence graph.
1436
 * Consider all dependences and set "weak".
1437
 */
1438
static int detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1439
0
{
1440
0
  graph->weak = 1;
1441
0
  return detect_ccs(ctx, graph, &node_follows_weak);
1442
0
}
1443
1444
static int cmp_scc(const void *a, const void *b, void *data)
1445
1
{
1446
1
  struct isl_sched_graph *graph = data;
1447
1
  const int *i1 = a;
1448
1
  const int *i2 = b;
1449
1
1450
1
  return graph->node[*i1].scc - graph->node[*i2].scc;
1451
1
}
1452
1453
/* Sort the elements of graph->sorted according to the corresponding SCCs.
1454
 */
1455
static int sort_sccs(struct isl_sched_graph *graph)
1456
17
{
1457
17
  return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
1458
17
}
1459
1460
/* Return a non-parametric set in the compressed space of "node" that is
1461
 * bounded by the size in each direction
1462
 *
1463
 *  { [x] : -S_i <= x_i <= S_i }
1464
 *
1465
 * If S_i is infinity in direction i, then there are no constraints
1466
 * in that direction.
1467
 *
1468
 * Cache the result in node->bounds.
1469
 */
1470
static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node)
1471
8
{
1472
8
  isl_space *space;
1473
8
  isl_basic_set *bounds;
1474
8
  int i;
1475
8
  unsigned nparam;
1476
8
1477
8
  if (node->bounds)
1478
0
    return isl_basic_set_copy(node->bounds);
1479
8
1480
8
  if (node->compressed)
1481
0
    space = isl_multi_aff_get_domain_space(node->decompress);
1482
8
  else
1483
8
    space = isl_space_copy(node->space);
1484
8
  nparam = isl_space_dim(space, isl_dim_param);
1485
8
  space = isl_space_drop_dims(space, isl_dim_param, 0, nparam);
1486
8
  bounds = isl_basic_set_universe(space);
1487
8
1488
29
  for (i = 0; i < node->nvar; 
++i21
) {
1489
21
    isl_val *size;
1490
21
1491
21
    size = isl_multi_val_get_val(node->sizes, i);
1492
21
    if (!size)
1493
0
      return isl_basic_set_free(bounds);
1494
21
    if (!isl_val_is_int(size)) {
1495
4
      isl_val_free(size);
1496
4
      continue;
1497
4
    }
1498
17
    bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i,
1499
17
              isl_val_copy(size));
1500
17
    bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i,
1501
17
              isl_val_neg(size));
1502
17
  }
1503
8
1504
8
  node->bounds = isl_basic_set_copy(bounds);
1505
8
  return bounds;
1506
8
}
1507
1508
/* Drop some constraints from "delta" that could be exploited
1509
 * to construct loop coalescing schedules.
1510
 * In particular, drop those constraint that bound the difference
1511
 * to the size of the domain.
1512
 * First project out the parameters to improve the effectiveness.
1513
 */
1514
static __isl_give isl_set *drop_coalescing_constraints(
1515
  __isl_take isl_set *delta, struct isl_sched_node *node)
1516
8
{
1517
8
  unsigned nparam;
1518
8
  isl_basic_set *bounds;
1519
8
1520
8
  bounds = get_size_bounds(node);
1521
8
1522
8
  nparam = isl_set_dim(delta, isl_dim_param);
1523
8
  delta = isl_set_project_out(delta, isl_dim_param, 0, nparam);
1524
8
  delta = isl_set_remove_divs(delta);
1525
8
  delta = isl_set_plain_gist_basic_set(delta, bounds);
1526
8
  return delta;
1527
8
}
1528
1529
/* Given a dependence relation R from "node" to itself,
1530
 * construct the set of coefficients of valid constraints for elements
1531
 * in that dependence relation.
1532
 * In particular, the result contains tuples of coefficients
1533
 * c_0, c_n, c_x such that
1534
 *
1535
 *  c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
1536
 *
1537
 * or, equivalently,
1538
 *
1539
 *  c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
1540
 *
1541
 * We choose here to compute the dual of delta R.
1542
 * Alternatively, we could have computed the dual of R, resulting
1543
 * in a set of tuples c_0, c_n, c_x, c_y, and then
1544
 * plugged in (c_0, c_n, c_x, -c_x).
1545
 *
1546
 * If "need_param" is set, then the resulting coefficients effectively
1547
 * include coefficients for the parameters c_n.  Otherwise, they may
1548
 * have been projected out already.
1549
 * Since the constraints may be different for these two cases,
1550
 * they are stored in separate caches.
1551
 * In particular, if no parameter coefficients are required and
1552
 * the schedule_treat_coalescing option is set, then the parameters
1553
 * are projected out and some constraints that could be exploited
1554
 * to construct coalescing schedules are removed before the dual
1555
 * is computed.
1556
 *
1557
 * If "node" has been compressed, then the dependence relation
1558
 * is also compressed before the set of coefficients is computed.
1559
 */
1560
static __isl_give isl_basic_set *intra_coefficients(
1561
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1562
  __isl_take isl_map *map, int need_param)
1563
95
{
1564
95
  isl_ctx *ctx;
1565
95
  isl_set *delta;
1566
95
  isl_map *key;
1567
95
  isl_basic_set *coef;
1568
95
  isl_maybe_isl_basic_set m;
1569
95
  isl_map_to_basic_set **hmap = &graph->intra_hmap;
1570
95
  int treat;
1571
95
1572
95
  if (!map)
1573
0
    return NULL;
1574
95
1575
95
  ctx = isl_map_get_ctx(map);
1576
95
  treat = !need_param && 
isl_options_get_schedule_treat_coalescing(ctx)79
;
1577
95
  if (!treat)
1578
16
    hmap = &graph->intra_hmap_param;
1579
95
  m = isl_map_to_basic_set_try_get(*hmap, map);
1580
95
  if (m.valid < 0 || m.valid) {
1581
79
    isl_map_free(map);
1582
79
    return m.value;
1583
79
  }
1584
16
1585
16
  key = isl_map_copy(map);
1586
16
  if (node->compressed) {
1587
0
    map = isl_map_preimage_domain_multi_aff(map,
1588
0
            isl_multi_aff_copy(node->decompress));
1589
0
    map = isl_map_preimage_range_multi_aff(map,
1590
0
            isl_multi_aff_copy(node->decompress));
1591
0
  }
1592
16
  delta = isl_map_deltas(map);
1593
16
  if (treat)
1594
8
    delta = drop_coalescing_constraints(delta, node);
1595
95
  delta = isl_set_remove_divs(delta);
1596
95
  coef = isl_set_coefficients(delta);
1597
95
  *hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef));
1598
95
1599
95
  return coef;
1600
95
}
1601
1602
/* Given a dependence relation R, construct the set of coefficients
1603
 * of valid constraints for elements in that dependence relation.
1604
 * In particular, the result contains tuples of coefficients
1605
 * c_0, c_n, c_x, c_y such that
1606
 *
1607
 *  c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
1608
 *
1609
 * If the source or destination nodes of "edge" have been compressed,
1610
 * then the dependence relation is also compressed before
1611
 * the set of coefficients is computed.
1612
 */
1613
static __isl_give isl_basic_set *inter_coefficients(
1614
  struct isl_sched_graph *graph, struct isl_sched_edge *edge,
1615
  __isl_take isl_map *map)
1616
12
{
1617
12
  isl_set *set;
1618
12
  isl_map *key;
1619
12
  isl_basic_set *coef;
1620
12
  isl_maybe_isl_basic_set m;
1621
12
1622
12
  m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
1623
12
  if (m.valid < 0 || m.valid) {
1624
10
    isl_map_free(map);
1625
10
    return m.value;
1626
10
  }
1627
2
1628
2
  key = isl_map_copy(map);
1629
2
  if (edge->src->compressed)
1630
0
    map = isl_map_preimage_domain_multi_aff(map,
1631
0
            isl_multi_aff_copy(edge->src->decompress));
1632
2
  if (edge->dst->compressed)
1633
0
    map = isl_map_preimage_range_multi_aff(map,
1634
0
            isl_multi_aff_copy(edge->dst->decompress));
1635
12
  set = isl_map_wrap(isl_map_remove_divs(map));
1636
12
  coef = isl_set_coefficients(set);
1637
12
  graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
1638
12
          isl_basic_set_copy(coef));
1639
12
1640
12
  return coef;
1641
12
}
1642
1643
/* Return the position of the coefficients of the variables in
1644
 * the coefficients constraints "coef".
1645
 *
1646
 * The space of "coef" is of the form
1647
 *
1648
 *  { coefficients[[cst, params] -> S] }
1649
 *
1650
 * Return the position of S.
1651
 */
1652
static int coef_var_offset(__isl_keep isl_basic_set *coef)
1653
66
{
1654
66
  int offset;
1655
66
  isl_space *space;
1656
66
1657
66
  space = isl_space_unwrap(isl_basic_set_get_space(coef));
1658
66
  offset = isl_space_dim(space, isl_dim_in);
1659
66
  isl_space_free(space);
1660
66
1661
66
  return offset;
1662
66
}
1663
1664
/* Return the offset of the coefficient of the constant term of "node"
1665
 * within the (I)LP.
1666
 *
1667
 * Within each node, the coefficients have the following order:
1668
 *  - positive and negative parts of c_i_x
1669
 *  - c_i_n (if parametric)
1670
 *  - c_i_0
1671
 */
1672
static int node_cst_coef_offset(struct isl_sched_node *node)
1673
109
{
1674
109
  return node->start + 2 * node->nvar + node->nparam;
1675
109
}
1676
1677
/* Return the offset of the coefficients of the parameters of "node"
1678
 * within the (I)LP.
1679
 *
1680
 * Within each node, the coefficients have the following order:
1681
 *  - positive and negative parts of c_i_x
1682
 *  - c_i_n (if parametric)
1683
 *  - c_i_0
1684
 */
1685
static int node_par_coef_offset(struct isl_sched_node *node)
1686
124
{
1687
124
  return node->start + 2 * node->nvar;
1688
124
}
1689
1690
/* Return the offset of the coefficients of the variables of "node"
1691
 * within the (I)LP.
1692
 *
1693
 * Within each node, the coefficients have the following order:
1694
 *  - positive and negative parts of c_i_x
1695
 *  - c_i_n (if parametric)
1696
 *  - c_i_0
1697
 */
1698
static int node_var_coef_offset(struct isl_sched_node *node)
1699
346
{
1700
346
  return node->start;
1701
346
}
1702
1703
/* Return the position of the pair of variables encoding
1704
 * coefficient "i" of "node".
1705
 *
1706
 * The order of these variable pairs is the opposite of
1707
 * that of the coefficients, with 2 variables per coefficient.
1708
 */
1709
static int node_var_coef_pos(struct isl_sched_node *node, int i)
1710
202
{
1711
202
  return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i);
1712
202
}
1713
1714
/* Construct an isl_dim_map for mapping constraints on coefficients
1715
 * for "node" to the corresponding positions in graph->lp.
1716
 * "offset" is the offset of the coefficients for the variables
1717
 * in the input constraints.
1718
 * "s" is the sign of the mapping.
1719
 *
1720
 * The input constraints are given in terms of the coefficients
1721
 * (c_0, c_x) or (c_0, c_n, c_x).
1722
 * The mapping produced by this function essentially plugs in
1723
 * (0, c_i_x^+ - c_i_x^-) if s = 1 and
1724
 * (0, -c_i_x^+ + c_i_x^-) if s = -1 or
1725
 * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
1726
 * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
1727
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1728
 * Furthermore, the order of these pairs is the opposite of that
1729
 * of the corresponding coefficients.
1730
 *
1731
 * The caller can extend the mapping to also map the other coefficients
1732
 * (and therefore not plug in 0).
1733
 */
1734
static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
1735
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1736
  int offset, int s)
1737
58
{
1738
58
  int pos;
1739
58
  unsigned total;
1740
58
  isl_dim_map *dim_map;
1741
58
1742
58
  if (!node)
1743
0
    return NULL;
1744
58
1745
58
  total = isl_basic_set_total_dim(graph->lp);
1746
58
  pos = node_var_coef_pos(node, 0);
1747
58
  dim_map = isl_dim_map_alloc(ctx, total);
1748
58
  isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s);
1749
58
  isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s);
1750
58
1751
58
  return dim_map;
1752
58
}
1753
1754
/* Construct an isl_dim_map for mapping constraints on coefficients
1755
 * for "src" (node i) and "dst" (node j) to the corresponding positions
1756
 * in graph->lp.
1757
 * "offset" is the offset of the coefficients for the variables of "src"
1758
 * in the input constraints.
1759
 * "s" is the sign of the mapping.
1760
 *
1761
 * The input constraints are given in terms of the coefficients
1762
 * (c_0, c_n, c_x, c_y).
1763
 * The mapping produced by this function essentially plugs in
1764
 * (c_j_0 - c_i_0, c_j_n - c_i_n,
1765
 *  -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
1766
 * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
1767
 *  c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
1768
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1769
 * Furthermore, the order of these pairs is the opposite of that
1770
 * of the corresponding coefficients.
1771
 *
1772
 * The caller can further extend the mapping.
1773
 */
1774
static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
1775
  struct isl_sched_graph *graph, struct isl_sched_node *src,
1776
  struct isl_sched_node *dst, int offset, int s)
1777
8
{
1778
8
  int pos;
1779
8
  unsigned total;
1780
8
  isl_dim_map *dim_map;
1781
8
1782
8
  if (!src || !dst)
1783
0
    return NULL;
1784
8
1785
8
  total = isl_basic_set_total_dim(graph->lp);
1786
8
  dim_map = isl_dim_map_alloc(ctx, total);
1787
8
1788
8
  pos = node_cst_coef_offset(dst);
1789
8
  isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s);
1790
8
  pos = node_par_coef_offset(dst);
1791
8
  isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s);
1792
8
  pos = node_var_coef_pos(dst, 0);
1793
8
  isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1,
1794
8
        dst->nvar, -s);
1795
8
  isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1,
1796
8
        dst->nvar, s);
1797
8
1798
8
  pos = node_cst_coef_offset(src);
1799
8
  isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s);
1800
8
  pos = node_par_coef_offset(src);
1801
8
  isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s);
1802
8
  pos = node_var_coef_pos(src, 0);
1803
8
  isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s);
1804
8
  isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s);
1805
8
1806
8
  return dim_map;
1807
8
}
1808
1809
/* Add the constraints from "src" to "dst" using "dim_map",
1810
 * after making sure there is enough room in "dst" for the extra constraints.
1811
 */
1812
static __isl_give isl_basic_set *add_constraints_dim_map(
1813
  __isl_take isl_basic_set *dst, __isl_take isl_basic_set *src,
1814
  __isl_take isl_dim_map *dim_map)
1815
66
{
1816
66
  int n_eq, n_ineq;
1817
66
1818
66
  n_eq = isl_basic_set_n_equality(src);
1819
66
  n_ineq = isl_basic_set_n_inequality(src);
1820
66
  dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq);
1821
66
  dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map);
1822
66
  return dst;
1823
66
}
1824
1825
/* Add constraints to graph->lp that force validity for the given
1826
 * dependence from a node i to itself.
1827
 * That is, add constraints that enforce
1828
 *
1829
 *  (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
1830
 *  = c_i_x (y - x) >= 0
1831
 *
1832
 * for each (x,y) in R.
1833
 * We obtain general constraints on coefficients (c_0, c_x)
1834
 * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-),
1835
 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
1836
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1837
 * Note that the result of intra_coefficients may also contain
1838
 * parameter coefficients c_n, in which case 0 is plugged in for them as well.
1839
 */
1840
static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
1841
  struct isl_sched_edge *edge)
1842
29
{
1843
29
  int offset;
1844
29
  isl_map *map = isl_map_copy(edge->map);
1845
29
  isl_ctx *ctx = isl_map_get_ctx(map);
1846
29
  isl_dim_map *dim_map;
1847
29
  isl_basic_set *coef;
1848
29
  struct isl_sched_node *node = edge->src;
1849
29
1850
29
  coef = intra_coefficients(graph, node, map, 0);
1851
29
1852
29
  offset = coef_var_offset(coef);
1853
29
1854
29
  if (!coef)
1855
0
    return isl_stat_error;
1856
29
1857
29
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
1858
29
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1859
29
1860
29
  return isl_stat_ok;
1861
29
}
1862
1863
/* Add constraints to graph->lp that force validity for the given
1864
 * dependence from node i to node j.
1865
 * That is, add constraints that enforce
1866
 *
1867
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
1868
 *
1869
 * for each (x,y) in R.
1870
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1871
 * of valid constraints for R and then plug in
1872
 * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
1873
 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
1874
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1875
 */
1876
static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
1877
  struct isl_sched_edge *edge)
1878
4
{
1879
4
  int offset;
1880
4
  isl_map *map;
1881
4
  isl_ctx *ctx;
1882
4
  isl_dim_map *dim_map;
1883
4
  isl_basic_set *coef;
1884
4
  struct isl_sched_node *src = edge->src;
1885
4
  struct isl_sched_node *dst = edge->dst;
1886
4
1887
4
  if (!graph->lp)
1888
0
    return isl_stat_error;
1889
4
1890
4
  map = isl_map_copy(edge->map);
1891
4
  ctx = isl_map_get_ctx(map);
1892
4
  coef = inter_coefficients(graph, edge, map);
1893
4
1894
4
  offset = coef_var_offset(coef);
1895
4
1896
4
  if (!coef)
1897
0
    return isl_stat_error;
1898
4
1899
4
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
1900
4
1901
4
  edge->start = graph->lp->n_ineq;
1902
4
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1903
4
  if (!graph->lp)
1904
0
    return isl_stat_error;
1905
4
  edge->end = graph->lp->n_ineq;
1906
4
1907
4
  return isl_stat_ok;
1908
4
}
1909
1910
/* Add constraints to graph->lp that bound the dependence distance for the given
1911
 * dependence from a node i to itself.
1912
 * If s = 1, we add the constraint
1913
 *
1914
 *  c_i_x (y - x) <= m_0 + m_n n
1915
 *
1916
 * or
1917
 *
1918
 *  -c_i_x (y - x) + m_0 + m_n n >= 0
1919
 *
1920
 * for each (x,y) in R.
1921
 * If s = -1, we add the constraint
1922
 *
1923
 *  -c_i_x (y - x) <= m_0 + m_n n
1924
 *
1925
 * or
1926
 *
1927
 *  c_i_x (y - x) + m_0 + m_n n >= 0
1928
 *
1929
 * for each (x,y) in R.
1930
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
1931
 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
1932
 * with each coefficient (except m_0) represented as a pair of non-negative
1933
 * coefficients.
1934
 *
1935
 *
1936
 * If "local" is set, then we add constraints
1937
 *
1938
 *  c_i_x (y - x) <= 0
1939
 *
1940
 * or
1941
 *
1942
 *  -c_i_x (y - x) <= 0
1943
 *
1944
 * instead, forcing the dependence distance to be (less than or) equal to 0.
1945
 * That is, we plug in (0, 0, -s * c_i_x),
1946
 * intra_coefficients is not required to have c_n in its result when
1947
 * "local" is set.  If they are missing, then (0, -s * c_i_x) is plugged in.
1948
 * Note that dependences marked local are treated as validity constraints
1949
 * by add_all_validity_constraints and therefore also have
1950
 * their distances bounded by 0 from below.
1951
 */
1952
static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
1953
  struct isl_sched_edge *edge, int s, int local)
1954
29
{
1955
29
  int offset;
1956
29
  unsigned nparam;
1957
29
  isl_map *map = isl_map_copy(edge->map);
1958
29
  isl_ctx *ctx = isl_map_get_ctx(map);
1959
29
  isl_dim_map *dim_map;
1960
29
  isl_basic_set *coef;
1961
29
  struct isl_sched_node *node = edge->src;
1962
29
1963
29
  coef = intra_coefficients(graph, node, map, !local);
1964
29
1965
29
  offset = coef_var_offset(coef);
1966
29
1967
29
  if (!coef)
1968
0
    return isl_stat_error;
1969
29
1970
29
  nparam = isl_space_dim(node->space, isl_dim_param);
1971
29
  dim_map = intra_dim_map(ctx, graph, node, offset, -s);
1972
29
1973
29
  if (!local) {
1974
8
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
1975
8
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
1976
8
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
1977
8
  }
1978
29
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1979
29
1980
29
  return isl_stat_ok;
1981
29
}
1982
1983
/* Add constraints to graph->lp that bound the dependence distance for the given
1984
 * dependence from node i to node j.
1985
 * If s = 1, we add the constraint
1986
 *
1987
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
1988
 *    <= m_0 + m_n n
1989
 *
1990
 * or
1991
 *
1992
 *  -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
1993
 *    m_0 + m_n n >= 0
1994
 *
1995
 * for each (x,y) in R.
1996
 * If s = -1, we add the constraint
1997
 *
1998
 *  -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
1999
 *    <= m_0 + m_n n
2000
 *
2001
 * or
2002
 *
2003
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
2004
 *    m_0 + m_n n >= 0
2005
 *
2006
 * for each (x,y) in R.
2007
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2008
 * of valid constraints for R and then plug in
2009
 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
2010
 *  s*c_i_x, -s*c_j_x)
2011
 * with each coefficient (except m_0, c_*_0 and c_*_n)
2012
 * represented as a pair of non-negative coefficients.
2013
 *
2014
 *
2015
 * If "local" is set (and s = 1), then we add constraints
2016
 *
2017
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
2018
 *
2019
 * or
2020
 *
2021
 *  -((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0
2022
 *
2023
 * instead, forcing the dependence distance to be (less than or) equal to 0.
2024
 * That is, we plug in
2025
 * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
2026
 * Note that dependences marked local are treated as validity constraints
2027
 * by add_all_validity_constraints and therefore also have
2028
 * their distances bounded by 0 from below.
2029
 */
2030
static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
2031
  struct isl_sched_edge *edge, int s, int local)
2032
4
{
2033
4
  int offset;
2034
4
  unsigned nparam;
2035
4
  isl_map *map = isl_map_copy(edge->map);
2036
4
  isl_ctx *ctx = isl_map_get_ctx(map);
2037
4
  isl_dim_map *dim_map;
2038
4
  isl_basic_set *coef;
2039
4
  struct isl_sched_node *src = edge->src;
2040
4
  struct isl_sched_node *dst = edge->dst;
2041
4
2042
4
  coef = inter_coefficients(graph, edge, map);
2043
4
2044
4
  offset = coef_var_offset(coef);
2045
4
2046
4
  if (!coef)
2047
0
    return isl_stat_error;
2048
4
2049
4
  nparam = isl_space_dim(src->space, isl_dim_param);
2050
4
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
2051
4
2052
4
  if (!local) {
2053
2
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2054
2
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2055
2
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2056
2
  }
2057
4
2058
4
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2059
4
2060
4
  return isl_stat_ok;
2061
4
}
2062
2063
/* Should the distance over "edge" be forced to zero?
2064
 * That is, is it marked as a local edge?
2065
 * If "use_coincidence" is set, then coincidence edges are treated
2066
 * as local edges.
2067
 */
2068
static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
2069
128
{
2070
128
  return is_local(edge) || (use_coincidence && 
is_coincidence(edge)90
);
2071
128
}
2072
2073
/* Add all validity constraints to graph->lp.
2074
 *
2075
 * An edge that is forced to be local needs to have its dependence
2076
 * distances equal to zero.  We take care of bounding them by 0 from below
2077
 * here.  add_all_proximity_constraints takes care of bounding them by 0
2078
 * from above.
2079
 *
2080
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2081
 * Otherwise, we ignore them.
2082
 */
2083
static int add_all_validity_constraints(struct isl_sched_graph *graph,
2084
  int use_coincidence)
2085
49
{
2086
49
  int i;
2087
49
2088
82
  for (i = 0; i < graph->n_edge; 
++i33
) {
2089
33
    struct isl_sched_edge *edge = &graph->edge[i];
2090
33
    int zero;
2091
33
2092
33
    zero = force_zero(edge, use_coincidence);
2093
33
    if (!is_validity(edge) && 
!zero0
)
2094
0
      continue;
2095
33
    if (edge->src != edge->dst)
2096
4
      continue;
2097
29
    if (add_intra_validity_constraints(graph, edge) < 0)
2098
0
      return -1;
2099
33
  }
2100
49
2101
82
  
for (i = 0; 49
i < graph->n_edge;
++i33
) {
2102
33
    struct isl_sched_edge *edge = &graph->edge[i];
2103
33
    int zero;
2104
33
2105
33
    zero = force_zero(edge, use_coincidence);
2106
33
    if (!is_validity(edge) && 
!zero0
)
2107
0
      continue;
2108
33
    if (edge->src == edge->dst)
2109
29
      continue;
2110
4
    if (add_inter_validity_constraints(graph, edge) < 0)
2111
0
      return -1;
2112
33
  }
2113
49
2114
49
  return 0;
2115
49
}
2116
2117
/* Add constraints to graph->lp that bound the dependence distance
2118
 * for all dependence relations.
2119
 * If a given proximity dependence is identical to a validity
2120
 * dependence, then the dependence distance is already bounded
2121
 * from below (by zero), so we only need to bound the distance
2122
 * from above.  (This includes the case of "local" dependences
2123
 * which are treated as validity dependence by add_all_validity_constraints.)
2124
 * Otherwise, we need to bound the distance both from above and from below.
2125
 *
2126
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2127
 * Otherwise, we ignore them.
2128
 */
2129
static int add_all_proximity_constraints(struct isl_sched_graph *graph,
2130
  int use_coincidence)
2131
49
{
2132
49
  int i;
2133
49
2134
82
  for (i = 0; i < graph->n_edge; 
++i33
) {
2135
33
    struct isl_sched_edge *edge = &graph->edge[i];
2136
33
    int zero;
2137
33
2138
33
    zero = force_zero(edge, use_coincidence);
2139
33
    if (!is_proximity(edge) && 
!zero0
)
2140
0
      continue;
2141
33
    if (edge->src == edge->dst &&
2142
33
        
add_intra_proximity_constraints(graph, edge, 1, zero) < 029
)
2143
0
      return -1;
2144
33
    if (edge->src != edge->dst &&
2145
33
        
add_inter_proximity_constraints(graph, edge, 1, zero) < 04
)
2146
0
      return -1;
2147
33
    if (is_validity(edge) || 
zero0
)
2148
33
      continue;
2149
0
    if (edge->src == edge->dst &&
2150
0
        add_intra_proximity_constraints(graph, edge, -1, 0) < 0)
2151
0
      return -1;
2152
0
    if (edge->src != edge->dst &&
2153
0
        add_inter_proximity_constraints(graph, edge, -1, 0) < 0)
2154
0
      return -1;
2155
33
  }
2156
49
2157
49
  return 0;
2158
49
}
2159
2160
/* Normalize the rows of "indep" such that all rows are lexicographically
2161
 * positive and such that each row contains as many final zeros as possible,
2162
 * given the choice for the previous rows.
2163
 * Do this by performing elementary row operations.
2164
 */
2165
static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep)
2166
69
{
2167
69
  indep = isl_mat_reverse_gauss(indep);
2168
69
  indep = isl_mat_lexnonneg_rows(indep);
2169
69
  return indep;
2170
69
}
2171
2172
/* Compute a basis for the rows in the linear part of the schedule
2173
 * and extend this basis to a full basis.  The remaining rows
2174
 * can then be used to force linear independence from the rows
2175
 * in the schedule.
2176
 *
2177
 * In particular, given the schedule rows S, we compute
2178
 *
2179
 *  S   = H Q
2180
 *  S U = H
2181
 *
2182
 * with H the Hermite normal form of S.  That is, all but the
2183
 * first rank columns of H are zero and so each row in S is
2184
 * a linear combination of the first rank rows of Q.
2185
 * The matrix Q can be used as a variable transformation
2186
 * that isolates the directions of S in the first rank rows.
2187
 * Transposing S U = H yields
2188
 *
2189
 *  U^T S^T = H^T
2190
 *
2191
 * with all but the first rank rows of H^T zero.
2192
 * The last rows of U^T are therefore linear combinations
2193
 * of schedule coefficients that are all zero on schedule
2194
 * coefficients that are linearly dependent on the rows of S.
2195
 * At least one of these combinations is non-zero on
2196
 * linearly independent schedule coefficients.
2197
 * The rows are normalized to involve as few of the last
2198
 * coefficients as possible and to have a positive initial value.
2199
 */
2200
static int node_update_vmap(struct isl_sched_node *node)
2201
69
{
2202
69
  isl_mat *H, *U, *Q;
2203
69
  int n_row = isl_mat_rows(node->sched);
2204
69
2205
69
  H = isl_mat_sub_alloc(node->sched, 0, n_row,
2206
69
            1 + node->nparam, node->nvar);
2207
69
2208
69
  H = isl_mat_left_hermite(H, 0, &U, &Q);
2209
69
  isl_mat_free(node->indep);
2210
69
  isl_mat_free(node->vmap);
2211
69
  node->vmap = Q;
2212
69
  node->indep = isl_mat_transpose(U);
2213
69
  node->rank = isl_mat_initial_non_zero_cols(H);
2214
69
  node->indep = isl_mat_drop_rows(node->indep, 0, node->rank);
2215
69
  node->indep = normalize_independent(node->indep);
2216
69
  isl_mat_free(H);
2217
69
2218
69
  if (!node->indep || !node->vmap || node->rank < 0)
2219
0
    return -1;
2220
69
  return 0;
2221
69
}
2222
2223
/* Is "edge" marked as a validity or a conditional validity edge?
2224
 */
2225
static int is_any_validity(struct isl_sched_edge *edge)
2226
0
{
2227
0
  return is_validity(edge) || is_conditional_validity(edge);
2228
0
}
2229
2230
/* How many times should we count the constraints in "edge"?
2231
 *
2232
 * We count as follows
2233
 * validity   -> 1 (>= 0)
2234
 * validity+proximity -> 2 (>= 0 and upper bound)
2235
 * proximity    -> 2 (lower and upper bound)
2236
 * local(+any)    -> 2 (>= 0 and <= 0)
2237
 *
2238
 * If an edge is only marked conditional_validity then it counts
2239
 * as zero since it is only checked afterwards.
2240
 *
2241
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2242
 * Otherwise, we ignore them.
2243
 */
2244
static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
2245
33
{
2246
33
  if (is_proximity(edge) || 
force_zero(edge, use_coincidence)0
)
2247
33
    return 2;
2248
0
  if (is_validity(edge))
2249
0
    return 1;
2250
0
  return 0;
2251
0
}
2252
2253
/* How many times should the constraints in "edge" be counted
2254
 * as a parametric intra-node constraint?
2255
 *
2256
 * Only proximity edges that are not forced zero need
2257
 * coefficient constraints that include coefficients for parameters.
2258
 * If the edge is also a validity edge, then only
2259
 * an upper bound is introduced.  Otherwise, both lower and upper bounds
2260
 * are introduced.
2261
 */
2262
static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge,
2263
  int use_coincidence)
2264
33
{
2265
33
  if (edge->src != edge->dst)
2266
4
    return 0;
2267
29
  if (!is_proximity(edge))
2268
0
    return 0;
2269
29
  if (force_zero(edge, use_coincidence))
2270
21
    return 0;
2271
8
  if (is_validity(edge))
2272
8
    return 1;
2273
0
  else
2274
0
    return 2;
2275
33
}
2276
2277
/* Add "f" times the number of equality and inequality constraints of "bset"
2278
 * to "n_eq" and "n_ineq" and free "bset".
2279
 */
2280
static isl_stat update_count(__isl_take isl_basic_set *bset,
2281
  int f, int *n_eq, int *n_ineq)
2282
41
{
2283
41
  if (!bset)
2284
0
    return isl_stat_error;
2285
41
2286
41
  *n_eq += isl_basic_set_n_equality(bset);
2287
41
  *n_ineq += isl_basic_set_n_inequality(bset);
2288
41
  isl_basic_set_free(bset);
2289
41
2290
41
  return isl_stat_ok;
2291
41
}
2292
2293
/* Count the number of equality and inequality constraints
2294
 * that will be added for the given map.
2295
 *
2296
 * The edges that require parameter coefficients are counted separately.
2297
 *
2298
 * "use_coincidence" is set if we should take into account coincidence edges.
2299
 */
2300
static isl_stat count_map_constraints(struct isl_sched_graph *graph,
2301
  struct isl_sched_edge *edge, __isl_take isl_map *map,
2302
  int *n_eq, int *n_ineq, int use_coincidence)
2303
33
{
2304
33
  isl_map *copy;
2305
33
  isl_basic_set *coef;
2306
33
  int f = edge_multiplicity(edge, use_coincidence);
2307
33
  int fp = parametric_intra_edge_multiplicity(edge, use_coincidence);
2308
33
2309
33
  if (f == 0) {
2310
0
    isl_map_free(map);
2311
0
    return isl_stat_ok;
2312
0
  }
2313
33
2314
33
  if (edge->src != edge->dst) {
2315
4
    coef = inter_coefficients(graph, edge, map);
2316
4
    return update_count(coef, f, n_eq, n_ineq);
2317
4
  }
2318
29
2319
29
  if (fp > 0) {
2320
8
    copy = isl_map_copy(map);
2321
8
    coef = intra_coefficients(graph, edge->src, copy, 1);
2322
8
    if (update_count(coef, fp, n_eq, n_ineq) < 0)
2323
0
      goto error;
2324
29
  }
2325
29
2326
29
  if (f > fp) {
2327
29
    copy = isl_map_copy(map);
2328
29
    coef = intra_coefficients(graph, edge->src, copy, 0);
2329
29
    if (update_count(coef, f - fp, n_eq, n_ineq) < 0)
2330
0
      goto error;
2331
29
  }
2332
29
2333
29
  isl_map_free(map);
2334
29
  return isl_stat_ok;
2335
0
error:
2336
0
  isl_map_free(map);
2337
0
  return isl_stat_error;
2338
33
}
2339
2340
/* Count the number of equality and inequality constraints
2341
 * that will be added to the main lp problem.
2342
 * We count as follows
2343
 * validity   -> 1 (>= 0)
2344
 * validity+proximity -> 2 (>= 0 and upper bound)
2345
 * proximity    -> 2 (lower and upper bound)
2346
 * local(+any)    -> 2 (>= 0 and <= 0)
2347
 *
2348
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2349
 * Otherwise, we ignore them.
2350
 */
2351
static int count_constraints(struct isl_sched_graph *graph,
2352
  int *n_eq, int *n_ineq, int use_coincidence)
2353
49
{
2354
49
  int i;
2355
49
2356
49
  *n_eq = *n_ineq = 0;
2357
82
  for (i = 0; i < graph->n_edge; 
++i33
) {
2358
33
    struct isl_sched_edge *edge = &graph->edge[i];
2359
33
    isl_map *map = isl_map_copy(edge->map);
2360
33
2361
33
    if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
2362
33
              use_coincidence) < 0)
2363
0
      return -1;
2364
33
  }
2365
49
2366
49
  return 0;
2367
49
}
2368
2369
/* Count the number of constraints that will be added by
2370
 * add_bound_constant_constraints to bound the values of the constant terms
2371
 * and increment *n_eq and *n_ineq accordingly.
2372
 *
2373
 * In practice, add_bound_constant_constraints only adds inequalities.
2374
 */
2375
static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
2376
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2377
49
{
2378
49
  if (isl_options_get_schedule_max_constant_term(ctx) == -1)
2379
0
    return isl_stat_ok;
2380
49
2381
49
  *n_ineq += graph->n;
2382
49
2383
49
  return isl_stat_ok;
2384
49
}
2385
2386
/* Add constraints to bound the values of the constant terms in the schedule,
2387
 * if requested by the user.
2388
 *
2389
 * The maximal value of the constant terms is defined by the option
2390
 * "schedule_max_constant_term".
2391
 */
2392
static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
2393
  struct isl_sched_graph *graph)
2394
49
{
2395
49
  int i, k;
2396
49
  int max;
2397
49
  int total;
2398
49
2399
49
  max = isl_options_get_schedule_max_constant_term(ctx);
2400
49
  if (max == -1)
2401
0
    return isl_stat_ok;
2402
49
2403
49
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2404
49
2405
100
  for (i = 0; i < graph->n; 
++i51
) {
2406
51
    struct isl_sched_node *node = &graph->node[i];
2407
51
    int pos;
2408
51
2409
51
    k = isl_basic_set_alloc_inequality(graph->lp);
2410
51
    if (k < 0)
2411
0
      return isl_stat_error;
2412
51
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
2413
51
    pos = node_cst_coef_offset(node);
2414
51
    isl_int_set_si(graph->lp->ineq[k][1 + pos], -1);
2415
51
    isl_int_set_si(graph->lp->ineq[k][0], max);
2416
51
  }
2417
49
2418
49
  return isl_stat_ok;
2419
49
}
2420
2421
/* Count the number of constraints that will be added by
2422
 * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
2423
 * accordingly.
2424
 *
2425
 * In practice, add_bound_coefficient_constraints only adds inequalities.
2426
 */
2427
static int count_bound_coefficient_constraints(isl_ctx *ctx,
2428
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2429
49
{
2430
49
  int i;
2431
49
2432
49
  if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
2433
49
      
!isl_options_get_schedule_treat_coalescing(ctx)0
)
2434
0
    return 0;
2435
49
2436
100
  
for (i = 0; 49
i < graph->n;
++i51
)
2437
51
    *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
2438
49
2439
49
  return 0;
2440
49
}
2441
2442
/* Add constraints to graph->lp that bound the values of
2443
 * the parameter schedule coefficients of "node" to "max" and
2444
 * the variable schedule coefficients to the corresponding entry
2445
 * in node->max.
2446
 * In either case, a negative value means that no bound needs to be imposed.
2447
 *
2448
 * For parameter coefficients, this amounts to adding a constraint
2449
 *
2450
 *  c_n <= max
2451
 *
2452
 * i.e.,
2453
 *
2454
 *  -c_n + max >= 0
2455
 *
2456
 * The variables coefficients are, however, not represented directly.
2457
 * Instead, the variable coefficients c_x are written as differences
2458
 * c_x = c_x^+ - c_x^-.
2459
 * That is,
2460
 *
2461
 *  -max_i <= c_x_i <= max_i
2462
 *
2463
 * is encoded as
2464
 *
2465
 *  -max_i <= c_x_i^+ - c_x_i^- <= max_i
2466
 *
2467
 * or
2468
 *
2469
 *  -(c_x_i^+ - c_x_i^-) + max_i >= 0
2470
 *  c_x_i^+ - c_x_i^- + max_i >= 0
2471
 */
2472
static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
2473
  struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
2474
51
{
2475
51
  int i, j, k;
2476
51
  int total;
2477
51
  isl_vec *ineq;
2478
51
2479
51
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2480
51
2481
66
  for (j = 0; j < node->nparam; 
++j15
) {
2482
15
    int dim;
2483
15
2484
15
    if (max < 0)
2485
0
      continue;
2486
15
2487
15
    k = isl_basic_set_alloc_inequality(graph->lp);
2488
15
    if (k < 0)
2489
0
      return isl_stat_error;
2490
15
    dim = 1 + node_par_coef_offset(node) + j;
2491
15
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
2492
15
    isl_int_set_si(graph->lp->ineq[k][dim], -1);
2493
15
    isl_int_set_si(graph->lp->ineq[k][0], max);
2494
15
  }
2495
51
2496
51
  ineq = isl_vec_alloc(ctx, 1 + total);
2497
51
  ineq = isl_vec_clr(ineq);
2498
51
  if (!ineq)
2499
0
    return isl_stat_error;
2500
179
  
for (i = 0; 51
i < node->nvar;
++i128
) {
2501
128
    int pos = 1 + node_var_coef_pos(node, i);
2502
128
2503
128
    if (isl_int_is_neg(node->max->el[i]))
2504
128
      
continue0
;
2505
128
2506
128
    isl_int_set_si(ineq->el[pos], 1);
2507
128
    isl_int_set_si(ineq->el[pos + 1], -1);
2508
128
    isl_int_set(ineq->el[0], node->max->el[i]);
2509
128
2510
128
    k = isl_basic_set_alloc_inequality(graph->lp);
2511
128
    if (k < 0)
2512
0
      goto error;
2513
128
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2514
128
2515
128
    isl_seq_neg(ineq->el + pos, ineq->el + pos, 2);
2516
128
    k = isl_basic_set_alloc_inequality(graph->lp);
2517
128
    if (k < 0)
2518
0
      goto error;
2519
128
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2520
128
2521
128
    isl_seq_clr(ineq->el + pos, 2);
2522
128
  }
2523
51
  isl_vec_free(ineq);
2524
51
2525
51
  return isl_stat_ok;
2526
0
error:
2527
0
  isl_vec_free(ineq);
2528
0
  return isl_stat_error;
2529
51
}
2530
2531
/* Add constraints that bound the values of the variable and parameter
2532
 * coefficients of the schedule.
2533
 *
2534
 * The maximal value of the coefficients is defined by the option
2535
 * 'schedule_max_coefficient' and the entries in node->max.
2536
 * These latter entries are only set if either the schedule_max_coefficient
2537
 * option or the schedule_treat_coalescing option is set.
2538
 */
2539
static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
2540
  struct isl_sched_graph *graph)
2541
49
{
2542
49
  int i;
2543
49
  int max;
2544
49
2545
49
  max = isl_options_get_schedule_max_coefficient(ctx);
2546
49
2547
49
  if (max == -1 && 
!isl_options_get_schedule_treat_coalescing(ctx)0
)
2548
0
    return isl_stat_ok;
2549
49
2550
100
  
for (i = 0; 49
i < graph->n;
++i51
) {
2551
51
    struct isl_sched_node *node = &graph->node[i];
2552
51
2553
51
    if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
2554
0
      return isl_stat_error;
2555
51
  }
2556
49
2557
49
  return isl_stat_ok;
2558
49
}
2559
2560
/* Add a constraint to graph->lp that equates the value at position
2561
 * "sum_pos" to the sum of the "n" values starting at "first".
2562
 */
2563
static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
2564
  int sum_pos, int first, int n)
2565
49
{
2566
49
  int i, k;
2567
49
  int total;
2568
49
2569
49
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2570
49
2571
49
  k = isl_basic_set_alloc_equality(graph->lp);
2572
49
  if (k < 0)
2573
0
    return isl_stat_error;
2574
49
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2575
49
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2576
79
  for (i = 0; i < n; 
++i30
)
2577
49
    
isl_int_set_si30
(graph->lp->eq[k][1 + first + i], 1);
2578
49
2579
49
  return isl_stat_ok;
2580
49
}
2581
2582
/* Add a constraint to graph->lp that equates the value at position
2583
 * "sum_pos" to the sum of the parameter coefficients of all nodes.
2584
 */
2585
static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
2586
  int sum_pos)
2587
49
{
2588
49
  int i, j, k;
2589
49
  int total;
2590
49
2591
49
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2592
49
2593
49
  k = isl_basic_set_alloc_equality(graph->lp);
2594
49
  if (k < 0)
2595
0
    return isl_stat_error;
2596
49
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2597
49
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2598
100
  for (i = 0; i < graph->n; 
++i51
) {
2599
51
    int pos = 1 + node_par_coef_offset(&graph->node[i]);
2600
51
2601
66
    for (j = 0; j < graph->node[i].nparam; 
++j15
)
2602
51
      
isl_int_set_si15
(graph->lp->eq[k][pos + j], 1);
2603
51
  }
2604
49
2605
49
  return isl_stat_ok;
2606
49
}
2607
2608
/* Add a constraint to graph->lp that equates the value at position
2609
 * "sum_pos" to the sum of the variable coefficients of all nodes.
2610
 */
2611
static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
2612
  int sum_pos)
2613
49
{
2614
49
  int i, j, k;
2615
49
  int total;
2616
49
2617
49
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2618
49
2619
49
  k = isl_basic_set_alloc_equality(graph->lp);
2620
49
  if (k < 0)
2621
0
    return isl_stat_error;
2622
49
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2623
49
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2624
100
  for (i = 0; i < graph->n; 
++i51
) {
2625
51
    struct isl_sched_node *node = &graph->node[i];
2626
51
    int pos = 1 + node_var_coef_offset(node);
2627
51
2628
307
    for (j = 0; j < 2 * node->nvar; 
++j256
)
2629
256
      isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2630
51
  }
2631
49
2632
49
  return isl_stat_ok;
2633
49
}
2634
2635
/* Construct an ILP problem for finding schedule coefficients
2636
 * that result in non-negative, but small dependence distances
2637
 * over all dependences.
2638
 * In particular, the dependence distances over proximity edges
2639
 * are bounded by m_0 + m_n n and we compute schedule coefficients
2640
 * with small values (preferably zero) of m_n and m_0.
2641
 *
2642
 * All variables of the ILP are non-negative.  The actual coefficients
2643
 * may be negative, so each coefficient is represented as the difference
2644
 * of two non-negative variables.  The negative part always appears
2645
 * immediately before the positive part.
2646
 * Other than that, the variables have the following order
2647
 *
2648
 *  - sum of positive and negative parts of m_n coefficients
2649
 *  - m_0
2650
 *  - sum of all c_n coefficients
2651
 *    (unconstrained when computing non-parametric schedules)
2652
 *  - sum of positive and negative parts of all c_x coefficients
2653
 *  - positive and negative parts of m_n coefficients
2654
 *  - for each node
2655
 *    - positive and negative parts of c_i_x, in opposite order
2656
 *    - c_i_n (if parametric)
2657
 *    - c_i_0
2658
 *
2659
 * The constraints are those from the edges plus two or three equalities
2660
 * to express the sums.
2661
 *
2662
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2663
 * Otherwise, we ignore them.
2664
 */
2665
static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
2666
  int use_coincidence)
2667
49
{
2668
49
  int i;
2669
49
  unsigned nparam;
2670
49
  unsigned total;
2671
49
  isl_space *space;
2672
49
  int parametric;
2673
49
  int param_pos;
2674
49
  int n_eq, n_ineq;
2675
49
2676
49
  parametric = ctx->opt->schedule_parametric;
2677
49
  nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
2678
49
  param_pos = 4;
2679
49
  total = param_pos + 2 * nparam;
2680
100
  for (i = 0; i < graph->n; 
++i51
) {
2681
51
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2682
51
    if (node_update_vmap(node) < 0)
2683
0
      return isl_stat_error;
2684
51
    node->start = total;
2685
51
    total += 1 + node->nparam + 2 * node->nvar;
2686
51
  }
2687
49
2688
49
  if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0)
2689
0
    return isl_stat_error;
2690
49
  if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2691
0
    return isl_stat_error;
2692
49
  if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2693
0
    return isl_stat_error;
2694
49
2695
49
  space = isl_space_set_alloc(ctx, 0, total);
2696
49
  isl_basic_set_free(graph->lp);
2697
49
  n_eq += 2 + parametric;
2698
49
2699
49
  graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
2700
49
2701
49
  if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
2702
0
    return isl_stat_error;
2703
49
  if (parametric && add_param_sum_constraint(graph, 2) < 0)
2704
0
    return isl_stat_error;
2705
49
  if (add_var_sum_constraint(graph, 3) < 0)
2706
0
    return isl_stat_error;
2707
49
  if (add_bound_constant_constraints(ctx, graph) < 0)
2708
0
    return isl_stat_error;
2709
49
  if (add_bound_coefficient_constraints(ctx, graph) < 0)
2710
0
    return isl_stat_error;
2711
49
  if (add_all_validity_constraints(graph, use_coincidence) < 0)
2712
0
    return isl_stat_error;
2713
49
  if (add_all_proximity_constraints(graph, use_coincidence) < 0)
2714
0
    return isl_stat_error;
2715
49
2716
49
  return isl_stat_ok;
2717
49
}
2718
2719
/* Analyze the conflicting constraint found by
2720
 * isl_tab_basic_set_non_trivial_lexmin.  If it corresponds to the validity
2721
 * constraint of one of the edges between distinct nodes, living, moreover
2722
 * in distinct SCCs, then record the source and sink SCC as this may
2723
 * be a good place to cut between SCCs.
2724
 */
2725
static int check_conflict(int con, void *user)
2726
49
{
2727
49
  int i;
2728
49
  struct isl_sched_graph *graph = user;
2729
49
2730
49
  if (graph->src_scc >= 0)
2731
0
    return 0;
2732
49
2733
49
  con -= graph->lp->n_eq;
2734
49
2735
49
  if (con >= graph->lp->n_ineq)
2736
24
    return 0;
2737
25
2738
58
  
for (i = 0; 25
i < graph->n_edge;
++i33
) {
2739
33
    if (!is_validity(&graph->edge[i]))
2740
0
      continue;
2741
33
    if (graph->edge[i].src == graph->edge[i].dst)
2742
25
      continue;
2743
8
    if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
2744
8
      continue;
2745
0
    if (graph->edge[i].start > con)
2746
0
      continue;
2747
0
    if (graph->edge[i].end <= con)
2748
0
      continue;
2749
0
    graph->src_scc = graph->edge[i].src->scc;
2750
0
    graph->dst_scc = graph->edge[i].dst->scc;
2751
0
  }
2752
49
2753
49
  return 0;
2754
49
}
2755
2756
/* Check whether the next schedule row of the given node needs to be
2757
 * non-trivial.  Lower-dimensional domains may have some trivial rows,
2758
 * but as soon as the number of remaining required non-trivial rows
2759
 * is as large as the number or remaining rows to be computed,
2760
 * all remaining rows need to be non-trivial.
2761
 */
2762
static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
2763
51
{
2764
51
  return node->nvar - node->rank >= graph->maxvar - graph->n_row;
2765
51
}
2766
2767
/* Construct a non-triviality region with triviality directions
2768
 * corresponding to the rows of "indep".
2769
 * The rows of "indep" are expressed in terms of the schedule coefficients c_i,
2770
 * while the triviality directions are expressed in terms of
2771
 * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing
2772
 * before c^+_i.  Furthermore,
2773
 * the pairs of non-negative variables representing the coefficients
2774
 * are stored in the opposite order.
2775
 */
2776
static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep)
2777
51
{
2778
51
  isl_ctx *ctx;
2779
51
  isl_mat *mat;
2780
51
  int i, j, n, n_var;
2781
51
2782
51
  if (!indep)
2783
0
    return NULL;
2784
51
2785
51
  ctx = isl_mat_get_ctx(indep);
2786
51
  n = isl_mat_rows(indep);
2787
51
  n_var = isl_mat_cols(indep);
2788
51
  mat = isl_mat_alloc(ctx, n, 2 * n_var);
2789
51
  if (!mat)
2790
0
    return NULL;
2791
134
  
for (i = 0; 51
i < n;
++i83
) {
2792
299
    for (j = 0; j < n_var; 
++j216
) {
2793
216
      int nj = n_var - 1 - j;
2794
216
      isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]);
2795
216
      isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]);
2796
216
    }
2797
83
  }
2798
51
2799
51
  return mat;
2800
51
}
2801
2802
/* Solve the ILP problem constructed in setup_lp.
2803
 * For each node such that all the remaining rows of its schedule
2804
 * need to be non-trivial, we construct a non-triviality region.
2805
 * This region imposes that the next row is independent of previous rows.
2806
 * In particular, the non-triviality region enforces that at least
2807
 * one of the linear combinations in the rows of node->indep is non-zero.
2808
 */
2809
static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
2810
49
{
2811
49
  int i;
2812
49
  isl_vec *sol;
2813
49
  isl_basic_set *lp;
2814
49
2815
100
  for (i = 0; i < graph->n; 
++i51
) {
2816
51
    struct isl_sched_node *node = &graph->node[i];
2817
51
    isl_mat *trivial;
2818
51
2819
51
    graph->region[i].pos = node_var_coef_offset(node);
2820
51
    if (needs_row(graph, node))
2821
51
      trivial = construct_trivial(node->indep);
2822
0
    else
2823
0
      trivial = isl_mat_zero(ctx, 0, 0);
2824
51
    graph->region[i].trivial = trivial;
2825
51
  }
2826
49
  lp = isl_basic_set_copy(graph->lp);
2827
49
  sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
2828
49
               graph->region, &check_conflict, graph);
2829
100
  for (i = 0; i < graph->n; 
++i51
)
2830
51
    isl_mat_free(graph->region[i].trivial);
2831
49
  return sol;
2832
49
}
2833
2834
/* Extract the coefficients for the variables of "node" from "sol".
2835
 *
2836
 * Each schedule coefficient c_i_x is represented as the difference
2837
 * between two non-negative variables c_i_x^+ - c_i_x^-.
2838
 * The c_i_x^- appear before their c_i_x^+ counterpart.
2839
 * Furthermore, the order of these pairs is the opposite of that
2840
 * of the corresponding coefficients.
2841
 *
2842
 * Return c_i_x = c_i_x^+ - c_i_x^-
2843
 */
2844
static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
2845
  __isl_keep isl_vec *sol)
2846
42
{
2847
42
  int i;
2848
42
  int pos;
2849
42
  isl_vec *csol;
2850
42
2851
42
  if (!sol)
2852
0
    return NULL;
2853
42
  csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
2854
42
  if (!csol)
2855
0
    return NULL;
2856
42
2857
42
  pos = 1 + node_var_coef_offset(node);
2858
148
  for (i = 0; i < node->nvar; 
++i106
)
2859
106
    isl_int_sub(csol->el[node->nvar - 1 - i],
2860
42
          sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
2861
42
2862
42
  return csol;
2863
42
}
2864
2865
/* Update the schedules of all nodes based on the given solution
2866
 * of the LP problem.
2867
 * The new row is added to the current band.
2868
 * All possibly negative coefficients are encoded as a difference
2869
 * of two non-negative variables, so we need to perform the subtraction
2870
 * here.
2871
 *
2872
 * If coincident is set, then the caller guarantees that the new
2873
 * row satisfies the coincidence constraints.
2874
 */
2875
static int update_schedule(struct isl_sched_graph *graph,
2876
  __isl_take isl_vec *sol, int coincident)
2877
41
{
2878
41
  int i, j;
2879
41
  isl_vec *csol = NULL;
2880
41
2881
41
  if (!sol)
2882
0
    goto error;
2883
41
  if (sol->size == 0)
2884
41
    
isl_die0
(sol->ctx, isl_error_internal,
2885
41
      "no solution found", goto error);
2886
41
  if (graph->n_total_row >= graph->max_row)
2887
41
    
isl_die0
(sol->ctx, isl_error_internal,
2888
41
      "too many schedule rows", goto error);
2889
41
2890
83
  
for (i = 0; 41
i < graph->n;
++i42
) {
2891
42
    struct isl_sched_node *node = &graph->node[i];
2892
42
    int pos;
2893
42
    int row = isl_mat_rows(node->sched);
2894
42
2895
42
    isl_vec_free(csol);
2896
42
    csol = extract_var_coef(node, sol);
2897
42
    if (!csol)
2898
0
      goto error;
2899
42
2900
42
    isl_map_free(node->sched_map);
2901
42
    node->sched_map = NULL;
2902
42
    node->sched = isl_mat_add_rows(node->sched, 1);
2903
42
    if (!node->sched)
2904
0
      goto error;
2905
42
    pos = node_cst_coef_offset(node);
2906
42
    node->sched = isl_mat_set_element(node->sched,
2907
42
          row, 0, sol->el[1 + pos]);
2908
42
    pos = node_par_coef_offset(node);
2909
53
    for (j = 0; j < node->nparam; 
++j11
)
2910
42
      node->sched = isl_mat_set_element(node->sched,
2911
11
          row, 1 + j, sol->el[1 + pos + j]);
2912
148
    for (j = 0; j < node->nvar; 
++j106
)
2913
106
      node->sched = isl_mat_set_element(node->sched,
2914
106
          row, 1 + node->nparam + j, csol->el[j]);
2915
42
    node->coincident[graph->n_total_row] = coincident;
2916
42
  }
2917
41
  isl_vec_free(sol);
2918
41
  isl_vec_free(csol);
2919
41
2920
41
  graph->n_row++;
2921
41
  graph->n_total_row++;
2922
41
2923
41
  return 0;
2924
0
error:
2925
0
  isl_vec_free(sol);
2926
0
  isl_vec_free(csol);
2927
0
  return -1;
2928
41
}
2929
2930
/* Convert row "row" of node->sched into an isl_aff living in "ls"
2931
 * and return this isl_aff.
2932
 */
2933
static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
2934
  struct isl_sched_node *node, int row)
2935
44
{
2936
44
  int j;
2937
44
  isl_int v;
2938
44
  isl_aff *aff;
2939
44
2940
44
  isl_int_init(v);
2941
44
2942
44
  aff = isl_aff_zero_on_domain(ls);
2943
44
  isl_mat_get_element(node->sched, row, 0, &v);
2944
44
  aff = isl_aff_set_constant(aff, v);
2945
55
  for (j = 0; j < node->nparam; 
++j11
) {
2946
11
    isl_mat_get_element(node->sched, row, 1 + j, &v);
2947
11
    aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
2948
11
  }
2949
152
  for (j = 0; j < node->nvar; 
++j108
) {
2950
108
    isl_mat_get_element(node->sched, row, 1 + node->nparam + j, &v);
2951
108
    aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
2952
108
  }
2953
44
2954
44
  isl_int_clear(v);
2955
44
2956
44
  return aff;
2957
44
}
2958
2959
/* Convert the "n" rows starting at "first" of node->sched into a multi_aff
2960
 * and return this multi_aff.
2961
 *
2962
 * The result is defined over the uncompressed node domain.
2963
 */
2964
static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff(
2965
  struct isl_sched_node *node, int first, int n)
2966
20
{
2967
20
  int i;
2968
20
  isl_space *space;
2969
20
  isl_local_space *ls;
2970
20
  isl_aff *aff;
2971
20
  isl_multi_aff *ma;
2972
20
  int nrow;
2973
20
2974
20
  if (!node)
2975
0
    return NULL;
2976
20
  nrow = isl_mat_rows(node->sched);
2977
20
  if (node->compressed)
2978
0
    space = isl_multi_aff_get_domain_space(node->decompress);
2979
20
  else
2980
20
    space = isl_space_copy(node->space);
2981
20
  ls = isl_local_space_from_space(isl_space_copy(space));
2982
20
  space = isl_space_from_domain(space);
2983
20
  space = isl_space_add_dims(space, isl_dim_out, n);
2984
20
  ma = isl_multi_aff_zero(space);
2985
20
2986
64
  for (i = first; i < first + n; 
++i44
) {
2987
44
    aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
2988
44
    ma = isl_multi_aff_set_aff(ma, i - first, aff);
2989
44
  }
2990
20
2991
20
  isl_local_space_free(ls);
2992
20
2993
20
  if (node->compressed)
2994
0
    ma = isl_multi_aff_pullback_multi_aff(ma,
2995
0
          isl_multi_aff_copy(node->compress));
2996
20
2997
20
  return ma;
2998
20
}
2999
3000
/* Convert node->sched into a multi_aff and return this multi_aff.
3001
 *
3002
 * The result is defined over the uncompressed node domain.
3003
 */
3004
static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
3005
  struct isl_sched_node *node)
3006
2
{
3007
2
  int nrow;
3008
2
3009
2
  nrow = isl_mat_rows(node->sched);
3010
2
  return node_extract_partial_schedule_multi_aff(node, 0, nrow);
3011
2
}
3012
3013
/* Convert node->sched into a map and return this map.
3014
 *
3015
 * The result is cached in node->sched_map, which needs to be released
3016
 * whenever node->sched is updated.
3017
 * It is defined over the uncompressed node domain.
3018
 */
3019
static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
3020
6
{
3021
6
  if (!node->sched_map) {
3022
2
    isl_multi_aff *ma;
3023
2
3024
2
    ma = node_extract_schedule_multi_aff(node);
3025
2
    node->sched_map = isl_map_from_multi_aff(ma);
3026
2
  }
3027
6
3028
6
  return isl_map_copy(node->sched_map);
3029
6
}
3030
3031
/* Construct a map that can be used to update a dependence relation
3032
 * based on the current schedule.
3033
 * That is, construct a map expressing that source and sink
3034
 * are executed within the same iteration of the current schedule.
3035
 * This map can then be intersected with the dependence relation.
3036
 * This is not the most efficient way, but this shouldn't be a critical
3037
 * operation.
3038
 */
3039
static __isl_give isl_map *specializer(struct isl_sched_node *src,
3040
  struct isl_sched_node *dst)
3041
3
{
3042
3
  isl_map *src_sched, *dst_sched;
3043
3
3044
3
  src_sched = node_extract_schedule(src);
3045
3
  dst_sched = node_extract_schedule(dst);
3046
3
  return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
3047
3
}
3048
3049
/* Intersect the domains of the nested relations in domain and range
3050
 * of "umap" with "map".
3051
 */
3052
static __isl_give isl_union_map *intersect_domains(
3053
  __isl_take isl_union_map *umap, __isl_keep isl_map *map)
3054
0
{
3055
0
  isl_union_set *uset;
3056
0
3057
0
  umap = isl_union_map_zip(umap);
3058
0
  uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
3059
0
  umap = isl_union_map_intersect_domain(umap, uset);
3060
0
  umap = isl_union_map_zip(umap);
3061
0
  return umap;
3062
0
}
3063
3064
/* Update the dependence relation of the given edge based
3065
 * on the current schedule.
3066
 * If the dependence is carried completely by the current schedule, then
3067
 * it is removed from the edge_tables.  It is kept in the list of edges
3068
 * as otherwise all edge_tables would have to be recomputed.
3069
 */
3070
static int update_edge(struct isl_sched_graph *graph,
3071
  struct isl_sched_edge *edge)
3072
3
{
3073
3
  int empty;
3074
3
  isl_map *id;
3075
3
3076
3
  id = specializer(edge->src, edge->dst);
3077
3
  edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
3078
3
  if (!edge->map)
3079
0
    goto error;
3080
3
3081
3
  if (edge->tagged_condition) {
3082
0
    edge->tagged_condition =
3083
0
      intersect_domains(edge->tagged_condition, id);
3084
0
    if (!edge->tagged_condition)
3085
0
      goto error;
3086
3
  }
3087
3
  if (edge->tagged_validity) {
3088
0
    edge->tagged_validity =
3089
0
      intersect_domains(edge->tagged_validity, id);
3090
0
    if (!edge->tagged_validity)
3091
0
      goto error;
3092
3
  }
3093
3
3094
3
  empty = isl_map_plain_is_empty(edge->map);
3095
3
  if (empty < 0)
3096
0
    goto error;
3097
3
  if (empty)
3098
2
    graph_remove_edge(graph, edge);
3099
3
3100
3
  isl_map_free(id);
3101
3
  return 0;
3102
0
error:
3103
0
  isl_map_free(id);
3104
0
  return -1;
3105
3
}
3106
3107
/* Does the domain of "umap" intersect "uset"?
3108
 */
3109
static int domain_intersects(__isl_keep isl_union_map *umap,
3110
  __isl_keep isl_union_set *uset)
3111
0
{
3112
0
  int empty;
3113
0
3114
0
  umap = isl_union_map_copy(umap);
3115
0
  umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
3116
0
  empty = isl_union_map_is_empty(umap);
3117
0
  isl_union_map_free(umap);
3118
0
3119
0
  return empty < 0 ? -1 : !empty;
3120
0
}
3121
3122
/* Does the range of "umap" intersect "uset"?
3123
 */
3124
static int range_intersects(__isl_keep isl_union_map *umap,
3125
  __isl_keep isl_union_set *uset)
3126
0
{
3127
0
  int empty;
3128
0
3129
0
  umap = isl_union_map_copy(umap);
3130
0
  umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
3131
0
  empty = isl_union_map_is_empty(umap);
3132
0
  isl_union_map_free(umap);
3133
0
3134
0
  return empty < 0 ? -1 : !empty;
3135
0
}
3136
3137
/* Are the condition dependences of "edge" local with respect to
3138
 * the current schedule?
3139
 *
3140
 * That is, are domain and range of the condition dependences mapped
3141
 * to the same point?
3142
 *
3143
 * In other words, is the condition false?
3144
 */
3145
static int is_condition_false(struct isl_sched_edge *edge)
3146
0
{
3147
0
  isl_union_map *umap;
3148
0
  isl_map *map, *sched, *test;
3149
0
  int empty, local;
3150
0
3151
0
  empty = isl_union_map_is_empty(edge->tagged_condition);
3152
0
  if (empty < 0 || empty)
3153
0
    return empty;
3154
0
3155
0
  umap = isl_union_map_copy(edge->tagged_condition);
3156
0
  umap = isl_union_map_zip(umap);
3157
0
  umap = isl_union_set_unwrap(isl_union_map_domain(umap));
3158
0
  map = isl_map_from_union_map(umap);
3159
0
3160
0
  sched = node_extract_schedule(edge->src);
3161
0
  map = isl_map_apply_domain(map, sched);
3162
0
  sched = node_extract_schedule(edge->dst);
3163
0
  map = isl_map_apply_range(map, sched);
3164
0
3165
0
  test = isl_map_identity(isl_map_get_space(map));
3166
0
  local = isl_map_is_subset(map, test);
3167
0
  isl_map_free(map);
3168
0
  isl_map_free(test);
3169
0
3170
0
  return local;
3171
0
}
3172
3173
/* For each conditional validity constraint that is adjacent
3174
 * to a condition with domain in condition_source or range in condition_sink,
3175
 * turn it into an unconditional validity constraint.
3176
 */
3177
static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
3178
  __isl_take isl_union_set *condition_source,
3179
  __isl_take isl_union_set *condition_sink)
3180
0
{
3181
0
  int i;
3182
0
3183
0
  condition_source = isl_union_set_coalesce(condition_source);
3184
0
  condition_sink = isl_union_set_coalesce(condition_sink);
3185
0
3186
0
  for (i = 0; i < graph->n_edge; ++i) {
3187
0
    int adjacent;
3188
0
    isl_union_map *validity;
3189
0
3190
0
    if (!is_conditional_validity(&graph->edge[i]))
3191
0
      continue;
3192
0
    if (is_validity(&graph->edge[i]))
3193
0
      continue;
3194
0
3195
0
    validity = graph->edge[i].tagged_validity;
3196
0
    adjacent = domain_intersects(validity, condition_sink);
3197
0
    if (adjacent >= 0 && !adjacent)
3198
0
      adjacent = range_intersects(validity, condition_source);
3199
0
    if (adjacent < 0)
3200
0
      goto error;
3201
0
    if (!adjacent)
3202
0
      continue;
3203
0
3204
0
    set_validity(&graph->edge[i]);
3205
0
  }
3206
0
3207
0
  isl_union_set_free(condition_source);
3208
0
  isl_union_set_free(condition_sink);
3209
0
  return 0;
3210
0
error:
3211
0
  isl_union_set_free(condition_source);
3212
0
  isl_union_set_free(condition_sink);
3213
0
  return -1;
3214
0
}
3215
3216
/* Update the dependence relations of all edges based on the current schedule
3217
 * and enforce conditional validity constraints that are adjacent
3218
 * to satisfied condition constraints.
3219
 *
3220
 * First check if any of the condition constraints are satisfied
3221
 * (i.e., not local to the outer schedule) and keep track of
3222
 * their domain and range.
3223
 * Then update all dependence relations (which removes the non-local
3224
 * constraints).
3225
 * Finally, if any condition constraints turned out to be satisfied,
3226
 * then turn all adjacent conditional validity constraints into
3227
 * unconditional validity constraints.
3228
 */
3229
static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
3230
1
{
3231
1
  int i;
3232
1
  int any = 0;
3233
1
  isl_union_set *source, *sink;
3234
1
3235
1
  source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3236
1
  sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3237
4
  for (i = 0; i < graph->n_edge; 
++i3
) {
3238
3
    int local;
3239
3
    isl_union_set *uset;
3240
3
    isl_union_map *umap;
3241
3
3242
3
    if (!is_condition(&graph->edge[i]))
3243
3
      continue;
3244
0
    if (is_local(&graph->edge[i]))
3245
0
      continue;
3246
0
    local = is_condition_false(&graph->edge[i]);
3247
0
    if (local < 0)
3248
0
      goto error;
3249
0
    if (local)
3250
0
      continue;
3251
0
3252
0
    any = 1;
3253
0
3254
0
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3255
0
    uset = isl_union_map_domain(umap);
3256
0
    source = isl_union_set_union(source, uset);
3257
0
3258
0
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3259
0
    uset = isl_union_map_range(umap);
3260
0
    sink = isl_union_set_union(sink, uset);
3261
0
  }
3262
1
3263
4
  
for (i = graph->n_edge - 1; 1
i >= 0;
--i3
) {
3264
3
    if (update_edge(graph, &graph->edge[i]) < 0)
3265
0
      goto error;
3266
3
  }
3267
1
3268
1
  if (any)
3269
0
    return unconditionalize_adjacent_validity(graph, source, sink);
3270
1
3271
1
  isl_union_set_free(source);
3272
1
  isl_union_set_free(sink);
3273
1
  return 0;
3274
0
error:
3275
0
  isl_union_set_free(source);
3276
0
  isl_union_set_free(sink);
3277
0
  return -1;
3278
1
}
3279
3280
static void next_band(struct isl_sched_graph *graph)
3281
1
{
3282
1
  graph->band_start = graph->n_total_row;
3283
1
}
3284
3285
/* Return the union of the universe domains of the nodes in "graph"
3286
 * that satisfy "pred".
3287
 */
3288
static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
3289
  struct isl_sched_graph *graph,
3290
  int (*pred)(struct isl_sched_node *node, int data), int data)
3291
12
{
3292
12
  int i;
3293
12
  isl_set *set;
3294
12
  isl_union_set *dom;
3295
12
3296
18
  for (i = 0; i < graph->n; 
++i6
)
3297
18
    if (pred(&graph->node[i], data))
3298
12
      break;
3299
12
3300
12
  if (i >= graph->n)
3301
12
    
isl_die0
(ctx, isl_error_internal,
3302
12
      "empty component", return NULL);
3303
12
3304
12
  set = isl_set_universe(isl_space_copy(graph->node[i].space));
3305
12
  dom = isl_union_set_from_set(set);
3306
12
3307
18
  for (i = i + 1; i < graph->n; 
++i6
) {
3308
6
    if (!pred(&graph->node[i], data))
3309
6
      continue;
3310
0
    set = isl_set_universe(isl_space_copy(graph->node[i].space));
3311
0
    dom = isl_union_set_union(dom, isl_union_set_from_set(set));
3312
0
  }
3313
12
3314
12
  return dom;
3315
12
}
3316
3317
/* Return a list of unions of universe domains, where each element
3318
 * in the list corresponds to an SCC (or WCC) indexed by node->scc.
3319
 */
3320
static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx,
3321
  struct isl_sched_graph *graph)
3322
6
{
3323
6
  int i;
3324
6
  isl_union_set_list *filters;
3325
6
3326
6
  filters = isl_union_set_list_alloc(ctx, graph->scc);
3327
18
  for (i = 0; i < graph->scc; 
++i12
) {
3328
12
    isl_union_set *dom;
3329
12
3330
12
    dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i);
3331
12
    filters = isl_union_set_list_add(filters, dom);
3332
12
  }
3333
6
3334
6
  return filters;
3335
6
}
3336
3337
/* Return a list of two unions of universe domains, one for the SCCs up
3338
 * to and including graph->src_scc and another for the other SCCs.
3339
 */
3340
static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
3341
  struct isl_sched_graph *graph)
3342
0
{
3343
0
  isl_union_set *dom;
3344
0
  isl_union_set_list *filters;
3345
0
3346
0
  filters = isl_union_set_list_alloc(ctx, 2);
3347
0
  dom = isl_sched_graph_domain(ctx, graph,
3348
0
          &node_scc_at_most, graph->src_scc);
3349
0
  filters = isl_union_set_list_add(filters, dom);
3350
0
  dom = isl_sched_graph_domain(ctx, graph,
3351
0
          &node_scc_at_least, graph->src_scc + 1);
3352
0
  filters = isl_union_set_list_add(filters, dom);
3353
0
3354
0
  return filters;
3355
0
}
3356
3357
/* Copy nodes that satisfy node_pred from the src dependence graph
3358
 * to the dst dependence graph.
3359
 */
3360
static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
3361
  int (*node_pred)(struct isl_sched_node *node, int data), int data)
3362
10
{
3363
10
  int i;
3364
10
3365
10
  dst->n = 0;
3366
30
  for (i = 0; i < src->n; 
++i20
) {
3367
20
    int j;
3368
20
3369
20
    if (!node_pred(&src->node[i], data))
3370
10
      continue;
3371
10
3372
10
    j = dst->n;
3373
10
    dst->node[j].space = isl_space_copy(src->node[i].space);
3374
10
    dst->node[j].compressed = src->node[i].compressed;
3375
10
    dst->node[j].hull = isl_set_copy(src->node[i].hull);
3376
10
    dst->node[j].compress =
3377
10
      isl_multi_aff_copy(src->node[i].compress);
3378
10
    dst->node[j].decompress =
3379
10
      isl_multi_aff_copy(src->node[i].decompress);
3380
10
    dst->node[j].nvar = src->node[i].nvar;
3381
10
    dst->node[j].nparam = src->node[i].nparam;
3382
10
    dst->node[j].sched = isl_mat_copy(src->node[i].sched);
3383
10
    dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
3384
10
    dst->node[j].coincident = src->node[i].coincident;
3385
10
    dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
3386
10
    dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds);
3387
10
    dst->node[j].max = isl_vec_copy(src->node[i].max);
3388
10
    dst->n++;
3389
10
3390
10
    if (!dst->node[j].space || !dst->node[j].sched)
3391
0
      return -1;
3392
10
    if (dst->node[j].compressed &&
3393
10
        
(0
!dst->node[j].hull0
||
!dst->node[j].compress0
||
3394
0
         !dst->node[j].decompress))
3395
0
      return -1;
3396
20
  }
3397
10
3398
10
  return 0;
3399
10
}
3400
3401
/* Copy non-empty edges that satisfy edge_pred from the src dependence graph
3402
 * to the dst dependence graph.
3403
 * If the source or destination node of the edge is not in the destination
3404
 * graph, then it must be a backward proximity edge and it should simply
3405
 * be ignored.
3406
 */
3407
static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
3408
  struct isl_sched_graph *src,
3409
  int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
3410
10
{
3411
10
  int i;
3412
10
  enum isl_edge_type t;
3413
10
3414
10
  dst->n_edge = 0;
3415
28
  for (i = 0; i < src->n_edge; 
++i18
) {
3416
18
    struct isl_sched_edge *edge = &src->edge[i];
3417
18
    isl_map *map;
3418
18
    isl_union_map *tagged_condition;
3419
18
    isl_union_map *tagged_validity;
3420
18
    struct isl_sched_node *dst_src, *dst_dst;
3421
18
3422
18
    if (!edge_pred(edge, data))
3423
14
      continue;
3424
4
3425
4
    if (isl_map_plain_is_empty(edge->map))
3426
0
      continue;
3427
4
3428
4
    dst_src = graph_find_node(ctx, dst, edge->src->space);
3429
4
    dst_dst = graph_find_node(ctx, dst, edge->dst->space);
3430
4
    if (!dst_src || !dst_dst) {
3431
0
      if (is_validity(edge) || is_conditional_validity(edge))
3432
0
        isl_die(ctx, isl_error_internal,
3433
0
          "backward (conditional) validity edge",
3434
0
          return -1);
3435
0
      continue;
3436
4
    }
3437
4
3438
4
    map = isl_map_copy(edge->map);
3439
4
    tagged_condition = isl_union_map_copy(edge->tagged_condition);
3440
4
    tagged_validity = isl_union_map_copy(edge->tagged_validity);
3441
4
3442
4
    dst->edge[dst->n_edge].src = dst_src;
3443
4
    dst->edge[dst->n_edge].dst = dst_dst;
3444
4
    dst->edge[dst->n_edge].map = map;
3445
4
    dst->edge[dst->n_edge].tagged_condition = tagged_condition;
3446
4
    dst->edge[dst->n_edge].tagged_validity = tagged_validity;
3447
4
    dst->edge[dst->n_edge].types = edge->types;
3448
4
    dst->n_edge++;
3449
4
3450
4
    if (edge->tagged_condition && 
!tagged_condition0
)
3451
0
      return -1;
3452
4
    if (edge->tagged_validity && 
!tagged_validity0
)
3453
0
      return -1;
3454
4
3455
24
    
for (t = isl_edge_first; 4
t <= isl_edge_last;
++t20
) {
3456
20
      if (edge !=
3457
20
          graph_find_edge(src, t, edge->src, edge->dst))
3458
8
        continue;
3459
12
      if (graph_edge_table_add(ctx, dst, t,
3460
12
              &dst->edge[dst->n_edge - 1]) < 0)
3461
0
        return -1;
3462
20
    }
3463
18
  }
3464
10
3465
10
  return 0;
3466
10
}
3467
3468
/* Compute the maximal number of variables over all nodes.
3469
 * This is the maximal number of linearly independent schedule
3470
 * rows that we need to compute.
3471
 * Just in case we end up in a part of the dependence graph
3472
 * with only lower-dimensional domains, we make sure we will
3473
 * compute the required amount of extra linearly independent rows.
3474
 */
3475
static int compute_maxvar(struct isl_sched_graph *graph)
3476
17
{
3477
17
  int i;
3478
17
3479
17
  graph->maxvar = 0;
3480
35
  for (i = 0; i < graph->n; 
++i18
) {
3481
18
    struct isl_sched_node *node = &graph->node[i];
3482
18
    int nvar;
3483
18
3484
18
    if (node_update_vmap(node) < 0)
3485
0
      return -1;
3486
18
    nvar = node->nvar + graph->n_row - node->rank;
3487
18
    if (nvar > graph->maxvar)
3488
17
      graph->maxvar = nvar;
3489
18
  }
3490
17
3491
17
  return 0;
3492
17
}
3493
3494
/* Extract the subgraph of "graph" that consists of the node satisfying
3495
 * "node_pred" and the edges satisfying "edge_pred" and store
3496
 * the result in "sub".
3497
 */
3498
static int extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
3499
  int (*node_pred)(struct isl_sched_node *node, int data),
3500
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3501
  int data, struct isl_sched_graph *sub)
3502
10
{
3503
10
  int i, n = 0, n_edge = 0;
3504
10
  int t;
3505
10
3506
30
  for (i = 0; i < graph->n; 
++i20
)
3507
20
    if (node_pred(&graph->node[i], data))
3508
10
      ++n;
3509
28
  for (i = 0; i < graph->n_edge; 
++i18
)
3510
18
    if (edge_pred(&graph->edge[i], data))
3511
4
      ++n_edge;
3512
10
  if (graph_alloc(ctx, sub, n, n_edge) < 0)
3513
0
    return -1;
3514
10
  sub->root = graph->root;
3515
10
  if (copy_nodes(sub, graph, node_pred, data) < 0)
3516
0
    return -1;
3517
10
  if (graph_init_table(ctx, sub) < 0)
3518
0
    return -1;
3519
60
  
for (t = 0; 10
t <= isl_edge_last;
++t50
)
3520
50
    sub->max_edge[t] = graph->max_edge[t];
3521
10
  if (graph_init_edge_tables(ctx, sub) < 0)
3522
0
    return -1;
3523
10
  if (copy_edges(ctx, sub, graph, edge_pred, data) < 0)
3524
0
    return -1;
3525
10
  sub->n_row = graph->n_row;
3526
10
  sub->max_row = graph->max_row;
3527
10
  sub->n_total_row = graph->n_total_row;
3528
10
  sub->band_start = graph->band_start;
3529
10
3530
10
  return 0;
3531
10
}
3532
3533
static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
3534
  struct isl_sched_graph *graph);
3535
static __isl_give isl_schedule_node *compute_schedule_wcc(
3536
  isl_schedule_node *node, struct isl_sched_graph *graph);
3537
3538
/* Compute a schedule for a subgraph of "graph".  In particular, for
3539
 * the graph composed of nodes that satisfy node_pred and edges that
3540
 * that satisfy edge_pred.
3541
 * If the subgraph is known to consist of a single component, then wcc should
3542
 * be set and then we call compute_schedule_wcc on the constructed subgraph.
3543
 * Otherwise, we call compute_schedule, which will check whether the subgraph
3544
 * is connected.
3545
 *
3546
 * The schedule is inserted at "node" and the updated schedule node
3547
 * is returned.
3548
 */
3549
static __isl_give isl_schedule_node *compute_sub_schedule(
3550
  __isl_take isl_schedule_node *node, isl_ctx *ctx,
3551
  struct isl_sched_graph *graph,
3552
  int (*node_pred)(struct isl_sched_node *node, int data),
3553
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3554
  int data, int wcc)
3555
10
{
3556
10
  struct isl_sched_graph split = { 0 };
3557
10
3558
10
  if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data,
3559
10
        &split) < 0)
3560
0
    goto error;
3561
10
3562
10
  if (wcc)
3563
10
    node = compute_schedule_wcc(node, &split);
3564
0
  else
3565
0
    node = compute_schedule(node, &split);
3566
10
3567
10
  graph_free(ctx, &split);
3568
10
  return node;
3569
0
error:
3570
0
  graph_free(ctx, &split);
3571
0
  return isl_schedule_node_free(node);
3572
10
}
3573
3574
static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
3575
36
{
3576
36
  return edge->src->scc == scc && 
edge->dst->scc == scc18
;
3577
36
}
3578
3579
static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
3580
0
{
3581
0
  return edge->dst->scc <= scc;
3582
0
}
3583
3584
static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
3585
0
{
3586
0
  return edge->src->scc >= scc;
3587
0
}
3588
3589
/* Reset the current band by dropping all its schedule rows.
3590
 */
3591
static int reset_band(struct isl_sched_graph *graph)
3592
0
{
3593
0
  int i;
3594
0
  int drop;
3595
0
3596
0
  drop = graph->n_total_row - graph->band_start;
3597
0
  graph->n_total_row -= drop;
3598
0
  graph->n_row -= drop;
3599
0
3600
0
  for (i = 0; i < graph->n; ++i) {
3601
0
    struct isl_sched_node *node = &graph->node[i];
3602
0
3603
0
    isl_map_free(node->sched_map);
3604
0
    node->sched_map = NULL;
3605
0
3606
0
    node->sched = isl_mat_drop_rows(node->sched,
3607
0
            graph->band_start, drop);
3608
0
3609
0
    if (!node->sched)
3610
0
      return -1;
3611
0
  }
3612
0
3613
0
  return 0;
3614
0
}
3615
3616
/* Split the current graph into two parts and compute a schedule for each
3617
 * part individually.  In particular, one part consists of all SCCs up
3618
 * to and including graph->src_scc, while the other part contains the other
3619
 * SCCs.  The split is enforced by a sequence node inserted at position "node"
3620
 * in the schedule tree.  Return the updated schedule node.
3621
 * If either of these two parts consists of a sequence, then it is spliced
3622
 * into the sequence containing the two parts.
3623
 *
3624
 * The current band is reset. It would be possible to reuse
3625
 * the previously computed rows as the first rows in the next
3626
 * band, but recomputing them may result in better rows as we are looking
3627
 * at a smaller part of the dependence graph.
3628
 */
3629
static __isl_give isl_schedule_node *compute_split_schedule(
3630
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3631
0
{
3632
0
  int is_seq;
3633
0
  isl_ctx *ctx;
3634
0
  isl_union_set_list *filters;
3635
0
3636
0
  if (!node)
3637
0
    return NULL;
3638
0
3639
0
  if (reset_band(graph) < 0)
3640
0
    return isl_schedule_node_free(node);
3641
0
3642
0
  next_band(graph);
3643
0
3644
0
  ctx = isl_schedule_node_get_ctx(node);
3645
0
  filters = extract_split(ctx, graph);
3646
0
  node = isl_schedule_node_insert_sequence(node, filters);
3647
0
  node = isl_schedule_node_child(node, 1);
3648
0
  node = isl_schedule_node_child(node, 0);
3649
0
3650
0
  node = compute_sub_schedule(node, ctx, graph,
3651
0
        &node_scc_at_least, &edge_src_scc_at_least,
3652
0
        graph->src_scc + 1, 0);
3653
0
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3654
0
  node = isl_schedule_node_parent(node);
3655
0
  node = isl_schedule_node_parent(node);
3656
0
  if (is_seq)
3657
0
    node = isl_schedule_node_sequence_splice_child(node, 1);
3658
0
  node = isl_schedule_node_child(node, 0);
3659
0
  node = isl_schedule_node_child(node, 0);
3660
0
  node = compute_sub_schedule(node, ctx, graph,
3661
0
        &node_scc_at_most, &edge_dst_scc_at_most,
3662
0
        graph->src_scc, 0);
3663
0
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3664
0
  node = isl_schedule_node_parent(node);
3665
0
  node = isl_schedule_node_parent(node);
3666
0
  if (is_seq)
3667
0
    node = isl_schedule_node_sequence_splice_child(node, 0);
3668
0
3669
0
  return node;
3670
0
}
3671
3672
/* Insert a band node at position "node" in the schedule tree corresponding
3673
 * to the current band in "graph".  Mark the band node permutable
3674
 * if "permutable" is set.
3675
 * The partial schedules and the coincidence property are extracted
3676
 * from the graph nodes.
3677
 * Return the updated schedule node.
3678
 */
3679
static __isl_give isl_schedule_node *insert_current_band(
3680
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3681
  int permutable)
3682
17
{
3683
17
  int i;
3684
17
  int start, end, n;
3685
17
  isl_multi_aff *ma;
3686
17
  isl_multi_pw_aff *mpa;
3687
17
  isl_multi_union_pw_aff *mupa;
3688
17
3689
17
  if (!node)
3690
0
    return NULL;
3691
17
3692
17
  if (graph->n < 1)
3693
17
    
isl_die0
(isl_schedule_node_get_ctx(node), isl_error_internal,
3694
17
      "graph should have at least one node",
3695
17
      return isl_schedule_node_free(node));
3696
17
3697
17
  start = graph->band_start;
3698
17
  end = graph->n_total_row;
3699
17
  n = end - start;
3700
17
3701
17
  ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n);
3702
17
  mpa = isl_multi_pw_aff_from_multi_aff(ma);
3703
17
  mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3704
17
3705
18
  for (i = 1; i < graph->n; 
++i1
) {
3706
1
    isl_multi_union_pw_aff *mupa_i;
3707
1
3708
1
    ma = node_extract_partial_schedule_multi_aff(&graph->node[i],
3709
1
                start, n);
3710
1
    mpa = isl_multi_pw_aff_from_multi_aff(ma);
3711
1
    mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3712
1
    mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
3713
1
  }
3714
17
  node = isl_schedule_node_insert_partial_schedule(node, mupa);
3715
17
3716
58
  for (i = 0; i < n; 
++i41
)
3717
41
    node = isl_schedule_node_band_member_set_coincident(node, i,
3718
41
          graph->node[0].coincident[start + i]);
3719
17
  node = isl_schedule_node_band_set_permutable(node, permutable);
3720
17
3721
17
  return node;
3722
17
}
3723
3724
/* Update the dependence relations based on the current schedule,
3725
 * add the current band to "node" and then continue with the computation
3726
 * of the next band.
3727
 * Return the updated schedule node.
3728
 */
3729
static __isl_give isl_schedule_node *compute_next_band(
3730
  __isl_take isl_schedule_node *node,
3731
  struct isl_sched_graph *graph, int permutable)
3732
0
{
3733
0
  isl_ctx *ctx;
3734
0
3735
0
  if (!node)
3736
0
    return NULL;
3737
0
3738
0
  ctx = isl_schedule_node_get_ctx(node);
3739
0
  if (update_edges(ctx, graph) < 0)
3740
0
    return isl_schedule_node_free(node);
3741
0
  node = insert_current_band(node, graph, permutable);
3742
0
  next_band(graph);
3743
0
3744
0
  node = isl_schedule_node_child(node, 0);
3745
0
  node = compute_schedule(node, graph);
3746
0
  node = isl_schedule_node_parent(node);
3747
0
3748
0
  return node;
3749
0
}
3750
3751
/* Add the constraints "coef" derived from an edge from "node" to itself
3752
 * to graph->lp in order to respect the dependences and to try and carry them.
3753
 * "pos" is the sequence number of the edge that needs to be carried.
3754
 * "coef" represents general constraints on coefficients (c_0, c_x)
3755
 * of valid constraints for (y - x) with x and y instances of the node.
3756
 *
3757
 * The constraints added to graph->lp need to enforce
3758
 *
3759
 *  (c_j_0 + c_j_x y) - (c_j_0 + c_j_x x)
3760
 *  = c_j_x (y - x) >= e_i
3761
 *
3762
 * for each (x,y) in the dependence relation of the edge.
3763
 * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x),
3764
 * taking into account that each coefficient in c_j_x is represented
3765
 * as a pair of non-negative coefficients.
3766
 */
3767
static isl_stat add_intra_constraints(struct isl_sched_graph *graph,
3768
  struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos)
3769
0
{
3770
0
  int offset;
3771
0
  isl_ctx *ctx;
3772
0
  isl_dim_map *dim_map;
3773
0
3774
0
  if (!coef)
3775
0
    return isl_stat_error;
3776
0
3777
0
  ctx = isl_basic_set_get_ctx(coef);
3778
0
  offset = coef_var_offset(coef);
3779
0
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
3780
0
  isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3781
0
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3782
0
3783
0
  return isl_stat_ok;
3784
0
}
3785
3786
/* Add the constraints "coef" derived from an edge from "src" to "dst"
3787
 * to graph->lp in order to respect the dependences and to try and carry them.
3788
 * "pos" is the sequence number of the edge that needs to be carried or
3789
 * -1 if no attempt should be made to carry the dependences.
3790
 * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
3791
 * of valid constraints for (x, y) with x and y instances of "src" and "dst".
3792
 *
3793
 * The constraints added to graph->lp need to enforce
3794
 *
3795
 *  (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
3796
 *
3797
 * for each (x,y) in the dependence relation of the edge or
3798
 *
3799
 *  (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0
3800
 *
3801
 * if pos is -1.
3802
 * That is,
3803
 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3804
 * or
3805
 * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3806
 * needs to be plugged in for (c_0, c_n, c_x, c_y),
3807
 * taking into account that each coefficient in c_j_x and c_k_x is represented
3808
 * as a pair of non-negative coefficients.
3809
 */
3810
static isl_stat add_inter_constraints(struct isl_sched_graph *graph,
3811
  struct isl_sched_node *src, struct isl_sched_node *dst,
3812
  __isl_take isl_basic_set *coef, int pos)
3813
0
{
3814
0
  int offset;
3815
0
  isl_ctx *ctx;
3816
0
  isl_dim_map *dim_map;
3817
0
3818
0
  if (!coef)
3819
0
    return isl_stat_error;
3820
0
3821
0
  ctx = isl_basic_set_get_ctx(coef);
3822
0
  offset = coef_var_offset(coef);
3823
0
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
3824
0
  if (pos >= 0)
3825
0
    isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3826
0
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3827
0
3828
0
  return isl_stat_ok;
3829
0
}
3830
3831
/* Data structure for keeping track of the data needed
3832
 * to exploit non-trivial lineality spaces.
3833
 *
3834
 * "any_non_trivial" is true if there are any non-trivial lineality spaces.
3835
 * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL.
3836
 * "equivalent" connects instances to other instances on the same line(s).
3837
 * "mask" contains the domain spaces of "equivalent".
3838
 * Any instance set not in "mask" does not have a non-trivial lineality space.
3839
 */
3840
struct isl_exploit_lineality_data {
3841
  isl_bool any_non_trivial;
3842
  isl_union_map *equivalent;
3843
  isl_union_set *mask;
3844
};
3845
3846
/* Data structure collecting information used during the construction
3847
 * of an LP for carrying dependences.
3848
 *
3849
 * "intra" is a sequence of coefficient constraints for intra-node edges.
3850
 * "inter" is a sequence of coefficient constraints for inter-node edges.
3851
 * "lineality" contains data used to exploit non-trivial lineality spaces.
3852
 */
3853
struct isl_carry {
3854
  isl_basic_set_list *intra;
3855
  isl_basic_set_list *inter;
3856
  struct isl_exploit_lineality_data lineality;
3857
};
3858
3859
/* Free all the data stored in "carry".
3860
 */
3861
static void isl_carry_clear(struct isl_carry *carry)
3862
0
{
3863
0
  isl_basic_set_list_free(carry->intra);
3864
0
  isl_basic_set_list_free(carry->inter);
3865
0
  isl_union_map_free(carry->lineality.equivalent);
3866
0
  isl_union_set_free(carry->lineality.mask);
3867
0
}
3868
3869
/* Return a pointer to the node in "graph" that lives in "space".
3870
 * If the requested node has been compressed, then "space"
3871
 * corresponds to the compressed space.
3872
 *
3873
 * First try and see if "space" is the space of an uncompressed node.
3874
 * If so, return that node.
3875
 * Otherwise, "space" was constructed by construct_compressed_id and
3876
 * contains a user pointer pointing to the node in the tuple id.
3877
 * However, this node belongs to the original dependence graph.
3878
 * If "graph" is a subgraph of this original dependence graph,
3879
 * then the node with the same space still needs to be looked up
3880
 * in the current graph.
3881
 */
3882
static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx,
3883
  struct isl_sched_graph *graph, __isl_keep isl_space *space)
3884
0
{
3885
0
  isl_id *id;
3886
0
  struct isl_sched_node *node;
3887
0
3888
0
  if (!space)
3889
0
    return NULL;
3890
0
3891
0
  node = graph_find_node(ctx, graph, space);
3892
0
  if (node)
3893
0
    return node;
3894
0
3895
0
  id = isl_space_get_tuple_id(space, isl_dim_set);
3896
0
  node = isl_id_get_user(id);
3897
0
  isl_id_free(id);
3898
0
3899
0
  if (!node)
3900
0
    return NULL;
3901
0
3902
0
  if (!is_node(graph->root, node))
3903
0
    isl_die(ctx, isl_error_internal,
3904
0
      "space points to invalid node", return NULL);
3905
0
  if (graph != graph->root)
3906
0
    node = graph_find_node(ctx, graph, node->space);
3907
0
3908
0
  return node;
3909
0
}
3910
3911
/* Internal data structure for add_all_constraints.
3912
 *
3913
 * "graph" is the schedule constraint graph for which an LP problem
3914
 * is being constructed.
3915
 * "carry_inter" indicates whether inter-node edges should be carried.
3916
 * "pos" is the position of the next edge that needs to be carried.
3917
 */
3918
struct isl_add_all_constraints_data {
3919
  isl_ctx *ctx;
3920
  struct isl_sched_graph *graph;
3921
  int carry_inter;
3922
  int pos;
3923
};
3924
3925
/* Add the constraints "coef" derived from an edge from a node to itself
3926
 * to data->graph->lp in order to respect the dependences and
3927
 * to try and carry them.
3928
 *
3929
 * The space of "coef" is of the form
3930
 *
3931
 *  coefficients[[c_cst] -> S[c_x]]
3932
 *
3933
 * with S[c_x] the (compressed) space of the node.
3934
 * Extract the node from the space and call add_intra_constraints.
3935
 */
3936
static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user)
3937
0
{
3938
0
  struct isl_add_all_constraints_data *data = user;
3939
0
  isl_space *space;
3940
0
  struct isl_sched_node *node;
3941
0
3942
0
  space = isl_basic_set_get_space(coef);
3943
0
  space = isl_space_range(isl_space_unwrap(space));
3944
0
  node = graph_find_compressed_node(data->ctx, data->graph, space);
3945
0
  isl_space_free(space);
3946
0
  return add_intra_constraints(data->graph, node, coef, data->pos++);
3947
0
}
3948
3949
/* Add the constraints "coef" derived from an edge from a node j
3950
 * to a node k to data->graph->lp in order to respect the dependences and
3951
 * to try and carry them (provided data->carry_inter is set).
3952
 *
3953
 * The space of "coef" is of the form
3954
 *
3955
 *  coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
3956
 *
3957
 * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
3958
 * Extract the nodes from the space and call add_inter_constraints.
3959
 */
3960
static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user)
3961
0
{
3962
0
  struct isl_add_all_constraints_data *data = user;
3963
0
  isl_space *space, *dom;
3964
0
  struct isl_sched_node *src, *dst;
3965
0
  int pos;
3966
0
3967
0
  space = isl_basic_set_get_space(coef);
3968
0
  space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space)));
3969
0
  dom = isl_space_domain(isl_space_copy(space));
3970
0
  src = graph_find_compressed_node(data->ctx, data->graph, dom);
3971
0
  isl_space_free(dom);
3972
0
  space = isl_space_range(space);
3973
0
  dst = graph_find_compressed_node(data->ctx, data->graph, space);
3974
0
  isl_space_free(space);
3975
0
3976
0
  pos = data->carry_inter ? data->pos++ : -1;
3977
0
  return add_inter_constraints(data->graph, src, dst, coef, pos);
3978
0
}
3979
3980
/* Add constraints to graph->lp that force all (conditional) validity
3981
 * dependences to be respected and attempt to carry them.
3982
 * "intra" is the sequence of coefficient constraints for intra-node edges.
3983
 * "inter" is the sequence of coefficient constraints for inter-node edges.
3984
 * "carry_inter" indicates whether inter-node edges should be carried or
3985
 * only respected.
3986
 */
3987
static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph,
3988
  __isl_keep isl_basic_set_list *intra,
3989
  __isl_keep isl_basic_set_list *inter, int carry_inter)
3990
0
{
3991
0
  struct isl_add_all_constraints_data data = { ctx, graph, carry_inter };
3992
0
3993
0
  data.pos = 0;
3994
0
  if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0)
3995
0
    return isl_stat_error;
3996
0
  if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0)
3997
0
    return isl_stat_error;
3998
0
  return isl_stat_ok;
3999
0
}
4000
4001
/* Internal data structure for count_all_constraints
4002
 * for keeping track of the number of equality and inequality constraints.
4003
 */
4004
struct isl_sched_count {
4005
  int n_eq;
4006
  int n_ineq;
4007
};
4008
4009
/* Add the number of equality and inequality constraints of "bset"
4010
 * to data->n_eq and data->n_ineq.
4011
 */
4012
static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user)
4013
0
{
4014
0
  struct isl_sched_count *data = user;
4015
0
4016
0
  return update_count(bset, 1, &data->n_eq, &data->n_ineq);
4017
0
}
4018
4019
/* Count the number of equality and inequality constraints
4020
 * that will be added to the carry_lp problem.
4021
 * We count each edge exactly once.
4022
 * "intra" is the sequence of coefficient constraints for intra-node edges.
4023
 * "inter" is the sequence of coefficient constraints for inter-node edges.
4024
 */
4025
static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra,
4026
  __isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq)
4027
0
{
4028
0
  struct isl_sched_count data;
4029
0
4030
0
  data.n_eq = data.n_ineq = 0;
4031
0
  if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0)
4032
0
    return isl_stat_error;
4033
0
  if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0)
4034
0
    return isl_stat_error;
4035
0
4036
0
  *n_eq = data.n_eq;
4037
0
  *n_ineq = data.n_ineq;
4038
0
4039
0
  return isl_stat_ok;
4040
0
}
4041
4042
/* Construct an LP problem for finding schedule coefficients
4043
 * such that the schedule carries as many validity dependences as possible.
4044
 * In particular, for each dependence i, we bound the dependence distance
4045
 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
4046
 * of all e_i's.  Dependences with e_i = 0 in the solution are simply
4047
 * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
4048
 * "intra" is the sequence of coefficient constraints for intra-node edges.
4049
 * "inter" is the sequence of coefficient constraints for inter-node edges.
4050
 * "n_edge" is the total number of edges.
4051
 * "carry_inter" indicates whether inter-node edges should be carried or
4052
 * only respected.  That is, if "carry_inter" is not set, then
4053
 * no e_i variables are introduced for the inter-node edges.
4054
 *
4055
 * All variables of the LP are non-negative.  The actual coefficients
4056
 * may be negative, so each coefficient is represented as the difference
4057
 * of two non-negative variables.  The negative part always appears
4058
 * immediately before the positive part.
4059
 * Other than that, the variables have the following order
4060
 *
4061
 *  - sum of (1 - e_i) over all edges
4062
 *  - sum of all c_n coefficients
4063
 *    (unconstrained when computing non-parametric schedules)
4064
 *  - sum of positive and negative parts of all c_x coefficients
4065
 *  - for each edge
4066
 *    - e_i
4067
 *  - for each node
4068
 *    - positive and negative parts of c_i_x, in opposite order
4069
 *    - c_i_n (if parametric)
4070
 *    - c_i_0
4071
 *
4072
 * The constraints are those from the (validity) edges plus three equalities
4073
 * to express the sums and n_edge inequalities to express e_i <= 1.
4074
 */
4075
static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
4076
  int n_edge, __isl_keep isl_basic_set_list *intra,
4077
  __isl_keep isl_basic_set_list *inter, int carry_inter)
4078
0
{
4079
0
  int i;
4080
0
  int k;
4081
0
  isl_space *dim;
4082
0
  unsigned total;
4083
0
  int n_eq, n_ineq;
4084
0
4085
0
  total = 3 + n_edge;
4086
0
  for (i = 0; i < graph->n; ++i) {
4087
0
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
4088
0
    node->start = total;
4089
0
    total += 1 + node->nparam + 2 * node->nvar;
4090
0
  }
4091
0
4092
0
  if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0)
4093
0
    return isl_stat_error;
4094
0
4095
0
  dim = isl_space_set_alloc(ctx, 0, total);
4096
0
  isl_basic_set_free(graph->lp);
4097
0
  n_eq += 3;
4098
0
  n_ineq += n_edge;
4099
0
  graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
4100
0
  graph->lp = isl_basic_set_set_rational(graph->lp);
4101
0
4102
0
  k = isl_basic_set_alloc_equality(graph->lp);
4103
0
  if (k < 0)
4104
0
    return isl_stat_error;
4105
0
  isl_seq_clr(graph->lp->eq[k], 1 + total);
4106
0
  isl_int_set_si(graph->lp->eq[k][0], -n_edge);
4107
0
  isl_int_set_si(graph->lp->eq[k][1], 1);
4108
0
  for (i = 0; i < n_edge; ++i)
4109
0
    isl_int_set_si(graph->lp->eq[k][4 + i], 1);
4110
0
4111
0
  if (add_param_sum_constraint(graph, 1) < 0)
4112
0
    return isl_stat_error;
4113
0
  if (add_var_sum_constraint(graph, 2) < 0)
4114
0
    return isl_stat_error;
4115
0
4116
0
  for (i = 0; i < n_edge; ++i) {
4117
0
    k = isl_basic_set_alloc_inequality(graph->lp);
4118
0
    if (k < 0)
4119
0
      return isl_stat_error;
4120
0
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
4121
0
    isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
4122
0
    isl_int_set_si(graph->lp->ineq[k][0], 1);
4123
0
  }
4124
0
4125
0
  if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0)
4126
0
    return isl_stat_error;
4127
0
4128
0
  return isl_stat_ok;
4129
0
}
4130
4131
static __isl_give isl_schedule_node *compute_component_schedule(
4132
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4133
  int wcc);
4134
4135
/* If the schedule_split_scaled option is set and if the linear
4136
 * parts of the scheduling rows for all nodes in the graphs have
4137
 * a non-trivial common divisor, then remove this
4138
 * common divisor from the linear part.
4139
 * Otherwise, insert a band node directly and continue with
4140
 * the construction of the schedule.
4141
 *
4142
 * If a non-trivial common divisor is found, then
4143
 * the linear part is reduced and the remainder is ignored.
4144
 * The pieces of the graph that are assigned different remainders
4145
 * form (groups of) strongly connected components within
4146
 * the scaled down band.  If needed, they can therefore
4147
 * be ordered along this remainder in a sequence node.
4148
 * However, this ordering is not enforced here in order to allow
4149
 * the scheduler to combine some of the strongly connected components.
4150
 */
4151
static __isl_give isl_schedule_node *split_scaled(
4152
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4153
0
{
4154
0
  int i;
4155
0
  int row;
4156
0
  isl_ctx *ctx;
4157
0
  isl_int gcd, gcd_i;
4158
0
4159
0
  if (!node)
4160
0
    return NULL;
4161
0
4162
0
  ctx = isl_schedule_node_get_ctx(node);
4163
0
  if (!ctx->opt->schedule_split_scaled)
4164
0
    return compute_next_band(node, graph, 0);
4165
0
  if (graph->n <= 1)
4166
0
    return compute_next_band(node, graph, 0);
4167
0
4168
0
  isl_int_init(gcd);
4169
0
  isl_int_init(gcd_i);
4170
0
4171
0
  isl_int_set_si(gcd, 0);
4172
0
4173
0
  row = isl_mat_rows(graph->node[0].sched) - 1;
4174
0
4175
0
  for (i = 0; i < graph->n; ++i) {
4176
0
    struct isl_sched_node *node = &graph->node[i];
4177
0
    int cols = isl_mat_cols(node->sched);
4178
0
4179
0
    isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
4180
0
    isl_int_gcd(gcd, gcd, gcd_i);
4181
0
  }
4182
0
4183
0
  isl_int_clear(gcd_i);
4184
0
4185
0
  if (isl_int_cmp_si(gcd, 1) <= 0) {
4186
0
    isl_int_clear(gcd);
4187
0
    return compute_next_band(node, graph, 0);
4188
0
  }
4189
0
4190
0
  for (i = 0; i < graph->n; ++i) {
4191
0
    struct isl_sched_node *node = &graph->node[i];
4192
0
4193
0
    isl_int_fdiv_q(node->sched->row[row][0],
4194
0
             node->sched->row[row][0], gcd);
4195
0
    isl_int_mul(node->sched->row[row][0],
4196
0
          node->sched->row[row][0], gcd);
4197
0
    node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
4198
0
    if (!node->sched)
4199
0
      goto error;
4200
0
  }
4201
0
4202
0
  isl_int_clear(gcd);
4203
0
4204
0
  return compute_next_band(node, graph, 0);
4205
0
error:
4206
0
  isl_int_clear(gcd);
4207
0
  return isl_schedule_node_free(node);
4208
0
}
4209
4210
/* Is the schedule row "sol" trivial on node "node"?
4211
 * That is, is the solution zero on the dimensions linearly independent of
4212
 * the previously found solutions?
4213
 * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
4214
 *
4215
 * Each coefficient is represented as the difference between
4216
 * two non-negative values in "sol".
4217
 * We construct the schedule row s and check if it is linearly
4218
 * independent of previously computed schedule rows
4219
 * by computing T s, with T the linear combinations that are zero
4220
 * on linearly dependent schedule rows.
4221
 * If the result consists of all zeros, then the solution is trivial.
4222
 */
4223
static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
4224
0
{
4225
0
  int trivial;
4226
0
  isl_vec *node_sol;
4227
0
4228
0
  if (!sol)
4229
0
    return -1;
4230
0
  if (node->nvar == node->rank)
4231
0
    return 0;
4232
0
4233
0
  node_sol = extract_var_coef(node, sol);
4234
0
  node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol);
4235
0
  if (!node_sol)
4236
0
    return -1;
4237
0
4238
0
  trivial = isl_seq_first_non_zero(node_sol->el,
4239
0
          node->nvar - node->rank) == -1;
4240
0
4241
0
  isl_vec_free(node_sol);
4242
0
4243
0
  return trivial;
4244
0
}
4245
4246
/* Is the schedule row "sol" trivial on any node where it should
4247
 * not be trivial?
4248
 * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
4249
 */
4250
static int is_any_trivial(struct isl_sched_graph *graph,
4251
  __isl_keep isl_vec *sol)
4252
0
{
4253
0
  int i;
4254
0
4255
0
  for (i = 0; i < graph->n; ++i) {
4256
0
    struct isl_sched_node *node = &graph->node[i];
4257
0
    int trivial;
4258
0
4259
0
    if (!needs_row(graph, node))
4260
0
      continue;
4261
0
    trivial = is_trivial(node, sol);
4262
0
    if (trivial < 0 || trivial)
4263
0
      return trivial;
4264
0
  }
4265
0
4266
0
  return 0;
4267
0
}
4268
4269
/* Does the schedule represented by "sol" perform loop coalescing on "node"?
4270
 * If so, return the position of the coalesced dimension.
4271
 * Otherwise, return node->nvar or -1 on error.
4272
 *
4273
 * In particular, look for pairs of coefficients c_i and c_j such that
4274
 * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|.
4275
 * If any such pair is found, then return i.
4276
 * If size_i is infinity, then no check on c_i needs to be performed.
4277
 */
4278
static int find_node_coalescing(struct isl_sched_node *node,
4279
  __isl_keep isl_vec *sol)
4280
0
{
4281
0
  int i, j;
4282
0
  isl_int max;
4283
0
  isl_vec *csol;
4284
0
4285
0
  if (node->nvar <= 1)
4286
0
    return node->nvar;
4287
0
4288
0
  csol = extract_var_coef(node, sol);
4289
0
  if (!csol)
4290
0
    return -1;
4291
0
  isl_int_init(max);
4292
0
  for (i = 0; i < node->nvar; ++i) {
4293
0
    isl_val *v;
4294
0
4295
0
    if (isl_int_is_zero(csol->el[i]))
4296
0
      continue;
4297
0
    v = isl_multi_val_get_val(node->sizes, i);
4298
0
    if (!v)
4299
0
      goto error;
4300
0
    if (!isl_val_is_int(v)) {
4301
0
      isl_val_free(v);
4302
0
      continue;
4303
0
    }
4304
0
    v = isl_val_div_ui(v, 2);
4305
0
    v = isl_val_ceil(v);
4306
0
    if (!v)
4307
0
      goto error;
4308
0
    isl_int_mul(max, v->n, csol->el[i]);
4309
0
    isl_val_free(v);
4310
0
4311
0
    for (j = 0; j < node->nvar; ++j) {
4312
0
      if (j == i)
4313
0
        continue;
4314
0
      if (isl_int_abs_gt(csol->el[j], max))
4315
0
        break;
4316
0
    }
4317
0
    if (j < node->nvar)
4318
0
      break;
4319
0
  }
4320
0
4321
0
  isl_int_clear(max);
4322
0
  isl_vec_free(csol);
4323
0
  return i;
4324
0
error:
4325
0
  isl_int_clear(max);
4326
0
  isl_vec_free(csol);
4327
0
  return -1;
4328
0
}
4329
4330
/* Force the schedule coefficient at position "pos" of "node" to be zero
4331
 * in "tl".
4332
 * The coefficient is encoded as the difference between two non-negative
4333
 * variables.  Force these two variables to have the same value.
4334
 */
4335
static __isl_give isl_tab_lexmin *zero_out_node_coef(
4336
  __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
4337
0
{
4338
0
  int dim;
4339
0
  isl_ctx *ctx;
4340
0
  isl_vec *eq;
4341
0
4342
0
  ctx = isl_space_get_ctx(node->space);
4343
0
  dim = isl_tab_lexmin_dim(tl);
4344
0
  if (dim < 0)
4345
0
    return isl_tab_lexmin_free(tl);
4346
0
  eq = isl_vec_alloc(ctx, 1 + dim);
4347
0
  eq = isl_vec_clr(eq);
4348
0
  if (!eq)
4349
0
    return isl_tab_lexmin_free(tl);
4350
0
4351
0
  pos = 1 + node_var_coef_pos(node, pos);
4352
0
  isl_int_set_si(eq->el[pos], 1);
4353
0
  isl_int_set_si(eq->el[pos + 1], -1);
4354
0
  tl = isl_tab_lexmin_add_eq(tl, eq->el);
4355
0
  isl_vec_free(eq);
4356
0
4357
0
  return tl;
4358
0
}
4359
4360
/* Return the lexicographically smallest rational point in the basic set
4361
 * from which "tl" was constructed, double checking that this input set
4362
 * was not empty.
4363
 */
4364
static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
4365
0
{
4366
0
  isl_vec *sol;
4367
0
4368
0
  sol = isl_tab_lexmin_get_solution(tl);
4369
0
  if (!sol)
4370
0
    return NULL;
4371
0
  if (sol->size == 0)
4372
0
    isl_die(isl_vec_get_ctx(sol), isl_error_internal,
4373
0
      "error in schedule construction",
4374
0
      return isl_vec_free(sol));
4375
0
  return sol;
4376
0
}
4377
4378
/* Does the solution "sol" of the LP problem constructed by setup_carry_lp
4379
 * carry any of the "n_edge" groups of dependences?
4380
 * The value in the first position is the sum of (1 - e_i) over all "n_edge"
4381
 * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
4382
 * by the edge are carried by the solution.
4383
 * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
4384
 * one of those is carried.
4385
 *
4386
 * Note that despite the fact that the problem is solved using a rational
4387
 * solver, the solution is guaranteed to be integral.
4388
 * Specifically, the dependence distance lower bounds e_i (and therefore
4389
 * also their sum) are integers.  See Lemma 5 of [1].
4390
 *
4391
 * Any potential denominator of the sum is cleared by this function.
4392
 * The denominator is not relevant for any of the other elements
4393
 * in the solution.
4394
 *
4395
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4396
 *     Problem, Part II: Multi-Dimensional Time.
4397
 *     In Intl. Journal of Parallel Programming, 1992.
4398
 */
4399
static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
4400
0
{
4401
0
  isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
4402
0
  isl_int_set_si(sol->el[0], 1);
4403
0
  return isl_int_cmp_si(sol->el[1], n_edge) < 0;
4404
0
}
4405
4406
/* Return the lexicographically smallest rational point in "lp",
4407
 * assuming that all variables are non-negative and performing some
4408
 * additional sanity checks.
4409
 * If "want_integral" is set, then compute the lexicographically smallest
4410
 * integer point instead.
4411
 * In particular, "lp" should not be empty by construction.
4412
 * Double check that this is the case.
4413
 * If dependences are not carried for any of the "n_edge" edges,
4414
 * then return an empty vector.
4415
 *
4416
 * If the schedule_treat_coalescing option is set and
4417
 * if the computed schedule performs loop coalescing on a given node,
4418
 * i.e., if it is of the form
4419
 *
4420
 *  c_i i + c_j j + ...
4421
 *
4422
 * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
4423
 * to cut out this solution.  Repeat this process until no more loop
4424
 * coalescing occurs or until no more dependences can be carried.
4425
 * In the latter case, revert to the previously computed solution.
4426
 *
4427
 * If the caller requests an integral solution and if coalescing should
4428
 * be treated, then perform the coalescing treatment first as
4429
 * an integral solution computed before coalescing treatment
4430
 * would carry the same number of edges and would therefore probably
4431
 * also be coalescing.
4432
 *
4433
 * To allow the coalescing treatment to be performed first,
4434
 * the initial solution is allowed to be rational and it is only
4435
 * cut out (if needed) in the next iteration, if no coalescing measures
4436
 * were taken.
4437
 */
4438
static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
4439
  __isl_take isl_basic_set *lp, int n_edge, int want_integral)
4440
0
{
4441
0
  int i, pos, cut;
4442
0
  isl_ctx *ctx;
4443
0
  isl_tab_lexmin *tl;
4444
0
  isl_vec *sol, *prev = NULL;
4445
0
  int treat_coalescing;
4446
0
4447
0
  if (!lp)
4448
0
    return NULL;
4449
0
  ctx = isl_basic_set_get_ctx(lp);
4450
0
  treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
4451
0
  tl = isl_tab_lexmin_from_basic_set(lp);
4452
0
4453
0
  cut = 0;
4454
0
  do {
4455
0
    int integral;
4456
0
4457
0
    if (cut)
4458
0
      tl = isl_tab_lexmin_cut_to_integer(tl);
4459
0
    sol = non_empty_solution(tl);
4460
0
    if (!sol)
4461
0
      goto error;
4462
0
4463
0
    integral = isl_int_is_one(sol->el[0]);
4464
0
    if (!carries_dependences(sol, n_edge)) {
4465
0
      if (!prev)
4466
0
        prev = isl_vec_alloc(ctx, 0);
4467
0
      isl_vec_free(sol);
4468
0
      sol = prev;
4469
0
      break;
4470
0
    }
4471
0
    prev = isl_vec_free(prev);
4472
0
    cut = want_integral && !integral;
4473
0
    if (cut)
4474
0
      prev = sol;
4475
0
    if (!treat_coalescing)
4476
0
      continue;
4477
0
    for (i = 0; i < graph->n; ++i) {
4478
0
      struct isl_sched_node *node = &graph->node[i];
4479
0
4480
0
      pos = find_node_coalescing(node, sol);
4481
0
      if (pos < 0)
4482
0
        goto error;
4483
0
      if (pos < node->nvar)
4484
0
        break;
4485
0
    }
4486
0
    if (i < graph->n) {
4487
0
      prev = sol;
4488
0
      tl = zero_out_node_coef(tl, &graph->node[i], pos);
4489
0
      cut = 0;
4490
0
    }
4491
0
  } while (prev);
4492
0
4493
0
  isl_tab_lexmin_free(tl);
4494
0
4495
0
  return sol;
4496
0
error:
4497
0
  isl_tab_lexmin_free(tl);
4498
0
  isl_vec_free(prev);
4499
0
  isl_vec_free(sol);
4500
0
  return NULL;
4501
0
}
4502
4503
/* If "edge" is an edge from a node to itself, then add the corresponding
4504
 * dependence relation to "umap".
4505
 * If "node" has been compressed, then the dependence relation
4506
 * is also compressed first.
4507
 */
4508
static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap,
4509
  struct isl_sched_edge *edge)
4510
0
{
4511
0
  isl_map *map;
4512
0
  struct isl_sched_node *node = edge->src;
4513
0
4514
0
  if (edge->src != edge->dst)
4515
0
    return umap;
4516
0
4517
0
  map = isl_map_copy(edge->map);
4518
0
  if (node->compressed) {
4519
0
    map = isl_map_preimage_domain_multi_aff(map,
4520
0
            isl_multi_aff_copy(node->decompress));
4521
0
    map = isl_map_preimage_range_multi_aff(map,
4522
0
            isl_multi_aff_copy(node->decompress));
4523
0
  }
4524
0
  umap = isl_union_map_add_map(umap, map);
4525
0
  return umap;
4526
0
}
4527
4528
/* If "edge" is an edge from a node to another node, then add the corresponding
4529
 * dependence relation to "umap".
4530
 * If the source or destination nodes of "edge" have been compressed,
4531
 * then the dependence relation is also compressed first.
4532
 */
4533
static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap,
4534
  struct isl_sched_edge *edge)
4535
0
{
4536
0
  isl_map *map;
4537
0
4538
0
  if (edge->src == edge->dst)
4539
0
    return umap;
4540
0
4541
0
  map = isl_map_copy(edge->map);
4542
0
  if (edge->src->compressed)
4543
0
    map = isl_map_preimage_domain_multi_aff(map,
4544
0
            isl_multi_aff_copy(edge->src->decompress));
4545
0
  if (edge->dst->compressed)
4546
0
    map = isl_map_preimage_range_multi_aff(map,
4547
0
            isl_multi_aff_copy(edge->dst->decompress));
4548
0
  umap = isl_union_map_add_map(umap, map);
4549
0
  return umap;
4550
0
}
4551
4552
/* Internal data structure used by union_drop_coalescing_constraints
4553
 * to collect bounds on all relevant statements.
4554
 *
4555
 * "graph" is the schedule constraint graph for which an LP problem
4556
 * is being constructed.
4557
 * "bounds" collects the bounds.
4558
 */
4559
struct isl_collect_bounds_data {
4560
  isl_ctx *ctx;
4561
  struct isl_sched_graph *graph;
4562
  isl_union_set *bounds;
4563
};
4564
4565
/* Add the size bounds for the node with instance deltas in "set"
4566
 * to data->bounds.
4567
 */
4568
static isl_stat collect_bounds(__isl_take isl_set *set, void *user)
4569
0
{
4570
0
  struct isl_collect_bounds_data *data = user;
4571
0
  struct isl_sched_node *node;
4572
0
  isl_space *space;
4573
0
  isl_set *bounds;
4574
0
4575
0
  space = isl_set_get_space(set);
4576
0
  isl_set_free(set);
4577
0
4578
0
  node = graph_find_compressed_node(data->ctx, data->graph, space);
4579
0
  isl_space_free(space);
4580
0
4581
0
  bounds = isl_set_from_basic_set(get_size_bounds(node));
4582
0
  data->bounds = isl_union_set_add_set(data->bounds, bounds);
4583
0
4584
0
  return isl_stat_ok;
4585
0
}
4586
4587
/* Drop some constraints from "delta" that could be exploited
4588
 * to construct loop coalescing schedules.
4589
 * In particular, drop those constraint that bound the difference
4590
 * to the size of the domain.
4591
 * Do this for each set/node in "delta" separately.
4592
 * The parameters are assumed to have been projected out by the caller.
4593
 */
4594
static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx,
4595
  struct isl_sched_graph *graph, __isl_take isl_union_set *delta)
4596
0
{
4597
0
  struct isl_collect_bounds_data data = { ctx, graph };
4598
0
4599
0
  data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4600
0
  if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0)
4601
0
    data.bounds = isl_union_set_free(data.bounds);
4602
0
  delta = isl_union_set_plain_gist(delta, data.bounds);
4603
0
4604
0
  return delta;
4605
0
}
4606
4607
/* Given a non-trivial lineality space "lineality", add the corresponding
4608
 * universe set to data->mask and add a map from elements to
4609
 * other elements along the lines in "lineality" to data->equivalent.
4610
 * If this is the first time this function gets called
4611
 * (data->any_non_trivial is still false), then set data->any_non_trivial and
4612
 * initialize data->mask and data->equivalent.
4613
 *
4614
 * In particular, if the lineality space is defined by equality constraints
4615
 *
4616
 *  E x = 0
4617
 *
4618
 * then construct an affine mapping
4619
 *
4620
 *  f : x -> E x
4621
 *
4622
 * and compute the equivalence relation of having the same image under f:
4623
 *
4624
 *  { x -> x' : E x = E x' }
4625
 */
4626
static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality,
4627
  struct isl_exploit_lineality_data *data)
4628
0
{
4629
0
  isl_mat *eq;
4630
0
  isl_space *space;
4631
0
  isl_set *univ;
4632
0
  isl_multi_aff *ma;
4633
0
  isl_multi_pw_aff *mpa;
4634
0
  isl_map *map;
4635
0
  int n;
4636
0
4637
0
  if (!lineality)
4638
0
    return isl_stat_error;
4639
0
  if (isl_basic_set_dim(lineality, isl_dim_div) != 0)
4640
0
    isl_die(isl_basic_set_get_ctx(lineality), isl_error_internal,
4641
0
      "local variables not allowed", goto error);
4642
0
4643
0
  space = isl_basic_set_get_space(lineality);
4644
0
  if (!data->any_non_trivial) {
4645
0
    data->equivalent = isl_union_map_empty(isl_space_copy(space));
4646
0
    data->mask = isl_union_set_empty(isl_space_copy(space));
4647
0
  }
4648
0
  data->any_non_trivial = isl_bool_true;
4649
0
4650
0
  univ = isl_set_universe(isl_space_copy(space));
4651
0
  data->mask = isl_union_set_add_set(data->mask, univ);
4652
0
4653
0
  eq = isl_basic_set_extract_equalities(lineality);
4654
0
  n = isl_mat_rows(eq);
4655
0
  eq = isl_mat_insert_zero_rows(eq, 0, 1);
4656
0
  eq = isl_mat_set_element_si(eq, 0, 0, 1);
4657
0
  space = isl_space_from_domain(space);
4658
0
  space = isl_space_add_dims(space, isl_dim_out, n);
4659
0
  ma = isl_multi_aff_from_aff_mat(space, eq);
4660
0
  mpa = isl_multi_pw_aff_from_multi_aff(ma);
4661
0
  map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa));
4662
0
  data->equivalent = isl_union_map_add_map(data->equivalent, map);
4663
0
4664
0
  isl_basic_set_free(lineality);
4665
0
  return isl_stat_ok;
4666
0
error:
4667
0
  isl_basic_set_free(lineality);
4668
0
  return isl_stat_error;
4669
0
}
4670
4671
/* Check if the lineality space "set" is non-trivial (i.e., is not just
4672
 * the origin or, in other words, satisfies a number of equality constraints
4673
 * that is smaller than the dimension of the set).
4674
 * If so, extend data->mask and data->equivalent accordingly.
4675
 *
4676
 * The input should not have any local variables already, but
4677
 * isl_set_remove_divs is called to make sure it does not.
4678
 */
4679
static isl_stat add_lineality(__isl_take isl_set *set, void *user)
4680
0
{
4681
0
  struct isl_exploit_lineality_data *data = user;
4682
0
  isl_basic_set *hull;
4683
0
  int dim, n_eq;
4684
0
4685
0
  set = isl_set_remove_divs(set);
4686
0
  hull = isl_set_unshifted_simple_hull(set);
4687
0
  dim = isl_basic_set_dim(hull, isl_dim_set);
4688
0
  n_eq = isl_basic_set_n_equality(hull);
4689
0
  if (!hull)
4690
0
    return isl_stat_error;
4691
0
  if (dim != n_eq)
4692
0
    return add_non_trivial_lineality(hull, data);
4693
0
  isl_basic_set_free(hull);
4694
0
  return isl_stat_ok;
4695
0
}
4696
4697
/* Check if the difference set on intra-node schedule constraints "intra"
4698
 * has any non-trivial lineality space.
4699
 * If so, then extend the difference set to a difference set
4700
 * on equivalent elements.  That is, if "intra" is
4701
 *
4702
 *  { y - x : (x,y) \in V }
4703
 *
4704
 * and elements are equivalent if they have the same image under f,
4705
 * then return
4706
 *
4707
 *  { y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4708
 *
4709
 * or, since f is linear,
4710
 *
4711
 *  { y' - x' : (x,y) \in V and f(y - x) = f(y' - x') }
4712
 *
4713
 * The results of the search for non-trivial lineality spaces is stored
4714
 * in "data".
4715
 */
4716
static __isl_give isl_union_set *exploit_intra_lineality(
4717
  __isl_take isl_union_set *intra,
4718
  struct isl_exploit_lineality_data *data)
4719
0
{
4720
0
  isl_union_set *lineality;
4721
0
  isl_union_set *uset;
4722
0
4723
0
  data->any_non_trivial = isl_bool_false;
4724
0
  lineality = isl_union_set_copy(intra);
4725
0
  lineality = isl_union_set_combined_lineality_space(lineality);
4726
0
  if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0)
4727
0
    data->any_non_trivial = isl_bool_error;
4728
0
  isl_union_set_free(lineality);
4729
0
4730
0
  if (data->any_non_trivial < 0)
4731
0
    return isl_union_set_free(intra);
4732
0
  if (!data->any_non_trivial)
4733
0
    return intra;
4734
0
4735
0
  uset = isl_union_set_copy(intra);
4736
0
  intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask));
4737
0
  uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent));
4738
0
  intra = isl_union_set_union(intra, uset);
4739
0
4740
0
  intra = isl_union_set_remove_divs(intra);
4741
0
4742
0
  return intra;
4743
0
}
4744
4745
/* If the difference set on intra-node schedule constraints was found to have
4746
 * any non-trivial lineality space by exploit_intra_lineality,
4747
 * as recorded in "data", then extend the inter-node
4748
 * schedule constraints "inter" to schedule constraints on equivalent elements.
4749
 * That is, if "inter" is V and
4750
 * elements are equivalent if they have the same image under f, then return
4751
 *
4752
 *  { (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4753
 */
4754
static __isl_give isl_union_map *exploit_inter_lineality(
4755
  __isl_take isl_union_map *inter,
4756
  struct isl_exploit_lineality_data *data)
4757
0
{
4758
0
  isl_union_map *umap;
4759
0
4760
0
  if (data->any_non_trivial < 0)
4761
0
    return isl_union_map_free(inter);
4762
0
  if (!data->any_non_trivial)
4763
0
    return inter;
4764
0
4765
0
  umap = isl_union_map_copy(inter);
4766
0
  inter = isl_union_map_subtract_range(inter,
4767
0
        isl_union_set_copy(data->mask));
4768
0
  umap = isl_union_map_apply_range(umap,
4769
0
        isl_union_map_copy(data->equivalent));
4770
0
  inter = isl_union_map_union(inter, umap);
4771
0
  umap = isl_union_map_copy(inter);
4772
0
  inter = isl_union_map_subtract_domain(inter,
4773
0
        isl_union_set_copy(data->mask));
4774
0
  umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent),
4775
0
        umap);
4776
0
  inter = isl_union_map_union(inter, umap);
4777
0
4778
0
  inter = isl_union_map_remove_divs(inter);
4779
0
4780
0
  return inter;
4781
0
}
4782
4783
/* For each (conditional) validity edge in "graph",
4784
 * add the corresponding dependence relation using "add"
4785
 * to a collection of dependence relations and return the result.
4786
 * If "coincidence" is set, then coincidence edges are considered as well.
4787
 */
4788
static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph,
4789
  __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap,
4790
    struct isl_sched_edge *edge), int coincidence)
4791
0
{
4792
0
  int i;
4793
0
  isl_space *space;
4794
0
  isl_union_map *umap;
4795
0
4796
0
  space = isl_space_copy(graph->node[0].space);
4797
0
  umap = isl_union_map_empty(space);
4798
0
4799
0
  for (i = 0; i < graph->n_edge; ++i) {
4800
0
    struct isl_sched_edge *edge = &graph->edge[i];
4801
0
4802
0
    if (!is_any_validity(edge) &&
4803
0
        (!coincidence || !is_coincidence(edge)))
4804
0
      continue;
4805
0
4806
0
    umap = add(umap, edge);
4807
0
  }
4808
0
4809
0
  return umap;
4810
0
}
4811
4812
/* Project out all parameters from "uset" and return the result.
4813
 */
4814
static __isl_give isl_union_set *union_set_drop_parameters(
4815
  __isl_take isl_union_set *uset)
4816
0
{
4817
0
  unsigned nparam;
4818
0
4819
0
  nparam = isl_union_set_dim(uset, isl_dim_param);
4820
0
  return isl_union_set_project_out(uset, isl_dim_param, 0, nparam);
4821
0
}
4822
4823
/* For each dependence relation on a (conditional) validity edge
4824
 * from a node to itself,
4825
 * construct the set of coefficients of valid constraints for elements
4826
 * in that dependence relation and collect the results.
4827
 * If "coincidence" is set, then coincidence edges are considered as well.
4828
 *
4829
 * In particular, for each dependence relation R, constraints
4830
 * on coefficients (c_0, c_x) are constructed such that
4831
 *
4832
 *  c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
4833
 *
4834
 * If the schedule_treat_coalescing option is set, then some constraints
4835
 * that could be exploited to construct coalescing schedules
4836
 * are removed before the dual is computed, but after the parameters
4837
 * have been projected out.
4838
 * The entire computation is essentially the same as that performed
4839
 * by intra_coefficients, except that it operates on multiple
4840
 * edges together and that the parameters are always projected out.
4841
 *
4842
 * Additionally, exploit any non-trivial lineality space
4843
 * in the difference set after removing coalescing constraints and
4844
 * store the results of the non-trivial lineality space detection in "data".
4845
 * The procedure is currently run unconditionally, but it is unlikely
4846
 * to find any non-trivial lineality spaces if no coalescing constraints
4847
 * have been removed.
4848
 *
4849
 * Note that if a dependence relation is a union of basic maps,
4850
 * then each basic map needs to be treated individually as it may only
4851
 * be possible to carry the dependences expressed by some of those
4852
 * basic maps and not all of them.
4853
 * The collected validity constraints are therefore not coalesced and
4854
 * it is assumed that they are not coalesced automatically.
4855
 * Duplicate basic maps can be removed, however.
4856
 * In particular, if the same basic map appears as a disjunct
4857
 * in multiple edges, then it only needs to be carried once.
4858
 */
4859
static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx,
4860
  struct isl_sched_graph *graph, int coincidence,
4861
  struct isl_exploit_lineality_data *data)
4862
0
{
4863
0
  isl_union_map *intra;
4864
0
  isl_union_set *delta;
4865
0
  isl_basic_set_list *list;
4866
0
4867
0
  intra = collect_validity(graph, &add_intra, coincidence);
4868
0
  delta = isl_union_map_deltas(intra);
4869
0
  delta = union_set_drop_parameters(delta);
4870
0
  delta = isl_union_set_remove_divs(delta);
4871
0
  if (isl_options_get_schedule_treat_coalescing(ctx))
4872
0
    delta = union_drop_coalescing_constraints(ctx, graph, delta);
4873
0
  delta = exploit_intra_lineality(delta, data);
4874
0
  list = isl_union_set_get_basic_set_list(delta);
4875
0
  isl_union_set_free(delta);
4876
0
4877
0
  return isl_basic_set_list_coefficients(list);
4878
0
}
4879
4880
/* For each dependence relation on a (conditional) validity edge
4881
 * from a node to some other node,
4882
 * construct the set of coefficients of valid constraints for elements
4883
 * in that dependence relation and collect the results.
4884
 * If "coincidence" is set, then coincidence edges are considered as well.
4885
 *
4886
 * In particular, for each dependence relation R, constraints
4887
 * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
4888
 *
4889
 *  c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
4890
 *
4891
 * This computation is essentially the same as that performed
4892
 * by inter_coefficients, except that it operates on multiple
4893
 * edges together.
4894
 *
4895
 * Additionally, exploit any non-trivial lineality space
4896
 * that may have been discovered by collect_intra_validity
4897
 * (as stored in "data").
4898
 *
4899
 * Note that if a dependence relation is a union of basic maps,
4900
 * then each basic map needs to be treated individually as it may only
4901
 * be possible to carry the dependences expressed by some of those
4902
 * basic maps and not all of them.
4903
 * The collected validity constraints are therefore not coalesced and
4904
 * it is assumed that they are not coalesced automatically.
4905
 * Duplicate basic maps can be removed, however.
4906
 * In particular, if the same basic map appears as a disjunct
4907
 * in multiple edges, then it only needs to be carried once.
4908
 */
4909
static __isl_give isl_basic_set_list *collect_inter_validity(
4910
  struct isl_sched_graph *graph, int coincidence,
4911
  struct isl_exploit_lineality_data *data)
4912
0
{
4913
0
  isl_union_map *inter;
4914
0
  isl_union_set *wrap;
4915
0
  isl_basic_set_list *list;
4916
0
4917
0
  inter = collect_validity(graph, &add_inter, coincidence);
4918
0
  inter = exploit_inter_lineality(inter, data);
4919
0
  inter = isl_union_map_remove_divs(inter);
4920
0
  wrap = isl_union_map_wrap(inter);
4921
0
  list = isl_union_set_get_basic_set_list(wrap);
4922
0
  isl_union_set_free(wrap);
4923
0
  return isl_basic_set_list_coefficients(list);
4924
0
}
4925
4926
/* Construct an LP problem for finding schedule coefficients
4927
 * such that the schedule carries as many of the "n_edge" groups of
4928
 * dependences as possible based on the corresponding coefficient
4929
 * constraints and return the lexicographically smallest non-trivial solution.
4930
 * "intra" is the sequence of coefficient constraints for intra-node edges.
4931
 * "inter" is the sequence of coefficient constraints for inter-node edges.
4932
 * If "want_integral" is set, then compute an integral solution
4933
 * for the coefficients rather than using the numerators
4934
 * of a rational solution.
4935
 * "carry_inter" indicates whether inter-node edges should be carried or
4936
 * only respected.
4937
 *
4938
 * If none of the "n_edge" groups can be carried
4939
 * then return an empty vector.
4940
 */
4941
static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx,
4942
  struct isl_sched_graph *graph, int n_edge,
4943
  __isl_keep isl_basic_set_list *intra,
4944
  __isl_keep isl_basic_set_list *inter, int want_integral,
4945
  int carry_inter)
4946
0
{
4947
0
  isl_basic_set *lp;
4948
0
4949
0
  if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0)
4950
0
    return NULL;
4951
0
4952
0
  lp = isl_basic_set_copy(graph->lp);
4953
0
  return non_neg_lexmin(graph, lp, n_edge, want_integral);
4954
0
}
4955
4956
/* Construct an LP problem for finding schedule coefficients
4957
 * such that the schedule carries as many of the validity dependences
4958
 * as possible and
4959
 * return the lexicographically smallest non-trivial solution.
4960
 * If "fallback" is set, then the carrying is performed as a fallback
4961
 * for the Pluto-like scheduler.
4962
 * If "coincidence" is set, then try and carry coincidence edges as well.
4963
 *
4964
 * The variable "n_edge" stores the number of groups that should be carried.
4965
 * If none of the "n_edge" groups can be carried
4966
 * then return an empty vector.
4967
 * If, moreover, "n_edge" is zero, then the LP problem does not even
4968
 * need to be constructed.
4969
 *
4970
 * If a fallback solution is being computed, then compute an integral solution
4971
 * for the coefficients rather than using the numerators
4972
 * of a rational solution.
4973
 *
4974
 * If a fallback solution is being computed, if there are any intra-node
4975
 * dependences, and if requested by the user, then first try
4976
 * to only carry those intra-node dependences.
4977
 * If this fails to carry any dependences, then try again
4978
 * with the inter-node dependences included.
4979
 */
4980
static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx,
4981
  struct isl_sched_graph *graph, int fallback, int coincidence)
4982
0
{
4983
0
  int n_intra, n_inter;
4984
0
  int n_edge;
4985
0
  struct isl_carry carry = { 0 };
4986
0
  isl_vec *sol;
4987
0
4988
0
  carry.intra = collect_intra_validity(ctx, graph, coincidence,
4989
0
            &carry.lineality);
4990
0
  carry.inter = collect_inter_validity(graph, coincidence,
4991
0
            &carry.lineality);
4992
0
  if (!carry.intra || !carry.inter)
4993
0
    goto error;
4994
0
  n_intra = isl_basic_set_list_n_basic_set(carry.intra);
4995
0
  n_inter = isl_basic_set_list_n_basic_set(carry.inter);
4996
0
4997
0
  if (fallback && n_intra > 0 &&
4998
0
      isl_options_get_schedule_carry_self_first(ctx)) {
4999
0
    sol = compute_carrying_sol_coef(ctx, graph, n_intra,
5000
0
        carry.intra, carry.inter, fallback, 0);
5001
0
    if (!sol || sol->size != 0 || n_inter == 0) {
5002
0
      isl_carry_clear(&carry);
5003
0
      return sol;
5004
0
    }
5005
0
    isl_vec_free(sol);
5006
0
  }
5007
0
5008
0
  n_edge = n_intra + n_inter;
5009
0
  if (n_edge == 0) {
5010
0
    isl_carry_clear(&carry);
5011
0
    return isl_vec_alloc(ctx, 0);
5012
0
  }
5013
0
5014
0
  sol = compute_carrying_sol_coef(ctx, graph, n_edge,
5015
0
        carry.intra, carry.inter, fallback, 1);
5016
0
  isl_carry_clear(&carry);
5017
0
  return sol;
5018
0
error:
5019
0
  isl_carry_clear(&carry);
5020
0
  return NULL;
5021
0
}
5022
5023
/* Construct a schedule row for each node such that as many validity dependences
5024
 * as possible are carried and then continue with the next band.
5025
 * If "fallback" is set, then the carrying is performed as a fallback
5026
 * for the Pluto-like scheduler.
5027