Coverage Report

Created: 2018-02-20 23:11

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/External/isl/isl_scheduler.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2011      INRIA Saclay
3
 * Copyright 2012-2014 Ecole Normale Superieure
4
 * Copyright 2015-2016 Sven Verdoolaege
5
 * Copyright 2016      INRIA Paris
6
 * Copyright 2017      Sven Verdoolaege
7
 *
8
 * Use of this software is governed by the MIT license
9
 *
10
 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
11
 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
12
 * 91893 Orsay, France
13
 * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
14
 * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
15
 * CS 42112, 75589 Paris Cedex 12, France
16
 */
17
18
#include <isl_ctx_private.h>
19
#include <isl_map_private.h>
20
#include <isl_space_private.h>
21
#include <isl_aff_private.h>
22
#include <isl/hash.h>
23
#include <isl/constraint.h>
24
#include <isl/schedule.h>
25
#include <isl_schedule_constraints.h>
26
#include <isl/schedule_node.h>
27
#include <isl_mat_private.h>
28
#include <isl_vec_private.h>
29
#include <isl/set.h>
30
#include <isl_union_set_private.h>
31
#include <isl_seq.h>
32
#include <isl_tab.h>
33
#include <isl_dim_map.h>
34
#include <isl/map_to_basic_set.h>
35
#include <isl_sort.h>
36
#include <isl_options_private.h>
37
#include <isl_tarjan.h>
38
#include <isl_morph.h>
39
#include <isl/ilp.h>
40
#include <isl_val_private.h>
41
42
/*
43
 * The scheduling algorithm implemented in this file was inspired by
44
 * Bondhugula et al., "Automatic Transformations for Communication-Minimized
45
 * Parallelization and Locality Optimization in the Polyhedral Model".
46
 *
47
 * For a detailed description of the variant implemented in isl,
48
 * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017).
49
 */
50
51
52
/* Internal information about a node that is used during the construction
53
 * of a schedule.
54
 * space represents the original space in which the domain lives;
55
 *  that is, the space is not affected by compression
56
 * sched is a matrix representation of the schedule being constructed
57
 *  for this node; if compressed is set, then this schedule is
58
 *  defined over the compressed domain space
59
 * sched_map is an isl_map representation of the same (partial) schedule
60
 *  sched_map may be NULL; if compressed is set, then this map
61
 *  is defined over the uncompressed domain space
62
 * rank is the number of linearly independent rows in the linear part
63
 *  of sched
64
 * the rows of "vmap" represent a change of basis for the node
65
 *  variables; the first rank rows span the linear part of
66
 *  the schedule rows; the remaining rows are linearly independent
67
 * the rows of "indep" represent linear combinations of the schedule
68
 * coefficients that are non-zero when the schedule coefficients are
69
 * linearly independent of previously computed schedule rows.
70
 * start is the first variable in the LP problem in the sequences that
71
 *  represents the schedule coefficients of this node
72
 * nvar is the dimension of the (compressed) domain
73
 * nparam is the number of parameters or 0 if we are not constructing
74
 *  a parametric schedule
75
 *
76
 * If compressed is set, then hull represents the constraints
77
 * that were used to derive the compression, while compress and
78
 * decompress map the original space to the compressed space and
79
 * vice versa.
80
 *
81
 * scc is the index of SCC (or WCC) this node belongs to
82
 *
83
 * "cluster" is only used inside extract_clusters and identifies
84
 * the cluster of SCCs that the node belongs to.
85
 *
86
 * coincident contains a boolean for each of the rows of the schedule,
87
 * indicating whether the corresponding scheduling dimension satisfies
88
 * the coincidence constraints in the sense that the corresponding
89
 * dependence distances are zero.
90
 *
91
 * If the schedule_treat_coalescing option is set, then
92
 * "sizes" contains the sizes of the (compressed) instance set
93
 * in each direction.  If there is no fixed size in a given direction,
94
 * then the corresponding size value is set to infinity.
95
 * If the schedule_treat_coalescing option or the schedule_max_coefficient
96
 * option is set, then "max" contains the maximal values for
97
 * schedule coefficients of the (compressed) variables.  If no bound
98
 * needs to be imposed on a particular variable, then the corresponding
99
 * value is negative.
100
 * If not NULL, then "bounds" contains a non-parametric set
101
 * in the compressed space that is bounded by the size in each direction.
102
 */
103
struct isl_sched_node {
104
  isl_space *space;
105
  int compressed;
106
  isl_set *hull;
107
  isl_multi_aff *compress;
108
  isl_multi_aff *decompress;
109
  isl_mat *sched;
110
  isl_map *sched_map;
111
  int  rank;
112
  isl_mat *indep;
113
  isl_mat *vmap;
114
  int  start;
115
  int  nvar;
116
  int  nparam;
117
118
  int  scc;
119
  int  cluster;
120
121
  int *coincident;
122
123
  isl_multi_val *sizes;
124
  isl_basic_set *bounds;
125
  isl_vec *max;
126
};
127
128
static int node_has_tuples(const void *entry, const void *val)
129
1.73k
{
130
1.73k
  struct isl_sched_node *node = (struct isl_sched_node *)entry;
131
1.73k
  isl_space *space = (isl_space *) val;
132
1.73k
133
1.73k
  return isl_space_has_equal_tuples(node->space, space);
134
1.73k
}
135
136
static int node_scc_exactly(struct isl_sched_node *node, int scc)
137
1.10k
{
138
1.10k
  return node->scc == scc;
139
1.10k
}
140
141
static int node_scc_at_most(struct isl_sched_node *node, int scc)
142
18
{
143
18
  return node->scc <= scc;
144
18
}
145
146
static int node_scc_at_least(struct isl_sched_node *node, int scc)
147
18
{
148
18
  return node->scc >= scc;
149
18
}
150
151
/* An edge in the dependence graph.  An edge may be used to
152
 * ensure validity of the generated schedule, to minimize the dependence
153
 * distance or both
154
 *
155
 * map is the dependence relation, with i -> j in the map if j depends on i
156
 * tagged_condition and tagged_validity contain the union of all tagged
157
 *  condition or conditional validity dependence relations that
158
 *  specialize the dependence relation "map"; that is,
159
 *  if (i -> a) -> (j -> b) is an element of "tagged_condition"
160
 *  or "tagged_validity", then i -> j is an element of "map".
161
 *  If these fields are NULL, then they represent the empty relation.
162
 * src is the source node
163
 * dst is the sink node
164
 *
165
 * types is a bit vector containing the types of this edge.
166
 * validity is set if the edge is used to ensure correctness
167
 * coincidence is used to enforce zero dependence distances
168
 * proximity is set if the edge is used to minimize dependence distances
169
 * condition is set if the edge represents a condition
170
 *  for a conditional validity schedule constraint
171
 * local can only be set for condition edges and indicates that
172
 *  the dependence distance over the edge should be zero
173
 * conditional_validity is set if the edge is used to conditionally
174
 *  ensure correctness
175
 *
176
 * For validity edges, start and end mark the sequence of inequality
177
 * constraints in the LP problem that encode the validity constraint
178
 * corresponding to this edge.
179
 *
180
 * During clustering, an edge may be marked "no_merge" if it should
181
 * not be used to merge clusters.
182
 * The weight is also only used during clustering and it is
183
 * an indication of how many schedule dimensions on either side
184
 * of the schedule constraints can be aligned.
185
 * If the weight is negative, then this means that this edge was postponed
186
 * by has_bounded_distances or any_no_merge.  The original weight can
187
 * be retrieved by adding 1 + graph->max_weight, with "graph"
188
 * the graph containing this edge.
189
 */
190
struct isl_sched_edge {
191
  isl_map *map;
192
  isl_union_map *tagged_condition;
193
  isl_union_map *tagged_validity;
194
195
  struct isl_sched_node *src;
196
  struct isl_sched_node *dst;
197
198
  unsigned types;
199
200
  int start;
201
  int end;
202
203
  int no_merge;
204
  int weight;
205
};
206
207
/* Is "edge" marked as being of type "type"?
208
 */
209
static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type)
210
12.9k
{
211
12.9k
  return ISL_FL_ISSET(edge->types, 1 << type);
212
12.9k
}
213
214
/* Mark "edge" as being of type "type".
215
 */
216
static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
217
683
{
218
683
  ISL_FL_SET(edge->types, 1 << type);
219
683
}
220
221
/* No longer mark "edge" as being of type "type"?
222
 */
223
static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
224
44
{
225
44
  ISL_FL_CLR(edge->types, 1 << type);
226
44
}
227
228
/* Is "edge" marked as a validity edge?
229
 */
230
static int is_validity(struct isl_sched_edge *edge)
231
3.44k
{
232
3.44k
  return is_type(edge, isl_edge_validity);
233
3.44k
}
234
235
/* Mark "edge" as a validity edge.
236
 */
237
static void set_validity(struct isl_sched_edge *edge)
238
2
{
239
2
  set_type(edge, isl_edge_validity);
240
2
}
241
242
/* Is "edge" marked as a proximity edge?
243
 */
244
static int is_proximity(struct isl_sched_edge *edge)
245
1.80k
{
246
1.80k
  return is_type(edge, isl_edge_proximity);
247
1.80k
}
248
249
/* Is "edge" marked as a local edge?
250
 */
251
static int is_local(struct isl_sched_edge *edge)
252
2.24k
{
253
2.24k
  return is_type(edge, isl_edge_local);
254
2.24k
}
255
256
/* Mark "edge" as a local edge.
257
 */
258
static void set_local(struct isl_sched_edge *edge)
259
15
{
260
15
  set_type(edge, isl_edge_local);
261
15
}
262
263
/* No longer mark "edge" as a local edge.
264
 */
265
static void clear_local(struct isl_sched_edge *edge)
266
44
{
267
44
  clear_type(edge, isl_edge_local);
268
44
}
269
270
/* Is "edge" marked as a coincidence edge?
271
 */
272
static int is_coincidence(struct isl_sched_edge *edge)
273
1.04k
{
274
1.04k
  return is_type(edge, isl_edge_coincidence);
275
1.04k
}
276
277
/* Is "edge" marked as a condition edge?
278
 */
279
static int is_condition(struct isl_sched_edge *edge)
280
2.03k
{
281
2.03k
  return is_type(edge, isl_edge_condition);
282
2.03k
}
283
284
/* Is "edge" marked as a conditional validity edge?
285
 */
286
static int is_conditional_validity(struct isl_sched_edge *edge)
287
1.49k
{
288
1.49k
  return is_type(edge, isl_edge_conditional_validity);
289
1.49k
}
290
291
/* Is "edge" of a type that can appear multiple times between
292
 * the same pair of nodes?
293
 *
294
 * Condition edges and conditional validity edges may have tagged
295
 * dependence relations, in which case an edge is added for each
296
 * pair of tags.
297
 */
298
static int is_multi_edge_type(struct isl_sched_edge *edge)
299
112
{
300
112
  return is_condition(edge) || 
is_conditional_validity(edge)89
;
301
112
}
302
303
/* Internal information about the dependence graph used during
304
 * the construction of the schedule.
305
 *
306
 * intra_hmap is a cache, mapping dependence relations to their dual,
307
 *  for dependences from a node to itself, possibly without
308
 *  coefficients for the parameters
309
 * intra_hmap_param is a cache, mapping dependence relations to their dual,
310
 *  for dependences from a node to itself, including coefficients
311
 *  for the parameters
312
 * inter_hmap is a cache, mapping dependence relations to their dual,
313
 *  for dependences between distinct nodes
314
 * if compression is involved then the key for these maps
315
 * is the original, uncompressed dependence relation, while
316
 * the value is the dual of the compressed dependence relation.
317
 *
318
 * n is the number of nodes
319
 * node is the list of nodes
320
 * maxvar is the maximal number of variables over all nodes
321
 * max_row is the allocated number of rows in the schedule
322
 * n_row is the current (maximal) number of linearly independent
323
 *  rows in the node schedules
324
 * n_total_row is the current number of rows in the node schedules
325
 * band_start is the starting row in the node schedules of the current band
326
 * root is set to the original dependence graph from which this graph
327
 *  is derived through splitting.  If this graph is not the result of
328
 *  splitting, then the root field points to the graph itself.
329
 *
330
 * sorted contains a list of node indices sorted according to the
331
 *  SCC to which a node belongs
332
 *
333
 * n_edge is the number of edges
334
 * edge is the list of edges
335
 * max_edge contains the maximal number of edges of each type;
336
 *  in particular, it contains the number of edges in the inital graph.
337
 * edge_table contains pointers into the edge array, hashed on the source
338
 *  and sink spaces; there is one such table for each type;
339
 *  a given edge may be referenced from more than one table
340
 *  if the corresponding relation appears in more than one of the
341
 *  sets of dependences; however, for each type there is only
342
 *  a single edge between a given pair of source and sink space
343
 *  in the entire graph
344
 *
345
 * node_table contains pointers into the node array, hashed on the space tuples
346
 *
347
 * region contains a list of variable sequences that should be non-trivial
348
 *
349
 * lp contains the (I)LP problem used to obtain new schedule rows
350
 *
351
 * src_scc and dst_scc are the source and sink SCCs of an edge with
352
 *  conflicting constraints
353
 *
354
 * scc represents the number of components
355
 * weak is set if the components are weakly connected
356
 *
357
 * max_weight is used during clustering and represents the maximal
358
 * weight of the relevant proximity edges.
359
 */
360
struct isl_sched_graph {
361
  isl_map_to_basic_set *intra_hmap;
362
  isl_map_to_basic_set *intra_hmap_param;
363
  isl_map_to_basic_set *inter_hmap;
364
365
  struct isl_sched_node *node;
366
  int n;
367
  int maxvar;
368
  int max_row;
369
  int n_row;
370
371
  int *sorted;
372
373
  int n_total_row;
374
  int band_start;
375
376
  struct isl_sched_graph *root;
377
378
  struct isl_sched_edge *edge;
379
  int n_edge;
380
  int max_edge[isl_edge_last + 1];
381
  struct isl_hash_table *edge_table[isl_edge_last + 1];
382
383
  struct isl_hash_table *node_table;
384
  struct isl_trivial_region *region;
385
386
  isl_basic_set *lp;
387
388
  int src_scc;
389
  int dst_scc;
390
391
  int scc;
392
  int weak;
393
394
  int max_weight;
395
};
396
397
/* Initialize node_table based on the list of nodes.
398
 */
399
static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
400
349
{
401
349
  int i;
402
349
403
349
  graph->node_table = isl_hash_table_alloc(ctx, graph->n);
404
349
  if (!graph->node_table)
405
0
    return -1;
406
349
407
856
  
for (i = 0; 349
i < graph->n;
++i507
) {
408
507
    struct isl_hash_table_entry *entry;
409
507
    uint32_t hash;
410
507
411
507
    hash = isl_space_get_tuple_hash(graph->node[i].space);
412
507
    entry = isl_hash_table_find(ctx, graph->node_table, hash,
413
507
              &node_has_tuples,
414
507
              graph->node[i].space, 1);
415
507
    if (!entry)
416
0
      return -1;
417
507
    entry->data = &graph->node[i];
418
507
  }
419
349
420
349
  return 0;
421
349
}
422
423
/* Return a pointer to the node that lives within the given space,
424
 * an invalid node if there is no such node, or NULL in case of error.
425
 */
426
static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
427
  struct isl_sched_graph *graph, __isl_keep isl_space *space)
428
1.76k
{
429
1.76k
  struct isl_hash_table_entry *entry;
430
1.76k
  uint32_t hash;
431
1.76k
432
1.76k
  if (!space)
433
0
    return NULL;
434
1.76k
435
1.76k
  hash = isl_space_get_tuple_hash(space);
436
1.76k
  entry = isl_hash_table_find(ctx, graph->node_table, hash,
437
1.76k
            &node_has_tuples, space, 0);
438
1.76k
439
1.76k
  return entry ? 
entry->data1.73k
:
graph->node + graph->n24
;
440
1.76k
}
441
442
/* Is "node" a node in "graph"?
443
 */
444
static int is_node(struct isl_sched_graph *graph,
445
  struct isl_sched_node *node)
446
1.76k
{
447
1.76k
  return node && node >= &graph->node[0] && node < &graph->node[graph->n];
448
1.76k
}
449
450
static int edge_has_src_and_dst(const void *entry, const void *val)
451
1.22k
{
452
1.22k
  const struct isl_sched_edge *edge = entry;
453
1.22k
  const struct isl_sched_edge *temp = val;
454
1.22k
455
1.22k
  return edge->src == temp->src && edge->dst == temp->dst;
456
1.22k
}
457
458
/* Add the given edge to graph->edge_table[type].
459
 */
460
static isl_stat graph_edge_table_add(isl_ctx *ctx,
461
  struct isl_sched_graph *graph, enum isl_edge_type type,
462
  struct isl_sched_edge *edge)
463
950
{
464
950
  struct isl_hash_table_entry *entry;
465
950
  uint32_t hash;
466
950
467
950
  hash = isl_hash_init();
468
950
  hash = isl_hash_builtin(hash, edge->src);
469
950
  hash = isl_hash_builtin(hash, edge->dst);
470
950
  entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
471
950
            &edge_has_src_and_dst, edge, 1);
472
950
  if (!entry)
473
0
    return isl_stat_error;
474
950
  entry->data = edge;
475
950
476
950
  return isl_stat_ok;
477
950
}
478
479
/* Add "edge" to all relevant edge tables.
480
 * That is, for every type of the edge, add it to the corresponding table.
481
 */
482
static isl_stat graph_edge_tables_add(isl_ctx *ctx,
483
  struct isl_sched_graph *graph, struct isl_sched_edge *edge)
484
114
{
485
114
  enum isl_edge_type t;
486
114
487
684
  for (t = isl_edge_first; t <= isl_edge_last; 
++t570
) {
488
570
    if (!is_type(edge, t))
489
286
      continue;
490
284
    if (graph_edge_table_add(ctx, graph, t, edge) < 0)
491
0
      return isl_stat_error;
492
284
  }
493
114
494
114
  return isl_stat_ok;
495
114
}
496
497
/* Allocate the edge_tables based on the maximal number of edges of
498
 * each type.
499
 */
500
static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
501
349
{
502
349
  int i;
503
349
504
2.09k
  for (i = 0; i <= isl_edge_last; 
++i1.74k
) {
505
1.74k
    graph->edge_table[i] = isl_hash_table_alloc(ctx,
506
1.74k
                  graph->max_edge[i]);
507
1.74k
    if (!graph->edge_table[i])
508
0
      return -1;
509
1.74k
  }
510
349
511
349
  return 0;
512
349
}
513
514
/* If graph->edge_table[type] contains an edge from the given source
515
 * to the given destination, then return the hash table entry of this edge.
516
 * Otherwise, return NULL.
517
 */
518
static struct isl_hash_table_entry *graph_find_edge_entry(
519
  struct isl_sched_graph *graph,
520
  enum isl_edge_type type,
521
  struct isl_sched_node *src, struct isl_sched_node *dst)
522
4.89k
{
523
4.89k
  isl_ctx *ctx = isl_space_get_ctx(src->space);
524
4.89k
  uint32_t hash;
525
4.89k
  struct isl_sched_edge temp = { .src = src, .dst = dst };
526
4.89k
527
4.89k
  hash = isl_hash_init();
528
4.89k
  hash = isl_hash_builtin(hash, temp.src);
529
4.89k
  hash = isl_hash_builtin(hash, temp.dst);
530
4.89k
  return isl_hash_table_find(ctx, graph->edge_table[type], hash,
531
4.89k
            &edge_has_src_and_dst, &temp, 0);
532
4.89k
}
533
534
535
/* If graph->edge_table[type] contains an edge from the given source
536
 * to the given destination, then return this edge.
537
 * Otherwise, return NULL.
538
 */
539
static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
540
  enum isl_edge_type type,
541
  struct isl_sched_node *src, struct isl_sched_node *dst)
542
4.03k
{
543
4.03k
  struct isl_hash_table_entry *entry;
544
4.03k
545
4.03k
  entry = graph_find_edge_entry(graph, type, src, dst);
546
4.03k
  if (!entry)
547
3.22k
    return NULL;
548
808
549
808
  return entry->data;
550
808
}
551
552
/* Check whether the dependence graph has an edge of the given type
553
 * between the given two nodes.
554
 */
555
static isl_bool graph_has_edge(struct isl_sched_graph *graph,
556
  enum isl_edge_type type,
557
  struct isl_sched_node *src, struct isl_sched_node *dst)
558
2.28k
{
559
2.28k
  struct isl_sched_edge *edge;
560
2.28k
  isl_bool empty;
561
2.28k
562
2.28k
  edge = graph_find_edge(graph, type, src, dst);
563
2.28k
  if (!edge)
564
1.89k
    return 0;
565
393
566
393
  empty = isl_map_plain_is_empty(edge->map);
567
393
  if (empty < 0)
568
0
    return isl_bool_error;
569
393
570
393
  return !empty;
571
393
}
572
573
/* Look for any edge with the same src, dst and map fields as "model".
574
 *
575
 * Return the matching edge if one can be found.
576
 * Return "model" if no matching edge is found.
577
 * Return NULL on error.
578
 */
579
static struct isl_sched_edge *graph_find_matching_edge(
580
  struct isl_sched_graph *graph, struct isl_sched_edge *model)
581
666
{
582
666
  enum isl_edge_type i;
583
666
  struct isl_sched_edge *edge;
584
666
585
2.01k
  for (i = isl_edge_first; i <= isl_edge_last; 
++i1.34k
) {
586
1.74k
    int is_equal;
587
1.74k
588
1.74k
    edge = graph_find_edge(graph, i, model->src, model->dst);
589
1.74k
    if (!edge)
590
1.32k
      continue;
591
415
    is_equal = isl_map_plain_is_equal(model->map, edge->map);
592
415
    if (is_equal < 0)
593
0
      return NULL;
594
415
    if (is_equal)
595
397
      return edge;
596
415
  }
597
666
598
666
  
return model269
;
599
666
}
600
601
/* Remove the given edge from all the edge_tables that refer to it.
602
 */
603
static void graph_remove_edge(struct isl_sched_graph *graph,
604
  struct isl_sched_edge *edge)
605
172
{
606
172
  isl_ctx *ctx = isl_map_get_ctx(edge->map);
607
172
  enum isl_edge_type i;
608
172
609
1.03k
  for (i = isl_edge_first; i <= isl_edge_last; 
++i860
) {
610
860
    struct isl_hash_table_entry *entry;
611
860
612
860
    entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
613
860
    if (!entry)
614
515
      continue;
615
345
    if (entry->data != edge)
616
9
      continue;
617
336
    isl_hash_table_remove(ctx, graph->edge_table[i], entry);
618
336
  }
619
172
}
620
621
/* Check whether the dependence graph has any edge
622
 * between the given two nodes.
623
 */
624
static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
625
  struct isl_sched_node *src, struct isl_sched_node *dst)
626
497
{
627
497
  enum isl_edge_type i;
628
497
  isl_bool r;
629
497
630
1.86k
  for (i = isl_edge_first; i <= isl_edge_last; 
++i1.37k
) {
631
1.62k
    r = graph_has_edge(graph, i, src, dst);
632
1.62k
    if (r < 0 || r)
633
248
      return r;
634
1.62k
  }
635
497
636
497
  
return r249
;
637
497
}
638
639
/* Check whether the dependence graph has a validity edge
640
 * between the given two nodes.
641
 *
642
 * Conditional validity edges are essentially validity edges that
643
 * can be ignored if the corresponding condition edges are iteration private.
644
 * Here, we are only checking for the presence of validity
645
 * edges, so we need to consider the conditional validity edges too.
646
 * In particular, this function is used during the detection
647
 * of strongly connected components and we cannot ignore
648
 * conditional validity edges during this detection.
649
 */
650
static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph,
651
  struct isl_sched_node *src, struct isl_sched_node *dst)
652
397
{
653
397
  isl_bool r;
654
397
655
397
  r = graph_has_edge(graph, isl_edge_validity, src, dst);
656
397
  if (r < 0 || r)
657
126
    return r;
658
271
659
271
  return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
660
271
}
661
662
/* Perform all the required memory allocations for a schedule graph "graph"
663
 * with "n_node" nodes and "n_edge" edge and initialize the corresponding
664
 * fields.
665
 */
666
static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
667
  int n_node, int n_edge)
668
349
{
669
349
  int i;
670
349
671
349
  graph->n = n_node;
672
349
  graph->n_edge = n_edge;
673
349
  graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
674
349
  graph->sorted = isl_calloc_array(ctx, int, graph->n);
675
349
  graph->region = isl_alloc_array(ctx,
676
349
          struct isl_trivial_region, graph->n);
677
349
  graph->edge = isl_calloc_array(ctx,
678
349
          struct isl_sched_edge, graph->n_edge);
679
349
680
349
  graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
681
349
  graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
682
349
  graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
683
349
684
349
  if (!graph->node || !graph->region || (graph->n_edge && 
!graph->edge201
) ||
685
349
      !graph->sorted)
686
0
    return isl_stat_error;
687
349
688
858
  
for(i = 0; 349
i < graph->n;
++i509
)
689
509
    graph->sorted[i] = i;
690
349
691
349
  return isl_stat_ok;
692
349
}
693
694
/* Free the memory associated to node "node" in "graph".
695
 * The "coincident" field is shared by nodes in a graph and its subgraph.
696
 * It therefore only needs to be freed for the original dependence graph,
697
 * i.e., one that is not the result of splitting.
698
 */
699
static void clear_node(struct isl_sched_graph *graph,
700
  struct isl_sched_node *node)
701
507
{
702
507
  isl_space_free(node->space);
703
507
  isl_set_free(node->hull);
704
507
  isl_multi_aff_free(node->compress);
705
507
  isl_multi_aff_free(node->decompress);
706
507
  isl_mat_free(node->sched);
707
507
  isl_map_free(node->sched_map);
708
507
  isl_mat_free(node->indep);
709
507
  isl_mat_free(node->vmap);
710
507
  if (graph->root == graph)
711
283
    free(node->coincident);
712
507
  isl_multi_val_free(node->sizes);
713
507
  isl_basic_set_free(node->bounds);
714
507
  isl_vec_free(node->max);
715
507
}
716
717
static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
718
367
{
719
367
  int i;
720
367
721
367
  isl_map_to_basic_set_free(graph->intra_hmap);
722
367
  isl_map_to_basic_set_free(graph->intra_hmap_param);
723
367
  isl_map_to_basic_set_free(graph->inter_hmap);
724
367
725
367
  if (graph->node)
726
856
    
for (i = 0; 349
i < graph->n;
++i507
)
727
507
      clear_node(graph, &graph->node[i]);
728
367
  free(graph->node);
729
367
  free(graph->sorted);
730
367
  if (graph->edge)
731
699
    
for (i = 0; 349
i < graph->n_edge;
++i350
) {
732
350
      isl_map_free(graph->edge[i].map);
733
350
      isl_union_map_free(graph->edge[i].tagged_condition);
734
350
      isl_union_map_free(graph->edge[i].tagged_validity);
735
350
    }
736
367
  free(graph->edge);
737
367
  free(graph->region);
738
2.20k
  for (i = 0; i <= isl_edge_last; 
++i1.83k
)
739
1.83k
    isl_hash_table_free(ctx, graph->edge_table[i]);
740
367
  isl_hash_table_free(ctx, graph->node_table);
741
367
  isl_basic_set_free(graph->lp);
742
367
}
743
744
/* For each "set" on which this function is called, increment
745
 * graph->n by one and update graph->maxvar.
746
 */
747
static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
748
285
{
749
285
  struct isl_sched_graph *graph = user;
750
285
  int nvar = isl_set_dim(set, isl_dim_set);
751
285
752
285
  graph->n++;
753
285
  if (nvar > graph->maxvar)
754
159
    graph->maxvar = nvar;
755
285
756
285
  isl_set_free(set);
757
285
758
285
  return isl_stat_ok;
759
285
}
760
761
/* Compute the number of rows that should be allocated for the schedule.
762
 * In particular, we need one row for each variable or one row
763
 * for each basic map in the dependences.
764
 * Note that it is practically impossible to exhaust both
765
 * the number of dependences and the number of variables.
766
 */
767
static isl_stat compute_max_row(struct isl_sched_graph *graph,
768
  __isl_keep isl_schedule_constraints *sc)
769
155
{
770
155
  int n_edge;
771
155
  isl_stat r;
772
155
  isl_union_set *domain;
773
155
774
155
  graph->n = 0;
775
155
  graph->maxvar = 0;
776
155
  domain = isl_schedule_constraints_get_domain(sc);
777
155
  r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
778
155
  isl_union_set_free(domain);
779
155
  if (r < 0)
780
0
    return isl_stat_error;
781
155
  n_edge = isl_schedule_constraints_n_basic_map(sc);
782
155
  if (n_edge < 0)
783
0
    return isl_stat_error;
784
155
  graph->max_row = n_edge + graph->maxvar;
785
155
786
155
  return isl_stat_ok;
787
155
}
788
789
/* Does "bset" have any defining equalities for its set variables?
790
 */
791
static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
792
283
{
793
283
  int i, n;
794
283
795
283
  if (!bset)
796
0
    return isl_bool_error;
797
283
798
283
  n = isl_basic_set_dim(bset, isl_dim_set);
799
664
  for (i = 0; i < n; 
++i381
) {
800
389
    isl_bool has;
801
389
802
389
    has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
803
389
              NULL);
804
389
    if (has < 0 || has)
805
8
      return has;
806
389
  }
807
283
808
283
  
return isl_bool_false275
;
809
283
}
810
811
/* Set the entries of node->max to the value of the schedule_max_coefficient
812
 * option, if set.
813
 */
814
static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
815
4
{
816
4
  int max;
817
4
818
4
  max = isl_options_get_schedule_max_coefficient(ctx);
819
4
  if (max == -1)
820
4
    return isl_stat_ok;
821
0
822
0
  node->max = isl_vec_alloc(ctx, node->nvar);
823
0
  node->max = isl_vec_set_si(node->max, max);
824
0
  if (!node->max)
825
0
    return isl_stat_error;
826
0
827
0
  return isl_stat_ok;
828
0
}
829
830
/* Set the entries of node->max to the minimum of the schedule_max_coefficient
831
 * option (if set) and half of the minimum of the sizes in the other
832
 * dimensions.  Round up when computing the half such that
833
 * if the minimum of the sizes is one, half of the size is taken to be one
834
 * rather than zero.
835
 * If the global minimum is unbounded (i.e., if both
836
 * the schedule_max_coefficient is not set and the sizes in the other
837
 * dimensions are unbounded), then store a negative value.
838
 * If the schedule coefficient is close to the size of the instance set
839
 * in another dimension, then the schedule may represent a loop
840
 * coalescing transformation (especially if the coefficient
841
 * in that other dimension is one).  Forcing the coefficient to be
842
 * smaller than or equal to half the minimal size should avoid this
843
 * situation.
844
 */
845
static isl_stat compute_max_coefficient(isl_ctx *ctx,
846
  struct isl_sched_node *node)
847
279
{
848
279
  int max;
849
279
  int i, j;
850
279
  isl_vec *v;
851
279
852
279
  max = isl_options_get_schedule_max_coefficient(ctx);
853
279
  v = isl_vec_alloc(ctx, node->nvar);
854
279
  if (!v)
855
0
    return isl_stat_error;
856
279
857
650
  
for (i = 0; 279
i < node->nvar;
++i371
) {
858
371
    isl_int_set_si(v->el[i], max);
859
371
    isl_int_mul_si(v->el[i], v->el[i], 2);
860
371
  }
861
279
862
650
  for (i = 0; i < node->nvar; 
++i371
) {
863
371
    isl_val *size;
864
371
865
371
    size = isl_multi_val_get_val(node->sizes, i);
866
371
    if (!size)
867
0
      goto error;
868
371
    if (!isl_val_is_int(size)) {
869
205
      isl_val_free(size);
870
205
      continue;
871
205
    }
872
530
    
for (j = 0; 166
j < node->nvar;
++j364
) {
873
364
      if (j == i)
874
166
        continue;
875
198
      if (isl_int_is_neg(v->el[j]) ||
876
198
          
isl_int_gt177
(v->el[j], size->n))
877
198
        
isl_int_set23
(v->el[j], size->n);
878
198
    }
879
166
    isl_val_free(size);
880
166
  }
881
279
882
650
  
for (i = 0; 279
i < node->nvar;
++i371
)
883
371
    isl_int_cdiv_q_ui(v->el[i], v->el[i], 2);
884
279
885
279
  node->max = v;
886
279
  return isl_stat_ok;
887
0
error:
888
0
  isl_vec_free(v);
889
0
  return isl_stat_error;
890
279
}
891
892
/* Compute and return the size of "set" in dimension "dim".
893
 * The size is taken to be the difference in values for that variable
894
 * for fixed values of the other variables.
895
 * This assumes that "set" is convex.
896
 * In particular, the variable is first isolated from the other variables
897
 * in the range of a map
898
 *
899
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
900
 *
901
 * and then duplicated
902
 *
903
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
904
 *
905
 * The shared variables are then projected out and the maximal value
906
 * of i_dim' - i_dim is computed.
907
 */
908
static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
909
371
{
910
371
  isl_map *map;
911
371
  isl_local_space *ls;
912
371
  isl_aff *obj;
913
371
  isl_val *v;
914
371
915
371
  map = isl_set_project_onto_map(set, isl_dim_set, dim, 1);
916
371
  map = isl_map_project_out(map, isl_dim_in, dim, 1);
917
371
  map = isl_map_range_product(map, isl_map_copy(map));
918
371
  map = isl_set_unwrap(isl_map_range(map));
919
371
  set = isl_map_deltas(map);
920
371
  ls = isl_local_space_from_space(isl_set_get_space(set));
921
371
  obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
922
371
  v = isl_set_max_val(set, obj);
923
371
  isl_aff_free(obj);
924
371
  isl_set_free(set);
925
371
926
371
  return v;
927
371
}
928
929
/* Compute the size of the instance set "set" of "node", after compression,
930
 * as well as bounds on the corresponding coefficients, if needed.
931
 *
932
 * The sizes are needed when the schedule_treat_coalescing option is set.
933
 * The bounds are needed when the schedule_treat_coalescing option or
934
 * the schedule_max_coefficient option is set.
935
 *
936
 * If the schedule_treat_coalescing option is not set, then at most
937
 * the bounds need to be set and this is done in set_max_coefficient.
938
 * Otherwise, compress the domain if needed, compute the size
939
 * in each direction and store the results in node->size.
940
 * If the domain is not convex, then the sizes are computed
941
 * on a convex superset in order to avoid picking up sizes
942
 * that are valid for the individual disjuncts, but not for
943
 * the domain as a whole.
944
 * Finally, set the bounds on the coefficients based on the sizes
945
 * and the schedule_max_coefficient option in compute_max_coefficient.
946
 */
947
static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
948
  __isl_take isl_set *set)
949
283
{
950
283
  int j, n;
951
283
  isl_multi_val *mv;
952
283
953
283
  if (!isl_options_get_schedule_treat_coalescing(ctx)) {
954
4
    isl_set_free(set);
955
4
    return set_max_coefficient(ctx, node);
956
4
  }
957
279
958
279
  if (node->compressed)
959
8
    set = isl_set_preimage_multi_aff(set,
960
8
          isl_multi_aff_copy(node->decompress));
961
279
  set = isl_set_from_basic_set(isl_set_simple_hull(set));
962
279
  mv = isl_multi_val_zero(isl_set_get_space(set));
963
279
  n = isl_set_dim(set, isl_dim_set);
964
650
  for (j = 0; j < n; 
++j371
) {
965
371
    isl_val *v;
966
371
967
371
    v = compute_size(isl_set_copy(set), j);
968
371
    mv = isl_multi_val_set_val(mv, j, v);
969
371
  }
970
279
  node->sizes = mv;
971
279
  isl_set_free(set);
972
279
  if (!node->sizes)
973
0
    return isl_stat_error;
974
279
  return compute_max_coefficient(ctx, node);
975
279
}
976
977
/* Add a new node to the graph representing the given instance set.
978
 * "nvar" is the (possibly compressed) number of variables and
979
 * may be smaller than then number of set variables in "set"
980
 * if "compressed" is set.
981
 * If "compressed" is set, then "hull" represents the constraints
982
 * that were used to derive the compression, while "compress" and
983
 * "decompress" map the original space to the compressed space and
984
 * vice versa.
985
 * If "compressed" is not set, then "hull", "compress" and "decompress"
986
 * should be NULL.
987
 *
988
 * Compute the size of the instance set and bounds on the coefficients,
989
 * if needed.
990
 */
991
static isl_stat add_node(struct isl_sched_graph *graph,
992
  __isl_take isl_set *set, int nvar, int compressed,
993
  __isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
994
  __isl_take isl_multi_aff *decompress)
995
283
{
996
283
  int nparam;
997
283
  isl_ctx *ctx;
998
283
  isl_mat *sched;
999
283
  isl_space *space;
1000
283
  int *coincident;
1001
283
  struct isl_sched_node *node;
1002
283
1003
283
  if (!set)
1004
0
    return isl_stat_error;
1005
283
1006
283
  ctx = isl_set_get_ctx(set);
1007
283
  nparam = isl_set_dim(set, isl_dim_param);
1008
283
  if (!ctx->opt->schedule_parametric)
1009
28
    nparam = 0;
1010
283
  sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
1011
283
  node = &graph->node[graph->n];
1012
283
  graph->n++;
1013
283
  space = isl_set_get_space(set);
1014
283
  node->space = space;
1015
283
  node->nvar = nvar;
1016
283
  node->nparam = nparam;
1017
283
  node->sched = sched;
1018
283
  node->sched_map = NULL;
1019
283
  coincident = isl_calloc_array(ctx, int, graph->max_row);
1020
283
  node->coincident = coincident;
1021
283
  node->compressed = compressed;
1022
283
  node->hull = hull;
1023
283
  node->compress = compress;
1024
283
  node->decompress = decompress;
1025
283
  if (compute_sizes_and_max(ctx, node, set) < 0)
1026
0
    return isl_stat_error;
1027
283
1028
283
  if (!space || !sched || (graph->max_row && 
!coincident281
))
1029
0
    return isl_stat_error;
1030
283
  if (compressed && 
(8
!hull8
||
!compress8
||
!decompress8
))
1031
0
    return isl_stat_error;
1032
283
1033
283
  return isl_stat_ok;
1034
283
}
1035
1036
/* Construct an identifier for node "node", which will represent "set".
1037
 * The name of the identifier is either "compressed" or
1038
 * "compressed_<name>", with <name> the name of the space of "set".
1039
 * The user pointer of the identifier points to "node".
1040
 */
1041
static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set,
1042
  struct isl_sched_node *node)
1043
8
{
1044
8
  isl_bool has_name;
1045
8
  isl_ctx *ctx;
1046
8
  isl_id *id;
1047
8
  isl_printer *p;
1048
8
  const char *name;
1049
8
  char *id_name;
1050
8
1051
8
  has_name = isl_set_has_tuple_name(set);
1052
8
  if (has_name < 0)
1053
0
    return NULL;
1054
8
1055
8
  ctx = isl_set_get_ctx(set);
1056
8
  if (!has_name)
1057
0
    return isl_id_alloc(ctx, "compressed", node);
1058
8
1059
8
  p = isl_printer_to_str(ctx);
1060
8
  name = isl_set_get_tuple_name(set);
1061
8
  p = isl_printer_print_str(p, "compressed_");
1062
8
  p = isl_printer_print_str(p, name);
1063
8
  id_name = isl_printer_get_str(p);
1064
8
  isl_printer_free(p);
1065
8
1066
8
  id = isl_id_alloc(ctx, id_name, node);
1067
8
  free(id_name);
1068
8
1069
8
  return id;
1070
8
}
1071
1072
/* Add a new node to the graph representing the given set.
1073
 *
1074
 * If any of the set variables is defined by an equality, then
1075
 * we perform variable compression such that we can perform
1076
 * the scheduling on the compressed domain.
1077
 * In this case, an identifier is used that references the new node
1078
 * such that each compressed space is unique and
1079
 * such that the node can be recovered from the compressed space.
1080
 */
1081
static isl_stat extract_node(__isl_take isl_set *set, void *user)
1082
283
{
1083
283
  int nvar;
1084
283
  isl_bool has_equality;
1085
283
  isl_id *id;
1086
283
  isl_basic_set *hull;
1087
283
  isl_set *hull_set;
1088
283
  isl_morph *morph;
1089
283
  isl_multi_aff *compress, *decompress;
1090
283
  struct isl_sched_graph *graph = user;
1091
283
1092
283
  hull = isl_set_affine_hull(isl_set_copy(set));
1093
283
  hull = isl_basic_set_remove_divs(hull);
1094
283
  nvar = isl_set_dim(set, isl_dim_set);
1095
283
  has_equality = has_any_defining_equality(hull);
1096
283
1097
283
  if (has_equality < 0)
1098
0
    goto error;
1099
283
  if (!has_equality) {
1100
275
    isl_basic_set_free(hull);
1101
275
    return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
1102
275
  }
1103
8
1104
8
  id = construct_compressed_id(set, &graph->node[graph->n]);
1105
8
  morph = isl_basic_set_variable_compression_with_id(hull,
1106
8
                  isl_dim_set, id);
1107
8
  isl_id_free(id);
1108
8
  nvar = isl_morph_ran_dim(morph, isl_dim_set);
1109
8
  compress = isl_morph_get_var_multi_aff(morph);
1110
8
  morph = isl_morph_inverse(morph);
1111
8
  decompress = isl_morph_get_var_multi_aff(morph);
1112
8
  isl_morph_free(morph);
1113
8
1114
8
  hull_set = isl_set_from_basic_set(hull);
1115
8
  return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
1116
0
error:
1117
0
  isl_basic_set_free(hull);
1118
0
  isl_set_free(set);
1119
0
  return isl_stat_error;
1120
8
}
1121
1122
struct isl_extract_edge_data {
1123
  enum isl_edge_type type;
1124
  struct isl_sched_graph *graph;
1125
};
1126
1127
/* Merge edge2 into edge1, freeing the contents of edge2.
1128
 * Return 0 on success and -1 on failure.
1129
 *
1130
 * edge1 and edge2 are assumed to have the same value for the map field.
1131
 */
1132
static int merge_edge(struct isl_sched_edge *edge1,
1133
  struct isl_sched_edge *edge2)
1134
397
{
1135
397
  edge1->types |= edge2->types;
1136
397
  isl_map_free(edge2->map);
1137
397
1138
397
  if (is_condition(edge2)) {
1139
23
    if (!edge1->tagged_condition)
1140
23
      edge1->tagged_condition = edge2->tagged_condition;
1141
0
    else
1142
0
      edge1->tagged_condition =
1143
0
        isl_union_map_union(edge1->tagged_condition,
1144
0
                edge2->tagged_condition);
1145
23
  }
1146
397
1147
397
  if (is_conditional_validity(edge2)) {
1148
3
    if (!edge1->tagged_validity)
1149
3
      edge1->tagged_validity = edge2->tagged_validity;
1150
0
    else
1151
0
      edge1->tagged_validity =
1152
0
        isl_union_map_union(edge1->tagged_validity,
1153
0
                edge2->tagged_validity);
1154
3
  }
1155
397
1156
397
  if (is_condition(edge2) && 
!edge1->tagged_condition23
)
1157
0
    return -1;
1158
397
  if (is_conditional_validity(edge2) && 
!edge1->tagged_validity3
)
1159
0
    return -1;
1160
397
1161
397
  return 0;
1162
397
}
1163
1164
/* Insert dummy tags in domain and range of "map".
1165
 *
1166
 * In particular, if "map" is of the form
1167
 *
1168
 *  A -> B
1169
 *
1170
 * then return
1171
 *
1172
 *  [A -> dummy_tag] -> [B -> dummy_tag]
1173
 *
1174
 * where the dummy_tags are identical and equal to any dummy tags
1175
 * introduced by any other call to this function.
1176
 */
1177
static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
1178
12
{
1179
12
  static char dummy;
1180
12
  isl_ctx *ctx;
1181
12
  isl_id *id;
1182
12
  isl_space *space;
1183
12
  isl_set *domain, *range;
1184
12
1185
12
  ctx = isl_map_get_ctx(map);
1186
12
1187
12
  id = isl_id_alloc(ctx, NULL, &dummy);
1188
12
  space = isl_space_params(isl_map_get_space(map));
1189
12
  space = isl_space_set_from_params(space);
1190
12
  space = isl_space_set_tuple_id(space, isl_dim_set, id);
1191
12
  space = isl_space_map_from_set(space);
1192
12
1193
12
  domain = isl_map_wrap(map);
1194
12
  range = isl_map_wrap(isl_map_universe(space));
1195
12
  map = isl_map_from_domain_and_range(domain, range);
1196
12
  map = isl_map_zip(map);
1197
12
1198
12
  return map;
1199
12
}
1200
1201
/* Given that at least one of "src" or "dst" is compressed, return
1202
 * a map between the spaces of these nodes restricted to the affine
1203
 * hull that was used in the compression.
1204
 */
1205
static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
1206
  struct isl_sched_node *dst)
1207
22
{
1208
22
  isl_set *dom, *ran;
1209
22
1210
22
  if (src->compressed)
1211
14
    dom = isl_set_copy(src->hull);
1212
8
  else
1213
8
    dom = isl_set_universe(isl_space_copy(src->space));
1214
22
  if (dst->compressed)
1215
14
    ran = isl_set_copy(dst->hull);
1216
8
  else
1217
8
    ran = isl_set_universe(isl_space_copy(dst->space));
1218
22
1219
22
  return isl_map_from_domain_and_range(dom, ran);
1220
22
}
1221
1222
/* Intersect the domains of the nested relations in domain and range
1223
 * of "tagged" with "map".
1224
 */
1225
static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
1226
  __isl_keep isl_map *map)
1227
14
{
1228
14
  isl_set *set;
1229
14
1230
14
  tagged = isl_map_zip(tagged);
1231
14
  set = isl_map_wrap(isl_map_copy(map));
1232
14
  tagged = isl_map_intersect_domain(tagged, set);
1233
14
  tagged = isl_map_zip(tagged);
1234
14
  return tagged;
1235
14
}
1236
1237
/* Return a pointer to the node that lives in the domain space of "map",
1238
 * an invalid node if there is no such node, or NULL in case of error.
1239
 */
1240
static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
1241
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1242
690
{
1243
690
  struct isl_sched_node *node;
1244
690
  isl_space *space;
1245
690
1246
690
  space = isl_space_domain(isl_map_get_space(map));
1247
690
  node = graph_find_node(ctx, graph, space);
1248
690
  isl_space_free(space);
1249
690
1250
690
  return node;
1251
690
}
1252
1253
/* Return a pointer to the node that lives in the range space of "map",
1254
 * an invalid node if there is no such node, or NULL in case of error.
1255
 */
1256
static struct isl_sched_node *find_range_node(isl_ctx *ctx,
1257
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1258
690
{
1259
690
  struct isl_sched_node *node;
1260
690
  isl_space *space;
1261
690
1262
690
  space = isl_space_range(isl_map_get_space(map));
1263
690
  node = graph_find_node(ctx, graph, space);
1264
690
  isl_space_free(space);
1265
690
1266
690
  return node;
1267
690
}
1268
1269
/* Refrain from adding a new edge based on "map".
1270
 * Instead, just free the map.
1271
 * "tagged" is either a copy of "map" with additional tags or NULL.
1272
 */
1273
static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged)
1274
24
{
1275
24
  isl_map_free(map);
1276
24
  isl_map_free(tagged);
1277
24
1278
24
  return isl_stat_ok;
1279
24
}
1280
1281
/* Add a new edge to the graph based on the given map
1282
 * and add it to data->graph->edge_table[data->type].
1283
 * If a dependence relation of a given type happens to be identical
1284
 * to one of the dependence relations of a type that was added before,
1285
 * then we don't create a new edge, but instead mark the original edge
1286
 * as also representing a dependence of the current type.
1287
 *
1288
 * Edges of type isl_edge_condition or isl_edge_conditional_validity
1289
 * may be specified as "tagged" dependence relations.  That is, "map"
1290
 * may contain elements (i -> a) -> (j -> b), where i -> j denotes
1291
 * the dependence on iterations and a and b are tags.
1292
 * edge->map is set to the relation containing the elements i -> j,
1293
 * while edge->tagged_condition and edge->tagged_validity contain
1294
 * the union of all the "map" relations
1295
 * for which extract_edge is called that result in the same edge->map.
1296
 *
1297
 * If the source or the destination node is compressed, then
1298
 * intersect both "map" and "tagged" with the constraints that
1299
 * were used to construct the compression.
1300
 * This ensures that there are no schedule constraints defined
1301
 * outside of these domains, while the scheduler no longer has
1302
 * any control over those outside parts.
1303
 */
1304
static isl_stat extract_edge(__isl_take isl_map *map, void *user)
1305
690
{
1306
690
  isl_bool empty;
1307
690
  isl_ctx *ctx = isl_map_get_ctx(map);
1308
690
  struct isl_extract_edge_data *data = user;
1309
690
  struct isl_sched_graph *graph = data->graph;
1310
690
  struct isl_sched_node *src, *dst;
1311
690
  struct isl_sched_edge *edge;
1312
690
  isl_map *tagged = NULL;
1313
690
1314
690
  if (data->type == isl_edge_condition ||
1315
690
      
data->type == isl_edge_conditional_validity661
) {
1316
61
    if (isl_map_can_zip(map)) {
1317
49
      tagged = isl_map_copy(map);
1318
49
      map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
1319
49
    } else {
1320
12
      tagged = insert_dummy_tags(isl_map_copy(map));
1321
12
    }
1322
61
  }
1323
690
1324
690
  src = find_domain_node(ctx, graph, map);
1325
690
  dst = find_range_node(ctx, graph, map);
1326
690
1327
690
  if (!src || !dst)
1328
0
    goto error;
1329
690
  if (!is_node(graph, src) || !is_node(graph, dst))
1330
24
    return skip_edge(map, tagged);
1331
666
1332
666
  if (src->compressed || 
dst->compressed652
) {
1333
22
    isl_map *hull;
1334
22
    hull = extract_hull(src, dst);
1335
22
    if (tagged)
1336
14
      tagged = map_intersect_domains(tagged, hull);
1337
22
    map = isl_map_intersect(map, hull);
1338
22
  }
1339
666
1340
666
  empty = isl_map_plain_is_empty(map);
1341
666
  if (empty < 0)
1342
0
    goto error;
1343
666
  if (empty)
1344
0
    return skip_edge(map, tagged);
1345
666
1346
666
  graph->edge[graph->n_edge].src = src;
1347
666
  graph->edge[graph->n_edge].dst = dst;
1348
666
  graph->edge[graph->n_edge].map = map;
1349
666
  graph->edge[graph->n_edge].types = 0;
1350
666
  graph->edge[graph->n_edge].tagged_condition = NULL;
1351
666
  graph->edge[graph->n_edge].tagged_validity = NULL;
1352
666
  set_type(&graph->edge[graph->n_edge], data->type);
1353
666
  if (data->type == isl_edge_condition)
1354
29
    graph->edge[graph->n_edge].tagged_condition =
1355
29
          isl_union_map_from_map(tagged);
1356
666
  if (data->type == isl_edge_conditional_validity)
1357
32
    graph->edge[graph->n_edge].tagged_validity =
1358
32
          isl_union_map_from_map(tagged);
1359
666
1360
666
  edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
1361
666
  if (!edge) {
1362
0
    graph->n_edge++;
1363
0
    return isl_stat_error;
1364
0
  }
1365
666
  if (edge == &graph->edge[graph->n_edge])
1366
269
    return graph_edge_table_add(ctx, graph, data->type,
1367
269
            &graph->edge[graph->n_edge++]);
1368
397
1369
397
  if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0)
1370
0
    return isl_stat_error;
1371
397
1372
397
  return graph_edge_table_add(ctx, graph, data->type, edge);
1373
0
error:
1374
0
  isl_map_free(map);
1375
0
  isl_map_free(tagged);
1376
0
  return isl_stat_error;
1377
397
}
1378
1379
/* Initialize the schedule graph "graph" from the schedule constraints "sc".
1380
 *
1381
 * The context is included in the domain before the nodes of
1382
 * the graphs are extracted in order to be able to exploit
1383
 * any possible additional equalities.
1384
 * Note that this intersection is only performed locally here.
1385
 */
1386
static isl_stat graph_init(struct isl_sched_graph *graph,
1387
  __isl_keep isl_schedule_constraints *sc)
1388
155
{
1389
155
  isl_ctx *ctx;
1390
155
  isl_union_set *domain;
1391
155
  isl_union_map *c;
1392
155
  struct isl_extract_edge_data data;
1393
155
  enum isl_edge_type i;
1394
155
  isl_stat r;
1395
155
1396
155
  if (!sc)
1397
0
    return isl_stat_error;
1398
155
1399
155
  ctx = isl_schedule_constraints_get_ctx(sc);
1400
155
1401
155
  domain = isl_schedule_constraints_get_domain(sc);
1402
155
  graph->n = isl_union_set_n_set(domain);
1403
155
  isl_union_set_free(domain);
1404
155
1405
155
  if (graph_alloc(ctx, graph, graph->n,
1406
155
      isl_schedule_constraints_n_map(sc)) < 0)
1407
0
    return isl_stat_error;
1408
155
1409
155
  if (compute_max_row(graph, sc) < 0)
1410
0
    return isl_stat_error;
1411
155
  graph->root = graph;
1412
155
  graph->n = 0;
1413
155
  domain = isl_schedule_constraints_get_domain(sc);
1414
155
  domain = isl_union_set_intersect_params(domain,
1415
155
            isl_schedule_constraints_get_context(sc));
1416
155
  r = isl_union_set_foreach_set(domain, &extract_node, graph);
1417
155
  isl_union_set_free(domain);
1418
155
  if (r < 0)
1419
0
    return isl_stat_error;
1420
155
  if (graph_init_table(ctx, graph) < 0)
1421
0
    return isl_stat_error;
1422
930
  
for (i = isl_edge_first; 155
i <= isl_edge_last;
++i775
) {
1423
775
    c = isl_schedule_constraints_get(sc, i);
1424
775
    graph->max_edge[i] = isl_union_map_n_map(c);
1425
775
    isl_union_map_free(c);
1426
775
    if (!c)
1427
0
      return isl_stat_error;
1428
775
  }
1429
155
  if (graph_init_edge_tables(ctx, graph) < 0)
1430
0
    return isl_stat_error;
1431
155
  graph->n_edge = 0;
1432
155
  data.graph = graph;
1433
930
  for (i = isl_edge_first; i <= isl_edge_last; 
++i775
) {
1434
775
    isl_stat r;
1435
775
1436
775
    data.type = i;
1437
775
    c = isl_schedule_constraints_get(sc, i);
1438
775
    r = isl_union_map_foreach_map(c, &extract_edge, &data);
1439
775
    isl_union_map_free(c);
1440
775
    if (r < 0)
1441
0
      return isl_stat_error;
1442
775
  }
1443
155
1444
155
  return isl_stat_ok;
1445
155
}
1446
1447
/* Check whether there is any dependence from node[j] to node[i]
1448
 * or from node[i] to node[j].
1449
 */
1450
static isl_bool node_follows_weak(int i, int j, void *user)
1451
323
{
1452
323
  isl_bool f;
1453
323
  struct isl_sched_graph *graph = user;
1454
323
1455
323
  f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
1456
323
  if (f < 0 || f)
1457
149
    return f;
1458
174
  return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
1459
174
}
1460
1461
/* Check whether there is a (conditional) validity dependence from node[j]
1462
 * to node[i], forcing node[i] to follow node[j].
1463
 */
1464
static isl_bool node_follows_strong(int i, int j, void *user)
1465
313
{
1466
313
  struct isl_sched_graph *graph = user;
1467
313
1468
313
  return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
1469
313
}
1470
1471
/* Use Tarjan's algorithm for computing the strongly connected components
1472
 * in the dependence graph only considering those edges defined by "follows".
1473
 */
1474
static isl_stat detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph,
1475
  isl_bool (*follows)(int i, int j, void *user))
1476
666
{
1477
666
  int i, n;
1478
666
  struct isl_tarjan_graph *g = NULL;
1479
666
1480
666
  g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
1481
666
  if (!g)
1482
0
    return isl_stat_error;
1483
666
1484
666
  graph->scc = 0;
1485
666
  i = 0;
1486
666
  n = graph->n;
1487
1.51k
  while (n) {
1488
1.87k
    while (g->order[i] != -1) {
1489
1.02k
      graph->node[g->order[i]].scc = graph->scc;
1490
1.02k
      --n;
1491
1.02k
      ++i;
1492
1.02k
    }
1493
848
    ++i;
1494
848
    graph->scc++;
1495
848
  }
1496
666
1497
666
  isl_tarjan_graph_free(g);
1498
666
1499
666
  return isl_stat_ok;
1500
666
}
1501
1502
/* Apply Tarjan's algorithm to detect the strongly connected components
1503
 * in the dependence graph.
1504
 * Only consider the (conditional) validity dependences and clear "weak".
1505
 */
1506
static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1507
434
{
1508
434
  graph->weak = 0;
1509
434
  return detect_ccs(ctx, graph, &node_follows_strong);
1510
434
}
1511
1512
/* Apply Tarjan's algorithm to detect the (weakly) connected components
1513
 * in the dependence graph.
1514
 * Consider all dependences and set "weak".
1515
 */
1516
static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1517
203
{
1518
203
  graph->weak = 1;
1519
203
  return detect_ccs(ctx, graph, &node_follows_weak);
1520
203
}
1521
1522
static int cmp_scc(const void *a, const void *b, void *data)
1523
125
{
1524
125
  struct isl_sched_graph *graph = data;
1525
125
  const int *i1 = a;
1526
125
  const int *i2 = b;
1527
125
1528
125
  return graph->node[*i1].scc - graph->node[*i2].scc;
1529
125
}
1530
1531
/* Sort the elements of graph->sorted according to the corresponding SCCs.
1532
 */
1533
static int sort_sccs(struct isl_sched_graph *graph)
1534
385
{
1535
385
  return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
1536
385
}
1537
1538
/* Return a non-parametric set in the compressed space of "node" that is
1539
 * bounded by the size in each direction
1540
 *
1541
 *  { [x] : -S_i <= x_i <= S_i }
1542
 *
1543
 * If S_i is infinity in direction i, then there are no constraints
1544
 * in that direction.
1545
 *
1546
 * Cache the result in node->bounds.
1547
 */
1548
static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node)
1549
129
{
1550
129
  isl_space *space;
1551
129
  isl_basic_set *bounds;
1552
129
  int i;
1553
129
  unsigned nparam;
1554
129
1555
129
  if (node->bounds)
1556
32
    return isl_basic_set_copy(node->bounds);
1557
97
1558
97
  if (node->compressed)
1559
0
    space = isl_multi_aff_get_domain_space(node->decompress);
1560
97
  else
1561
97
    space = isl_space_copy(node->space);
1562
97
  nparam = isl_space_dim(space, isl_dim_param);
1563
97
  space = isl_space_drop_dims(space, isl_dim_param, 0, nparam);
1564
97
  bounds = isl_basic_set_universe(space);
1565
97
1566
288
  for (i = 0; i < node->nvar; 
++i191
) {
1567
191
    isl_val *size;
1568
191
1569
191
    size = isl_multi_val_get_val(node->sizes, i);
1570
191
    if (!size)
1571
0
      return isl_basic_set_free(bounds);
1572
191
    if (!isl_val_is_int(size)) {
1573
123
      isl_val_free(size);
1574
123
      continue;
1575
123
    }
1576
68
    bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i,
1577
68
              isl_val_copy(size));
1578
68
    bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i,
1579
68
              isl_val_neg(size));
1580
68
  }
1581
97
1582
97
  node->bounds = isl_basic_set_copy(bounds);
1583
97
  return bounds;
1584
97
}
1585
1586
/* Drop some constraints from "delta" that could be exploited
1587
 * to construct loop coalescing schedules.
1588
 * In particular, drop those constraint that bound the difference
1589
 * to the size of the domain.
1590
 * First project out the parameters to improve the effectiveness.
1591
 */
1592
static __isl_give isl_set *drop_coalescing_constraints(
1593
  __isl_take isl_set *delta, struct isl_sched_node *node)
1594
105
{
1595
105
  unsigned nparam;
1596
105
  isl_basic_set *bounds;
1597
105
1598
105
  bounds = get_size_bounds(node);
1599
105
1600
105
  nparam = isl_set_dim(delta, isl_dim_param);
1601
105
  delta = isl_set_project_out(delta, isl_dim_param, 0, nparam);
1602
105
  delta = isl_set_remove_divs(delta);
1603
105
  delta = isl_set_plain_gist_basic_set(delta, bounds);
1604
105
  return delta;
1605
105
}
1606
1607
/* Given a dependence relation R from "node" to itself,
1608
 * construct the set of coefficients of valid constraints for elements
1609
 * in that dependence relation.
1610
 * In particular, the result contains tuples of coefficients
1611
 * c_0, c_n, c_x such that
1612
 *
1613
 *  c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
1614
 *
1615
 * or, equivalently,
1616
 *
1617
 *  c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
1618
 *
1619
 * We choose here to compute the dual of delta R.
1620
 * Alternatively, we could have computed the dual of R, resulting
1621
 * in a set of tuples c_0, c_n, c_x, c_y, and then
1622
 * plugged in (c_0, c_n, c_x, -c_x).
1623
 *
1624
 * If "need_param" is set, then the resulting coefficients effectively
1625
 * include coefficients for the parameters c_n.  Otherwise, they may
1626
 * have been projected out already.
1627
 * Since the constraints may be different for these two cases,
1628
 * they are stored in separate caches.
1629
 * In particular, if no parameter coefficients are required and
1630
 * the schedule_treat_coalescing option is set, then the parameters
1631
 * are projected out and some constraints that could be exploited
1632
 * to construct coalescing schedules are removed before the dual
1633
 * is computed.
1634
 *
1635
 * If "node" has been compressed, then the dependence relation
1636
 * is also compressed before the set of coefficients is computed.
1637
 */
1638
static __isl_give isl_basic_set *intra_coefficients(
1639
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1640
  __isl_take isl_map *map, int need_param)
1641
1.00k
{
1642
1.00k
  isl_ctx *ctx;
1643
1.00k
  isl_set *delta;
1644
1.00k
  isl_map *key;
1645
1.00k
  isl_basic_set *coef;
1646
1.00k
  isl_maybe_isl_basic_set m;
1647
1.00k
  isl_map_to_basic_set **hmap = &graph->intra_hmap;
1648
1.00k
  int treat;
1649
1.00k
1650
1.00k
  if (!map)
1651
0
    return NULL;
1652
1.00k
1653
1.00k
  ctx = isl_map_get_ctx(map);
1654
1.00k
  treat = !need_param && 
isl_options_get_schedule_treat_coalescing(ctx)716
;
1655
1.00k
  if (!treat)
1656
318
    hmap = &graph->intra_hmap_param;
1657
1.00k
  m = isl_map_to_basic_set_try_get(*hmap, map);
1658
1.00k
  if (m.valid < 0 || m.valid) {
1659
804
    isl_map_free(map);
1660
804
    return m.value;
1661
804
  }
1662
198
1663
198
  key = isl_map_copy(map);
1664
198
  if (node->compressed) {
1665
0
    map = isl_map_preimage_domain_multi_aff(map,
1666
0
            isl_multi_aff_copy(node->decompress));
1667
0
    map = isl_map_preimage_range_multi_aff(map,
1668
0
            isl_multi_aff_copy(node->decompress));
1669
0
  }
1670
198
  delta = isl_map_deltas(map);
1671
198
  if (treat)
1672
105
    delta = drop_coalescing_constraints(delta, node);
1673
198
  delta = isl_set_remove_divs(delta);
1674
198
  coef = isl_set_coefficients(delta);
1675
198
  *hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef));
1676
198
1677
198
  return coef;
1678
198
}
1679
1680
/* Given a dependence relation R, construct the set of coefficients
1681
 * of valid constraints for elements in that dependence relation.
1682
 * In particular, the result contains tuples of coefficients
1683
 * c_0, c_n, c_x, c_y such that
1684
 *
1685
 *  c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
1686
 *
1687
 * If the source or destination nodes of "edge" have been compressed,
1688
 * then the dependence relation is also compressed before
1689
 * the set of coefficients is computed.
1690
 */
1691
static __isl_give isl_basic_set *inter_coefficients(
1692
  struct isl_sched_graph *graph, struct isl_sched_edge *edge,
1693
  __isl_take isl_map *map)
1694
616
{
1695
616
  isl_set *set;
1696
616
  isl_map *key;
1697
616
  isl_basic_set *coef;
1698
616
  isl_maybe_isl_basic_set m;
1699
616
1700
616
  m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
1701
616
  if (m.valid < 0 || m.valid) {
1702
529
    isl_map_free(map);
1703
529
    return m.value;
1704
529
  }
1705
87
1706
87
  key = isl_map_copy(map);
1707
87
  if (edge->src->compressed)
1708
3
    map = isl_map_preimage_domain_multi_aff(map,
1709
3
            isl_multi_aff_copy(edge->src->decompress));
1710
87
  if (edge->dst->compressed)
1711
3
    map = isl_map_preimage_range_multi_aff(map,
1712
3
            isl_multi_aff_copy(edge->dst->decompress));
1713
87
  set = isl_map_wrap(isl_map_remove_divs(map));
1714
87
  coef = isl_set_coefficients(set);
1715
87
  graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
1716
87
          isl_basic_set_copy(coef));
1717
87
1718
87
  return coef;
1719
87
}
1720
1721
/* Return the position of the coefficients of the variables in
1722
 * the coefficients constraints "coef".
1723
 *
1724
 * The space of "coef" is of the form
1725
 *
1726
 *  { coefficients[[cst, params] -> S] }
1727
 *
1728
 * Return the position of S.
1729
 */
1730
static int coef_var_offset(__isl_keep isl_basic_set *coef)
1731
1.04k
{
1732
1.04k
  int offset;
1733
1.04k
  isl_space *space;
1734
1.04k
1735
1.04k
  space = isl_space_unwrap(isl_basic_set_get_space(coef));
1736
1.04k
  offset = isl_space_dim(space, isl_dim_in);
1737
1.04k
  isl_space_free(space);
1738
1.04k
1739
1.04k
  return offset;
1740
1.04k
}
1741
1742
/* Return the offset of the coefficient of the constant term of "node"
1743
 * within the (I)LP.
1744
 *
1745
 * Within each node, the coefficients have the following order:
1746
 *  - positive and negative parts of c_i_x
1747
 *  - c_i_n (if parametric)
1748
 *  - c_i_0
1749
 */
1750
static int node_cst_coef_offset(struct isl_sched_node *node)
1751
1.46k
{
1752
1.46k
  return node->start + 2 * node->nvar + node->nparam;
1753
1.46k
}
1754
1755
/* Return the offset of the coefficients of the parameters of "node"
1756
 * within the (I)LP.
1757
 *
1758
 * Within each node, the coefficients have the following order:
1759
 *  - positive and negative parts of c_i_x
1760
 *  - c_i_n (if parametric)
1761
 *  - c_i_0
1762
 */
1763
static int node_par_coef_offset(struct isl_sched_node *node)
1764
1.93k
{
1765
1.93k
  return node->start + 2 * node->nvar;
1766
1.93k
}
1767
1768
/* Return the offset of the coefficients of the variables of "node"
1769
 * within the (I)LP.
1770
 *
1771
 * Within each node, the coefficients have the following order:
1772
 *  - positive and negative parts of c_i_x
1773
 *  - c_i_n (if parametric)
1774
 *  - c_i_0
1775
 */
1776
static int node_var_coef_offset(struct isl_sched_node *node)
1777
4.18k
{
1778
4.18k
  return node->start;
1779
4.18k
}
1780
1781
/* Return the position of the pair of variables encoding
1782
 * coefficient "i" of "node".
1783
 *
1784
 * The order of these variable pairs is the opposite of
1785
 * that of the coefficients, with 2 variables per coefficient.
1786
 */
1787
static int node_var_coef_pos(struct isl_sched_node *node, int i)
1788
2.46k
{
1789
2.46k
  return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i);
1790
2.46k
}
1791
1792
/* Construct an isl_dim_map for mapping constraints on coefficients
1793
 * for "node" to the corresponding positions in graph->lp.
1794
 * "offset" is the offset of the coefficients for the variables
1795
 * in the input constraints.
1796
 * "s" is the sign of the mapping.
1797
 *
1798
 * The input constraints are given in terms of the coefficients
1799
 * (c_0, c_x) or (c_0, c_n, c_x).
1800
 * The mapping produced by this function essentially plugs in
1801
 * (0, c_i_x^+ - c_i_x^-) if s = 1 and
1802
 * (0, -c_i_x^+ + c_i_x^-) if s = -1 or
1803
 * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
1804
 * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
1805
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1806
 * Furthermore, the order of these pairs is the opposite of that
1807
 * of the corresponding coefficients.
1808
 *
1809
 * The caller can extend the mapping to also map the other coefficients
1810
 * (and therefore not plug in 0).
1811
 */
1812
static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
1813
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1814
  int offset, int s)
1815
622
{
1816
622
  int pos;
1817
622
  unsigned total;
1818
622
  isl_dim_map *dim_map;
1819
622
1820
622
  if (!node || !graph->lp)
1821
0
    return NULL;
1822
622
1823
622
  total = isl_basic_set_total_dim(graph->lp);
1824
622
  pos = node_var_coef_pos(node, 0);
1825
622
  dim_map = isl_dim_map_alloc(ctx, total);
1826
622
  isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s);
1827
622
  isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s);
1828
622
1829
622
  return dim_map;
1830
622
}
1831
1832
/* Construct an isl_dim_map for mapping constraints on coefficients
1833
 * for "src" (node i) and "dst" (node j) to the corresponding positions
1834
 * in graph->lp.
1835
 * "offset" is the offset of the coefficients for the variables of "src"
1836
 * in the input constraints.
1837
 * "s" is the sign of the mapping.
1838
 *
1839
 * The input constraints are given in terms of the coefficients
1840
 * (c_0, c_n, c_x, c_y).
1841
 * The mapping produced by this function essentially plugs in
1842
 * (c_j_0 - c_i_0, c_j_n - c_i_n,
1843
 *  -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
1844
 * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
1845
 *  c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
1846
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1847
 * Furthermore, the order of these pairs is the opposite of that
1848
 * of the corresponding coefficients.
1849
 *
1850
 * The caller can further extend the mapping.
1851
 */
1852
static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
1853
  struct isl_sched_graph *graph, struct isl_sched_node *src,
1854
  struct isl_sched_node *dst, int offset, int s)
1855
424
{
1856
424
  int pos;
1857
424
  unsigned total;
1858
424
  isl_dim_map *dim_map;
1859
424
1860
424
  if (!src || !dst || !graph->lp)
1861
0
    return NULL;
1862
424
1863
424
  total = isl_basic_set_total_dim(graph->lp);
1864
424
  dim_map = isl_dim_map_alloc(ctx, total);
1865
424
1866
424
  pos = node_cst_coef_offset(dst);
1867
424
  isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s);
1868
424
  pos = node_par_coef_offset(dst);
1869
424
  isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s);
1870
424
  pos = node_var_coef_pos(dst, 0);
1871
424
  isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1,
1872
424
        dst->nvar, -s);
1873
424
  isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1,
1874
424
        dst->nvar, s);
1875
424
1876
424
  pos = node_cst_coef_offset(src);
1877
424
  isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s);
1878
424
  pos = node_par_coef_offset(src);
1879
424
  isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s);
1880
424
  pos = node_var_coef_pos(src, 0);
1881
424
  isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s);
1882
424
  isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s);
1883
424
1884
424
  return dim_map;
1885
424
}
1886
1887
/* Add the constraints from "src" to "dst" using "dim_map",
1888
 * after making sure there is enough room in "dst" for the extra constraints.
1889
 */
1890
static __isl_give isl_basic_set *add_constraints_dim_map(
1891
  __isl_take isl_basic_set *dst, __isl_take isl_basic_set *src,
1892
  __isl_take isl_dim_map *dim_map)
1893
1.04k
{
1894
1.04k
  int n_eq, n_ineq;
1895
1.04k
1896
1.04k
  n_eq = isl_basic_set_n_equality(src);
1897
1.04k
  n_ineq = isl_basic_set_n_inequality(src);
1898
1.04k
  dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq);
1899
1.04k
  dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map);
1900
1.04k
  return dst;
1901
1.04k
}
1902
1903
/* Add constraints to graph->lp that force validity for the given
1904
 * dependence from a node i to itself.
1905
 * That is, add constraints that enforce
1906
 *
1907
 *  (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
1908
 *  = c_i_x (y - x) >= 0
1909
 *
1910
 * for each (x,y) in R.
1911
 * We obtain general constraints on coefficients (c_0, c_x)
1912
 * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-),
1913
 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
1914
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1915
 * Note that the result of intra_coefficients may also contain
1916
 * parameter coefficients c_n, in which case 0 is plugged in for them as well.
1917
 */
1918
static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
1919
  struct isl_sched_edge *edge)
1920
287
{
1921
287
  int offset;
1922
287
  isl_map *map = isl_map_copy(edge->map);
1923
287
  isl_ctx *ctx = isl_map_get_ctx(map);
1924
287
  isl_dim_map *dim_map;
1925
287
  isl_basic_set *coef;
1926
287
  struct isl_sched_node *node = edge->src;
1927
287
1928
287
  coef = intra_coefficients(graph, node, map, 0);
1929
287
1930
287
  offset = coef_var_offset(coef);
1931
287
1932
287
  if (!coef)
1933
0
    return isl_stat_error;
1934
287
1935
287
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
1936
287
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1937
287
1938
287
  return isl_stat_ok;
1939
287
}
1940
1941
/* Add constraints to graph->lp that force validity for the given
1942
 * dependence from node i to node j.
1943
 * That is, add constraints that enforce
1944
 *
1945
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
1946
 *
1947
 * for each (x,y) in R.
1948
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1949
 * of valid constraints for R and then plug in
1950
 * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
1951
 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
1952
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1953
 */
1954
static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
1955
  struct isl_sched_edge *edge)
1956
201
{
1957
201
  int offset;
1958
201
  isl_map *map;
1959
201
  isl_ctx *ctx;
1960
201
  isl_dim_map *dim_map;
1961
201
  isl_basic_set *coef;
1962
201
  struct isl_sched_node *src = edge->src;
1963
201
  struct isl_sched_node *dst = edge->dst;
1964
201
1965
201
  if (!graph->lp)
1966
0
    return isl_stat_error;
1967
201
1968
201
  map = isl_map_copy(edge->map);
1969
201
  ctx = isl_map_get_ctx(map);
1970
201
  coef = inter_coefficients(graph, edge, map);
1971
201
1972
201
  offset = coef_var_offset(coef);
1973
201
1974
201
  if (!coef)
1975
0
    return isl_stat_error;
1976
201
1977
201
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
1978
201
1979
201
  edge->start = graph->lp->n_ineq;
1980
201
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1981
201
  if (!graph->lp)
1982
0
    return isl_stat_error;
1983
201
  edge->end = graph->lp->n_ineq;
1984
201
1985
201
  return isl_stat_ok;
1986
201
}
1987
1988
/* Add constraints to graph->lp that bound the dependence distance for the given
1989
 * dependence from a node i to itself.
1990
 * If s = 1, we add the constraint
1991
 *
1992
 *  c_i_x (y - x) <= m_0 + m_n n
1993
 *
1994
 * or
1995
 *
1996
 *  -c_i_x (y - x) + m_0 + m_n n >= 0
1997
 *
1998
 * for each (x,y) in R.
1999
 * If s = -1, we add the constraint
2000
 *
2001
 *  -c_i_x (y - x) <= m_0 + m_n n
2002
 *
2003
 * or
2004
 *
2005
 *  c_i_x (y - x) + m_0 + m_n n >= 0
2006
 *
2007
 * for each (x,y) in R.
2008
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
2009
 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
2010
 * with each coefficient (except m_0) represented as a pair of non-negative
2011
 * coefficients.
2012
 *
2013
 *
2014
 * If "local" is set, then we add constraints
2015
 *
2016
 *  c_i_x (y - x) <= 0
2017
 *
2018
 * or
2019
 *
2020
 *  -c_i_x (y - x) <= 0
2021
 *
2022
 * instead, forcing the dependence distance to be (less than or) equal to 0.
2023
 * That is, we plug in (0, 0, -s * c_i_x),
2024
 * intra_coefficients is not required to have c_n in its result when
2025
 * "local" is set.  If they are missing, then (0, -s * c_i_x) is plugged in.
2026
 * Note that dependences marked local are treated as validity constraints
2027
 * by add_all_validity_constraints and therefore also have
2028
 * their distances bounded by 0 from below.
2029
 */
2030
static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
2031
  struct isl_sched_edge *edge, int s, int local)
2032
289
{
2033
289
  int offset;
2034
289
  unsigned nparam;
2035
289
  isl_map *map = isl_map_copy(edge->map);
2036
289
  isl_ctx *ctx = isl_map_get_ctx(map);
2037
289
  isl_dim_map *dim_map;
2038
289
  isl_basic_set *coef;
2039
289
  struct isl_sched_node *node = edge->src;
2040
289
2041
289
  coef = intra_coefficients(graph, node, map, !local);
2042
289
2043
289
  offset = coef_var_offset(coef);
2044
289
2045
289
  if (!coef)
2046
0
    return isl_stat_error;
2047
289
2048
289
  nparam = isl_space_dim(node->space, isl_dim_param);
2049
289
  dim_map = intra_dim_map(ctx, graph, node, offset, -s);
2050
289
2051
289
  if (!local) {
2052
147
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2053
147
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2054
147
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2055
147
  }
2056
289
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2057
289
2058
289
  return isl_stat_ok;
2059
289
}
2060
2061
/* Add constraints to graph->lp that bound the dependence distance for the given
2062
 * dependence from node i to node j.
2063
 * If s = 1, we add the constraint
2064
 *
2065
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
2066
 *    <= m_0 + m_n n
2067
 *
2068
 * or
2069
 *
2070
 *  -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
2071
 *    m_0 + m_n n >= 0
2072
 *
2073
 * for each (x,y) in R.
2074
 * If s = -1, we add the constraint
2075
 *
2076
 *  -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
2077
 *    <= m_0 + m_n n
2078
 *
2079
 * or
2080
 *
2081
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
2082
 *    m_0 + m_n n >= 0
2083
 *
2084
 * for each (x,y) in R.
2085
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2086
 * of valid constraints for R and then plug in
2087
 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
2088
 *  s*c_i_x, -s*c_j_x)
2089
 * with each coefficient (except m_0, c_*_0 and c_*_n)
2090
 * represented as a pair of non-negative coefficients.
2091
 *
2092
 *
2093
 * If "local" is set (and s = 1), then we add constraints
2094
 *
2095
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
2096
 *
2097
 * or
2098
 *
2099
 *  -((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0
2100
 *
2101
 * instead, forcing the dependence distance to be (less than or) equal to 0.
2102
 * That is, we plug in
2103
 * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
2104
 * Note that dependences marked local are treated as validity constraints
2105
 * by add_all_validity_constraints and therefore also have
2106
 * their distances bounded by 0 from below.
2107
 */
2108
static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
2109
  struct isl_sched_edge *edge, int s, int local)
2110
205
{
2111
205
  int offset;
2112
205
  unsigned nparam;
2113
205
  isl_map *map = isl_map_copy(edge->map);
2114
205
  isl_ctx *ctx = isl_map_get_ctx(map);
2115
205
  isl_dim_map *dim_map;
2116
205
  isl_basic_set *coef;
2117
205
  struct isl_sched_node *src = edge->src;
2118
205
  struct isl_sched_node *dst = edge->dst;
2119
205
2120
205
  coef = inter_coefficients(graph, edge, map);
2121
205
2122
205
  offset = coef_var_offset(coef);
2123
205
2124
205
  if (!coef)
2125
0
    return isl_stat_error;
2126
205
2127
205
  nparam = isl_space_dim(src->space, isl_dim_param);
2128
205
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
2129
205
2130
205
  if (!local) {
2131
121
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2132
121
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2133
121
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2134
121
  }
2135
205
2136
205
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2137
205
2138
205
  return isl_stat_ok;
2139
205
}
2140
2141
/* Should the distance over "edge" be forced to zero?
2142
 * That is, is it marked as a local edge?
2143
 * If "use_coincidence" is set, then coincidence edges are treated
2144
 * as local edges.
2145
 */
2146
static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
2147
2.19k
{
2148
2.19k
  return is_local(edge) || 
(2.10k
use_coincidence2.10k
&&
is_coincidence(edge)739
);
2149
2.19k
}
2150
2151
/* Add all validity constraints to graph->lp.
2152
 *
2153
 * An edge that is forced to be local needs to have its dependence
2154
 * distances equal to zero.  We take care of bounding them by 0 from below
2155
 * here.  add_all_proximity_constraints takes care of bounding them by 0
2156
 * from above.
2157
 *
2158
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2159
 * Otherwise, we ignore them.
2160
 */
2161
static int add_all_validity_constraints(struct isl_sched_graph *graph,
2162
  int use_coincidence)
2163
409
{
2164
409
  int i;
2165
409
2166
1.00k
  for (i = 0; i < graph->n_edge; 
++i596
) {
2167
596
    struct isl_sched_edge *edge = &graph->edge[i];
2168
596
    int zero;
2169
596
2170
596
    zero = force_zero(edge, use_coincidence);
2171
596
    if (!is_validity(edge) && 
!zero112
)
2172
108
      continue;
2173
488
    if (edge->src != edge->dst)
2174
201
      continue;
2175
287
    if (add_intra_validity_constraints(graph, edge) < 0)
2176
0
      return -1;
2177
287
  }
2178
409
2179
1.00k
  
for (i = 0; 409
i < graph->n_edge;
++i596
) {
2180
596
    struct isl_sched_edge *edge = &graph->edge[i];
2181
596
    int zero;
2182
596
2183
596
    zero = force_zero(edge, use_coincidence);
2184
596
    if (!is_validity(edge) && 
!zero112
)
2185
108
      continue;
2186
488
    if (edge->src == edge->dst)
2187
287
      continue;
2188
201
    if (add_inter_validity_constraints(graph, edge) < 0)
2189
0
      return -1;
2190
201
  }
2191
409
2192
409
  return 0;
2193
409
}
2194
2195
/* Add constraints to graph->lp that bound the dependence distance
2196
 * for all dependence relations.
2197
 * If a given proximity dependence is identical to a validity
2198
 * dependence, then the dependence distance is already bounded
2199
 * from below (by zero), so we only need to bound the distance
2200
 * from above.  (This includes the case of "local" dependences
2201
 * which are treated as validity dependence by add_all_validity_constraints.)
2202
 * Otherwise, we need to bound the distance both from above and from below.
2203
 *
2204
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2205
 * Otherwise, we ignore them.
2206
 */
2207
static int add_all_proximity_constraints(struct isl_sched_graph *graph,
2208
  int use_coincidence)
2209
409
{
2210
409
  int i;
2211
409
2212
1.00k
  for (i = 0; i < graph->n_edge; 
++i596
) {
2213
596
    struct isl_sched_edge *edge = &graph->edge[i];
2214
596
    int zero;
2215
596
2216
596
    zero = force_zero(edge, use_coincidence);
2217
596
    if (!is_proximity(edge) && 
!zero127
)
2218
119
      continue;
2219
477
    if (edge->src == edge->dst &&
2220
477
        
add_intra_proximity_constraints(graph, edge, 1, zero) < 0281
)
2221
0
      return -1;
2222
477
    if (edge->src != edge->dst &&
2223
477
        
add_inter_proximity_constraints(graph, edge, 1, zero) < 0196
)
2224
0
      return -1;
2225
477
    if (is_validity(edge) || 
zero21
)
2226
460
      continue;
2227
17
    if (edge->src == edge->dst &&
2228
17
        
add_intra_proximity_constraints(graph, edge, -1, 0) < 08
)
2229
0
      return -1;
2230
17
    if (edge->src != edge->dst &&
2231
17
        
add_inter_proximity_constraints(graph, edge, -1, 0) < 09
)
2232
0
      return -1;
2233
17
  }
2234
409
2235
409
  return 0;
2236
409
}
2237
2238
/* Normalize the rows of "indep" such that all rows are lexicographically
2239
 * positive and such that each row contains as many final zeros as possible,
2240
 * given the choice for the previous rows.
2241
 * Do this by performing elementary row operations.
2242
 */
2243
static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep)
2244
1.29k
{
2245
1.29k
  indep = isl_mat_reverse_gauss(indep);
2246
1.29k
  indep = isl_mat_lexnonneg_rows(indep);
2247
1.29k
  return indep;
2248
1.29k
}
2249
2250
/* Compute a basis for the rows in the linear part of the schedule
2251
 * and extend this basis to a full basis.  The remaining rows
2252
 * can then be used to force linear independence from the rows
2253
 * in the schedule.
2254
 *
2255
 * In particular, given the schedule rows S, we compute
2256
 *
2257
 *  S   = H Q
2258
 *  S U = H
2259
 *
2260
 * with H the Hermite normal form of S.  That is, all but the
2261
 * first rank columns of H are zero and so each row in S is
2262
 * a linear combination of the first rank rows of Q.
2263
 * The matrix Q can be used as a variable transformation
2264
 * that isolates the directions of S in the first rank rows.
2265
 * Transposing S U = H yields
2266
 *
2267
 *  U^T S^T = H^T
2268
 *
2269
 * with all but the first rank rows of H^T zero.
2270
 * The last rows of U^T are therefore linear combinations
2271
 * of schedule coefficients that are all zero on schedule
2272
 * coefficients that are linearly dependent on the rows of S.
2273
 * At least one of these combinations is non-zero on
2274
 * linearly independent schedule coefficients.
2275
 * The rows are normalized to involve as few of the last
2276
 * coefficients as possible and to have a positive initial value.
2277
 */
2278
static int node_update_vmap(struct isl_sched_node *node)
2279
1.29k
{
2280
1.29k
  isl_mat *H, *U, *Q;
2281
1.29k
  int n_row = isl_mat_rows(node->sched);
2282
1.29k
2283
1.29k
  H = isl_mat_sub_alloc(node->sched, 0, n_row,
2284
1.29k
            1 + node->nparam, node->nvar);
2285
1.29k
2286
1.29k
  H = isl_mat_left_hermite(H, 0, &U, &Q);
2287
1.29k
  isl_mat_free(node->indep);
2288
1.29k
  isl_mat_free(node->vmap);
2289
1.29k
  node->vmap = Q;
2290
1.29k
  node->indep = isl_mat_transpose(U);
2291
1.29k
  node->rank = isl_mat_initial_non_zero_cols(H);
2292
1.29k
  node->indep = isl_mat_drop_rows(node->indep, 0, node->rank);
2293
1.29k
  node->indep = normalize_independent(node->indep);
2294
1.29k
  isl_mat_free(H);
2295
1.29k
2296
1.29k
  if (!node->indep || !node->vmap || node->rank < 0)
2297
0
    return -1;
2298
1.29k
  return 0;
2299
1.29k
}
2300
2301
/* Is "edge" marked as a validity or a conditional validity edge?
2302
 */
2303
static int is_any_validity(struct isl_sched_edge *edge)
2304
108
{
2305
108
  return is_validity(edge) || 
is_conditional_validity(edge)2
;
2306
108
}
2307
2308
/* How many times should we count the constraints in "edge"?
2309
 *
2310
 * We count as follows
2311
 * validity   -> 1 (>= 0)
2312
 * validity+proximity -> 2 (>= 0 and upper bound)
2313
 * proximity    -> 2 (lower and upper bound)
2314
 * local(+any)    -> 2 (>= 0 and <= 0)
2315
 *
2316
 * If an edge is only marked conditional_validity then it counts
2317
 * as zero since it is only checked afterwards.
2318
 *
2319
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2320
 * Otherwise, we ignore them.
2321
 */
2322
static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
2323
596
{
2324
596
  if (is_proximity(edge) || 
force_zero(edge, use_coincidence)127
)
2325
477
    return 2;
2326
119
  if (is_validity(edge))
2327
28
    return 1;
2328
91
  return 0;
2329
91
}
2330
2331
/* How many times should the constraints in "edge" be counted
2332
 * as a parametric intra-node constraint?
2333
 *
2334
 * Only proximity edges that are not forced zero need
2335
 * coefficient constraints that include coefficients for parameters.
2336
 * If the edge is also a validity edge, then only
2337
 * an upper bound is introduced.  Otherwise, both lower and upper bounds
2338
 * are introduced.
2339
 */
2340
static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge,
2341
  int use_coincidence)
2342
596
{
2343
596
  if (edge->src != edge->dst)
2344
272
    return 0;
2345
324
  if (!is_proximity(edge))
2346
48
    return 0;
2347
276
  if (force_zero(edge, use_coincidence))
2348
137
    return 0;
2349
139
  if (is_validity(edge))
2350
131
    return 1;
2351
8
  else
2352
8
    return 2;
2353
139
}
2354
2355
/* Add "f" times the number of equality and inequality constraints of "bset"
2356
 * to "n_eq" and "n_ineq" and free "bset".
2357
 */
2358
static isl_stat update_count(__isl_take isl_basic_set *bset,
2359
  int f, int *n_eq, int *n_ineq)
2360
700
{
2361
700
  if (!bset)
2362
0
    return isl_stat_error;
2363
700
2364
700
  *n_eq += isl_basic_set_n_equality(bset);
2365
700
  *n_ineq += isl_basic_set_n_inequality(bset);
2366
700
  isl_basic_set_free(bset);
2367
700
2368
700
  return isl_stat_ok;
2369
700
}
2370
2371
/* Count the number of equality and inequality constraints
2372
 * that will be added for the given map.
2373
 *
2374
 * The edges that require parameter coefficients are counted separately.
2375
 *
2376
 * "use_coincidence" is set if we should take into account coincidence edges.
2377
 */
2378
static isl_stat count_map_constraints(struct isl_sched_graph *graph,
2379
  struct isl_sched_edge *edge, __isl_take isl_map *map,
2380
  int *n_eq, int *n_ineq, int use_coincidence)
2381
596
{
2382
596
  isl_map *copy;
2383
596
  isl_basic_set *coef;
2384
596
  int f = edge_multiplicity(edge, use_coincidence);
2385
596
  int fp = parametric_intra_edge_multiplicity(edge, use_coincidence);
2386
596
2387
596
  if (f == 0) {
2388
91
    isl_map_free(map);
2389
91
    return isl_stat_ok;
2390
91
  }
2391
505
2392
505
  if (edge->src != edge->dst) {
2393
210
    coef = inter_coefficients(graph, edge, map);
2394
210
    return update_count(coef, f, n_eq, n_ineq);
2395
210
  }
2396
295
2397
295
  if (fp > 0) {
2398
139
    copy = isl_map_copy(map);
2399
139
    coef = intra_coefficients(graph, edge->src, copy, 1);
2400
139
    if (update_count(coef, fp, n_eq, n_ineq) < 0)
2401
0
      goto error;
2402
295
  }
2403
295
2404
295
  if (f > fp) {
2405
287
    copy = isl_map_copy(map);
2406
287
    coef = intra_coefficients(graph, edge->src, copy, 0);
2407
287
    if (update_count(coef, f - fp, n_eq, n_ineq) < 0)
2408
0
      goto error;
2409
295
  }
2410
295
2411
295
  isl_map_free(map);
2412
295
  return isl_stat_ok;
2413
0
error:
2414
0
  isl_map_free(map);
2415
0
  return isl_stat_error;
2416
295
}
2417
2418
/* Count the number of equality and inequality constraints
2419
 * that will be added to the main lp problem.
2420
 * We count as follows
2421
 * validity   -> 1 (>= 0)
2422
 * validity+proximity -> 2 (>= 0 and upper bound)
2423
 * proximity    -> 2 (lower and upper bound)
2424
 * local(+any)    -> 2 (>= 0 and <= 0)
2425
 *
2426
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2427
 * Otherwise, we ignore them.
2428
 */
2429
static int count_constraints(struct isl_sched_graph *graph,
2430
  int *n_eq, int *n_ineq, int use_coincidence)
2431
409
{
2432
409
  int i;
2433
409
2434
409
  *n_eq = *n_ineq = 0;
2435
1.00k
  for (i = 0; i < graph->n_edge; 
++i596
) {
2436
596
    struct isl_sched_edge *edge = &graph->edge[i];
2437
596
    isl_map *map = isl_map_copy(edge->map);
2438
596
2439
596
    if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
2440
596
              use_coincidence) < 0)
2441
0
      return -1;
2442
596
  }
2443
409
2444
409
  return 0;
2445
409
}
2446
2447
/* Count the number of constraints that will be added by
2448
 * add_bound_constant_constraints to bound the values of the constant terms
2449
 * and increment *n_eq and *n_ineq accordingly.
2450
 *
2451
 * In practice, add_bound_constant_constraints only adds inequalities.
2452
 */
2453
static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
2454
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2455
409
{
2456
409
  if (isl_options_get_schedule_max_constant_term(ctx) == -1)
2457
257
    return isl_stat_ok;
2458
152
2459
152
  *n_ineq += graph->n;
2460
152
2461
152
  return isl_stat_ok;
2462
152
}
2463
2464
/* Add constraints to bound the values of the constant terms in the schedule,
2465
 * if requested by the user.
2466
 *
2467
 * The maximal value of the constant terms is defined by the option
2468
 * "schedule_max_constant_term".
2469
 */
2470
static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
2471
  struct isl_sched_graph *graph)
2472
409
{
2473
409
  int i, k;
2474
409
  int max;
2475
409
  int total;
2476
409
2477
409
  max = isl_options_get_schedule_max_constant_term(ctx);
2478
409
  if (max == -1)
2479
257
    return isl_stat_ok;
2480
152
2481
152
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2482
152
2483
320
  for (i = 0; i < graph->n; 
++i168
) {
2484
168
    struct isl_sched_node *node = &graph->node[i];
2485
168
    int pos;
2486
168
2487
168
    k = isl_basic_set_alloc_inequality(graph->lp);
2488
168
    if (k < 0)
2489
0
      return isl_stat_error;
2490
168
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
2491
168
    pos = node_cst_coef_offset(node);
2492
168
    isl_int_set_si(graph->lp->ineq[k][1 + pos], -1);
2493
168
    isl_int_set_si(graph->lp->ineq[k][0], max);
2494
168
  }
2495
152
2496
152
  return isl_stat_ok;
2497
152
}
2498
2499
/* Count the number of constraints that will be added by
2500
 * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
2501
 * accordingly.
2502
 *
2503
 * In practice, add_bound_coefficient_constraints only adds inequalities.
2504
 */
2505
static int count_bound_coefficient_constraints(isl_ctx *ctx,
2506
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2507
409
{
2508
409
  int i;
2509
409
2510
409
  if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
2511
409
      
!isl_options_get_schedule_treat_coalescing(ctx)256
)
2512
8
    return 0;
2513
401
2514
970
  
for (i = 0; 401
i < graph->n;
++i569
)
2515
569
    *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
2516
401
2517
401
  return 0;
2518
401
}
2519
2520
/* Add constraints to graph->lp that bound the values of
2521
 * the parameter schedule coefficients of "node" to "max" and
2522
 * the variable schedule coefficients to the corresponding entry
2523
 * in node->max.
2524
 * In either case, a negative value means that no bound needs to be imposed.
2525
 *
2526
 * For parameter coefficients, this amounts to adding a constraint
2527
 *
2528
 *  c_n <= max
2529
 *
2530
 * i.e.,
2531
 *
2532
 *  -c_n + max >= 0
2533
 *
2534
 * The variables coefficients are, however, not represented directly.
2535
 * Instead, the variable coefficients c_x are written as differences
2536
 * c_x = c_x^+ - c_x^-.
2537
 * That is,
2538
 *
2539
 *  -max_i <= c_x_i <= max_i
2540
 *
2541
 * is encoded as
2542
 *
2543
 *  -max_i <= c_x_i^+ - c_x_i^- <= max_i
2544
 *
2545
 * or
2546
 *
2547
 *  -(c_x_i^+ - c_x_i^-) + max_i >= 0
2548
 *  c_x_i^+ - c_x_i^- + max_i >= 0
2549
 */
2550
static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
2551
  struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
2552
569
{
2553
569
  int i, j, k;
2554
569
  int total;
2555
569
  isl_vec *ineq;
2556
569
2557
569
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2558
569
2559
882
  for (j = 0; j < node->nparam; 
++j313
) {
2560
313
    int dim;
2561
313
2562
313
    if (max < 0)
2563
246
      continue;
2564
67
2565
67
    k = isl_basic_set_alloc_inequality(graph->lp);
2566
67
    if (k < 0)
2567
0
      return isl_stat_error;
2568
67
    dim = 1 + node_par_coef_offset(node) + j;
2569
67
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
2570
67
    isl_int_set_si(graph->lp->ineq[k][dim], -1);
2571
67
    isl_int_set_si(graph->lp->ineq[k][0], max);
2572
67
  }
2573
569
2574
569
  ineq = isl_vec_alloc(ctx, 1 + total);
2575
569
  ineq = isl_vec_clr(ineq);
2576
569
  if (!ineq)
2577
0
    return isl_stat_error;
2578
1.56k
  
for (i = 0; 569
i < node->nvar;
++i993
) {
2579
993
    int pos = 1 + node_var_coef_pos(node, i);
2580
993
2581
993
    if (isl_int_is_neg(node->max->el[i]))
2582
993
      
continue529
;
2583
464
2584
464
    isl_int_set_si(ineq->el[pos], 1);
2585
464
    isl_int_set_si(ineq->el[pos + 1], -1);
2586
464
    isl_int_set(ineq->el[0], node->max->el[i]);
2587
464
2588
464
    k = isl_basic_set_alloc_inequality(graph->lp);
2589
464
    if (k < 0)
2590
0
      goto error;
2591
464
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2592
464
2593
464
    isl_seq_neg(ineq->el + pos, ineq->el + pos, 2);
2594
464
    k = isl_basic_set_alloc_inequality(graph->lp);
2595
464
    if (k < 0)
2596
0
      goto error;
2597
464
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2598
464
2599
464
    isl_seq_clr(ineq->el + pos, 2);
2600
464
  }
2601
569
  isl_vec_free(ineq);
2602
569
2603
569
  return isl_stat_ok;
2604
0
error:
2605
0
  isl_vec_free(ineq);
2606
0
  return isl_stat_error;
2607
569
}
2608
2609
/* Add constraints that bound the values of the variable and parameter
2610
 * coefficients of the schedule.
2611
 *
2612
 * The maximal value of the coefficients is defined by the option
2613
 * 'schedule_max_coefficient' and the entries in node->max.
2614
 * These latter entries are only set if either the schedule_max_coefficient
2615
 * option or the schedule_treat_coalescing option is set.
2616
 */
2617
static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
2618
  struct isl_sched_graph *graph)
2619
409
{
2620
409
  int i;
2621
409
  int max;
2622
409
2623
409
  max = isl_options_get_schedule_max_coefficient(ctx);
2624
409
2625
409
  if (max == -1 && 
!isl_options_get_schedule_treat_coalescing(ctx)256
)
2626
8
    return isl_stat_ok;
2627
401
2628
970
  
for (i = 0; 401
i < graph->n;
++i569
) {
2629
569
    struct isl_sched_node *node = &graph->node[i];
2630
569
2631
569
    if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
2632
0
      return isl_stat_error;
2633
569
  }
2634
401
2635
401
  return isl_stat_ok;
2636
401
}
2637
2638
/* Add a constraint to graph->lp that equates the value at position
2639
 * "sum_pos" to the sum of the "n" values starting at "first".
2640
 */
2641
static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
2642
  int sum_pos, int first, int n)
2643
409
{
2644
409
  int i, k;
2645
409
  int total;
2646
409
2647
409
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2648
409
2649
409
  k = isl_basic_set_alloc_equality(graph->lp);
2650
409
  if (k < 0)
2651
0
    return isl_stat_error;
2652
409
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2653
409
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2654
931
  for (i = 0; i < n; 
++i522
)
2655
522
    isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
2656
409
2657
409
  return isl_stat_ok;
2658
409
}
2659
2660
/* Add a constraint to graph->lp that equates the value at position
2661
 * "sum_pos" to the sum of the parameter coefficients of all nodes.
2662
 */
2663
static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
2664
  int sum_pos)
2665
409
{
2666
409
  int i, j, k;
2667
409
  int total;
2668
409
2669
409
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2670
409
2671
409
  k = isl_basic_set_alloc_equality(graph->lp);
2672
409
  if (k < 0)
2673
0
    return isl_stat_error;
2674
409
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2675
409
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2676
976
  for (i = 0; i < graph->n; 
++i567
) {
2677
567
    int pos = 1 + node_par_coef_offset(&graph->node[i]);
2678
567
2679
895
    for (j = 0; j < graph->node[i].nparam; 
++j328
)
2680
567
      
isl_int_set_si328
(graph->lp->eq[k][pos + j], 1);
2681
567
  }
2682
409
2683
409
  return isl_stat_ok;
2684
409
}
2685
2686
/* Add a constraint to graph->lp that equates the value at position
2687
 * "sum_pos" to the sum of the variable coefficients of all nodes.
2688
 */
2689
static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
2690
  int sum_pos)
2691
441
{
2692
441
  int i, j, k;
2693
441
  int total;
2694
441
2695
441
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2696
441
2697
441
  k = isl_basic_set_alloc_equality(graph->lp);
2698
441
  if (k < 0)
2699
0
    return isl_stat_error;
2700
441
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2701
441
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2702
1.07k
  for (i = 0; i < graph->n; 
++i629
) {
2703
629
    struct isl_sched_node *node = &graph->node[i];
2704
629
    int pos = 1 + node_var_coef_offset(node);
2705
629
2706
2.85k
    for (j = 0; j < 2 * node->nvar; 
++j2.22k
)
2707
2.22k
      isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2708
629
  }
2709
441
2710
441
  return isl_stat_ok;
2711
441
}
2712
2713
/* Construct an ILP problem for finding schedule coefficients
2714
 * that result in non-negative, but small dependence distances
2715
 * over all dependences.
2716
 * In particular, the dependence distances over proximity edges
2717
 * are bounded by m_0 + m_n n and we compute schedule coefficients
2718
 * with small values (preferably zero) of m_n and m_0.
2719
 *
2720
 * All variables of the ILP are non-negative.  The actual coefficients
2721
 * may be negative, so each coefficient is represented as the difference
2722
 * of two non-negative variables.  The negative part always appears
2723
 * immediately before the positive part.
2724
 * Other than that, the variables have the following order
2725
 *
2726
 *  - sum of positive and negative parts of m_n coefficients
2727
 *  - m_0
2728
 *  - sum of all c_n coefficients
2729
 *    (unconstrained when computing non-parametric schedules)
2730
 *  - sum of positive and negative parts of all c_x coefficients
2731
 *  - positive and negative parts of m_n coefficients
2732
 *  - for each node
2733
 *    - positive and negative parts of c_i_x, in opposite order
2734
 *    - c_i_n (if parametric)
2735
 *    - c_i_0
2736
 *
2737
 * The constraints are those from the edges plus two or three equalities
2738
 * to express the sums.
2739
 *
2740
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2741
 * Otherwise, we ignore them.
2742
 */
2743
static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
2744
  int use_coincidence)
2745
409
{
2746
409
  int i;
2747
409
  unsigned nparam;
2748
409
  unsigned total;
2749
409
  isl_space *space;
2750
409
  int parametric;
2751
409
  int param_pos;
2752
409
  int n_eq, n_ineq;
2753
409
2754
409
  parametric = ctx->opt->schedule_parametric;
2755
409
  nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
2756
409
  param_pos = 4;
2757
409
  total = param_pos + 2 * nparam;
2758
994
  for (i = 0; i < graph->n; 
++i585
) {
2759
585
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2760
585
    if (node_update_vmap(node) < 0)
2761
0
      return isl_stat_error;
2762
585
    node->start = total;
2763
585
    total += 1 + node->nparam + 2 * node->nvar;
2764
585
  }
2765
409
2766
409
  if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0)
2767
0
    return isl_stat_error;
2768
409
  if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2769
0
    return isl_stat_error;
2770
409
  if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2771
0
    return isl_stat_error;
2772
409
2773
409
  space = isl_space_set_alloc(ctx, 0, total);
2774
409
  isl_basic_set_free(graph->lp);
2775
409
  n_eq += 2 + parametric;
2776
409
2777
409
  graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
2778
409
2779
409
  if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
2780
0
    return isl_stat_error;
2781
409
  if (parametric && 
add_param_sum_constraint(graph, 2) < 0377
)
2782
0
    return isl_stat_error;
2783
409
  if (add_var_sum_constraint(graph, 3) < 0)
2784
0
    return isl_stat_error;
2785
409
  if (add_bound_constant_constraints(ctx, graph) < 0)
2786
0
    return isl_stat_error;
2787
409
  if (add_bound_coefficient_constraints(ctx, graph) < 0)
2788
0
    return isl_stat_error;
2789
409
  if (add_all_validity_constraints(graph, use_coincidence) < 0)
2790
0
    return isl_stat_error;
2791
409
  if (add_all_proximity_constraints(graph, use_coincidence) < 0)
2792
0
    return isl_stat_error;
2793
409
2794
409
  return isl_stat_ok;
2795
409
}
2796
2797
/* Analyze the conflicting constraint found by
2798
 * isl_tab_basic_set_non_trivial_lexmin.  If it corresponds to the validity
2799
 * constraint of one of the edges between distinct nodes, living, moreover
2800
 * in distinct SCCs, then record the source and sink SCC as this may
2801
 * be a good place to cut between SCCs.
2802
 */
2803
static int check_conflict(int con, void *user)
2804
1.17k
{
2805
1.17k
  int i;
2806
1.17k
  struct isl_sched_graph *graph = user;
2807
1.17k
2808
1.17k
  if (graph->src_scc >= 0)
2809
38
    return 0;
2810
1.13k
2811
1.13k
  con -= graph->lp->n_eq;
2812
1.13k
2813
1.13k
  if (con >= graph->lp->n_ineq)
2814
529
    return 0;
2815
605
2816
1.99k
  
for (i = 0; 605
i < graph->n_edge;
++i1.39k
) {
2817
1.39k
    if (!is_validity(&graph->edge[i]))
2818
135
      continue;
2819
1.25k
    if (graph->edge[i].src == graph->edge[i].dst)
2820
683
      continue;
2821
572
    if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
2822
497
      continue;
2823
75
    if (graph->edge[i].start > con)
2824
44
      continue;
2825
31
    if (graph->edge[i].end <= con)
2826
22
      continue;
2827
9
    graph->src_scc = graph->edge[i].src->scc;
2828
9
    graph->dst_scc = graph->edge[i].dst->scc;
2829
9
  }
2830
605
2831
605
  return 0;
2832
605
}
2833
2834
/* Check whether the next schedule row of the given node needs to be
2835
 * non-trivial.  Lower-dimensional domains may have some trivial rows,
2836
 * but as soon as the number of remaining required non-trivial rows
2837
 * is as large as the number or remaining rows to be computed,
2838
 * all remaining rows need to be non-trivial.
2839
 */
2840
static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
2841
621
{
2842
621
  return node->nvar - node->rank >= graph->maxvar - graph->n_row;
2843
621
}
2844
2845
/* Construct a non-triviality region with triviality directions
2846
 * corresponding to the rows of "indep".
2847
 * The rows of "indep" are expressed in terms of the schedule coefficients c_i,
2848
 * while the triviality directions are expressed in terms of
2849
 * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing
2850
 * before c^+_i.  Furthermore,
2851
 * the pairs of non-negative variables representing the coefficients
2852
 * are stored in the opposite order.
2853
 */
2854
static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep)
2855
479
{
2856
479
  isl_ctx *ctx;
2857
479
  isl_mat *mat;
2858
479
  int i, j, n, n_var;
2859
479
2860
479
  if (!indep)
2861
0
    return NULL;
2862
479
2863
479
  ctx = isl_mat_get_ctx(indep);
2864
479
  n = isl_mat_rows(indep);
2865
479
  n_var = isl_mat_cols(indep);
2866
479
  mat = isl_mat_alloc(ctx, n, 2 * n_var);
2867
479
  if (!mat)
2868
0
    return NULL;
2869
1.17k
  
for (i = 0; 479
i < n;
++i691
) {
2870
2.20k
    for (j = 0; j < n_var; 
++j1.51k
) {
2871
1.51k
      int nj = n_var - 1 - j;
2872
1.51k
      isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]);
2873
1.51k
      isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]);
2874
1.51k
    }
2875
691
  }
2876
479
2877
479
  return mat;
2878
479
}
2879
2880
/* Solve the ILP problem constructed in setup_lp.
2881
 * For each node such that all the remaining rows of its schedule
2882
 * need to be non-trivial, we construct a non-triviality region.
2883
 * This region imposes that the next row is independent of previous rows.
2884
 * In particular, the non-triviality region enforces that at least
2885
 * one of the linear combinations in the rows of node->indep is non-zero.
2886
 */
2887
static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
2888
409
{
2889
409
  int i;
2890
409
  isl_vec *sol;
2891
409
  isl_basic_set *lp;
2892
409
2893
994
  for (i = 0; i < graph->n; 
++i585
) {
2894
585
    struct isl_sched_node *node = &graph->node[i];
2895
585
    isl_mat *trivial;
2896
585
2897
585
    graph->region[i].pos = node_var_coef_offset(node);
2898
585
    if (needs_row(graph, node))
2899
479
      trivial = construct_trivial(node->indep);
2900
106
    else
2901
106
      trivial = isl_mat_zero(ctx, 0, 0);
2902
585
    graph->region[i].trivial = trivial;
2903
585
  }
2904
409
  lp = isl_basic_set_copy(graph->lp);
2905
409
  sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
2906
409
               graph->region, &check_conflict, graph);
2907
994
  for (i = 0; i < graph->n; 
++i585
)
2908
585
    isl_mat_free(graph->region[i].trivial);
2909
409
  return sol;
2910
409
}
2911
2912
/* Extract the coefficients for the variables of "node" from "sol".
2913
 *
2914
 * Each schedule coefficient c_i_x is represented as the difference
2915
 * between two non-negative variables c_i_x^+ - c_i_x^-.
2916
 * The c_i_x^- appear before their c_i_x^+ counterpart.
2917
 * Furthermore, the order of these pairs is the opposite of that
2918
 * of the corresponding coefficients.
2919
 *
2920
 * Return c_i_x = c_i_x^+ - c_i_x^-
2921
 */
2922
static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
2923
  __isl_keep isl_vec *sol)
2924
508
{
2925
508
  int i;
2926
508
  int pos;
2927
508
  isl_vec *csol;
2928
508
2929
508
  if (!sol)
2930
0
    return NULL;
2931
508
  csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
2932
508
  if (!csol)
2933
0
    return NULL;
2934
508
2935
508
  pos = 1 + node_var_coef_offset(node);
2936
1.46k
  for (i = 0; i < node->nvar; 
++i957
)
2937
957
    isl_int_sub(csol->el[node->nvar - 1 - i],
2938
508
          sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
2939
508
2940
508
  return csol;
2941
508
}
2942
2943
/* Update the schedules of all nodes based on the given solution
2944
 * of the LP problem.
2945
 * The new row is added to the current band.
2946
 * All possibly negative coefficients are encoded as a difference
2947
 * of two non-negative variables, so we need to perform the subtraction
2948
 * here.
2949
 *
2950
 * If coincident is set, then the caller guarantees that the new
2951
 * row satisfies the coincidence constraints.
2952
 */
2953
static int update_schedule(struct isl_sched_graph *graph,
2954
  __isl_take isl_vec *sol, int coincident)
2955
328
{
2956
328
  int i, j;
2957
328
  isl_vec *csol = NULL;
2958
328
2959
328
  if (!sol)
2960
0
    goto error;
2961
328
  if (sol->size == 0)
2962
328
    
isl_die0
(sol->ctx, isl_error_internal,
2963
328
      "no solution found", goto error);
2964
328
  if (graph->n_total_row >= graph->max_row)
2965
328
    
isl_die0
(sol->ctx, isl_error_internal,
2966
328
      "too many schedule rows", goto error);
2967
328
2968
777
  
for (i = 0; 328
i < graph->n;
++i449
) {
2969
449
    struct isl_sched_node *node = &graph->node[i];
2970
449
    int pos;
2971
449
    int row = isl_mat_rows(node->sched);
2972
449
2973
449
    isl_vec_free(csol);
2974
449
    csol = extract_var_coef(node, sol);
2975
449
    if (!csol)
2976
0
      goto error;
2977
449
2978
449
    isl_map_free(node->sched_map);
2979
449
    node->sched_map = NULL;
2980
449
    node->sched = isl_mat_add_rows(node->sched, 1);
2981
449
    if (!node->sched)
2982
0
      goto error;
2983
449
    pos = node_cst_coef_offset(node);
2984
449
    node->sched = isl_mat_set_element(node->sched,
2985
449
          row, 0, sol->el[1 + pos]);
2986
449
    pos = node_par_coef_offset(node);
2987
679
    for (j = 0; j < node->nparam; 
++j230
)
2988
230
      node->sched = isl_mat_set_element(node->sched,
2989
230
          row, 1 + j, sol->el[1 + pos + j]);
2990
1.29k
    for (j = 0; j < node->nvar; 
++j850
)
2991
850
      node->sched = isl_mat_set_element(node->sched,
2992
850
          row, 1 + node->nparam + j, csol->el[j]);
2993
449
    node->coincident[graph->n_total_row] = coincident;
2994
449
  }
2995
328
  isl_vec_free(sol);
2996
328
  isl_vec_free(csol);
2997
328
2998
328
  graph->n_row++;
2999
328
  graph->n_total_row++;
3000
328
3001
328
  return 0;
3002
0
error:
3003
0
  isl_vec_free(sol);
3004
0
  isl_vec_free(csol);
3005
0
  return -1;
3006
328
}
3007
3008
/* Convert row "row" of node->sched into an isl_aff living in "ls"
3009
 * and return this isl_aff.
3010
 */
3011
static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
3012
  struct isl_sched_node *node, int row)
3013
1.04k
{
3014
1.04k
  int j;
3015
1.04k
  isl_int v;
3016
1.04k
  isl_aff *aff;
3017
1.04k
3018
1.04k
  isl_int_init(v);
3019
1.04k
3020
1.04k
  aff = isl_aff_zero_on_domain(ls);
3021
1.04k
  if (isl_mat_get_element(node->sched, row, 0, &v) < 0)
3022
0
    goto error;
3023
1.04k
  aff = isl_aff_set_constant(aff, v);
3024
1.59k
  for (j = 0; j < node->nparam; 
++j548
) {
3025
548
    if (isl_mat_get_element(node->sched, row, 1 + j, &v) < 0)
3026
0
      goto error;
3027
548
    aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
3028
548
  }
3029
2.94k
  
for (j = 0; 1.04k
j < node->nvar;
++j1.90k
) {
3030
1.90k
    if (isl_mat_get_element(node->sched, row,
3031
1.90k
          1 + node->nparam + j, &v) < 0)
3032
0
      goto error;
3033
1.90k
    aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
3034
1.90k
  }
3035
1.04k
3036
1.04k
  isl_int_clear(v);
3037
1.04k
3038
1.04k
  return aff;
3039
0
error:
3040
0
  isl_int_clear(v);
3041
0
  isl_aff_free(aff);
3042
0
  return NULL;
3043
1.04k
}
3044
3045
/* Convert the "n" rows starting at "first" of node->sched into a multi_aff
3046
 * and return this multi_aff.
3047
 *
3048
 * The result is defined over the uncompressed node domain.
3049
 */
3050
static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff(
3051
  struct isl_sched_node *node, int first, int n)
3052
761
{
3053
761
  int i;
3054
761
  isl_space *space;
3055
761
  isl_local_space *ls;
3056
761
  isl_aff *aff;
3057
761
  isl_multi_aff *ma;
3058
761
  int nrow;
3059
761
3060
761
  if (!node)
3061
0
    return NULL;
3062
761
  nrow = isl_mat_rows(node->sched);
3063
761
  if (node->compressed)
3064
66
    space = isl_multi_aff_get_domain_space(node->decompress);
3065
695
  else
3066
695
    space = isl_space_copy(node->space);
3067
761
  ls = isl_local_space_from_space(isl_space_copy(space));
3068
761
  space = isl_space_from_domain(space);
3069
761
  space = isl_space_add_dims(space, isl_dim_out, n);
3070
761
  ma = isl_multi_aff_zero(space);
3071
761
3072
1.80k
  for (i = first; i < first + n; 
++i1.04k
) {
3073
1.04k
    aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
3074
1.04k
    ma = isl_multi_aff_set_aff(ma, i - first, aff);
3075
1.04k
  }
3076
761
3077
761
  isl_local_space_free(ls);
3078
761
3079
761
  if (node->compressed)
3080
66
    ma = isl_multi_aff_pullback_multi_aff(ma,
3081
66
          isl_multi_aff_copy(node->compress));
3082
761
3083
761
  return ma;
3084
761
}
3085
3086
/* Convert node->sched into a multi_aff and return this multi_aff.
3087
 *
3088
 * The result is defined over the uncompressed node domain.
3089
 */
3090
static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
3091
  struct isl_sched_node *node)
3092
220
{
3093
220
  int nrow;
3094
220
3095
220
  nrow = isl_mat_rows(node->sched);
3096
220
  return node_extract_partial_schedule_multi_aff(node, 0, nrow);
3097
220
}
3098
3099
/* Convert node->sched into a map and return this map.
3100
 *
3101
 * The result is cached in node->sched_map, which needs to be released
3102
 * whenever node->sched is updated.
3103
 * It is defined over the uncompressed node domain.
3104
 */
3105
static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
3106
636
{
3107
636
  if (!node->sched_map) {
3108
220
    isl_multi_aff *ma;
3109
220
3110
220
    ma = node_extract_schedule_multi_aff(node);
3111
220
    node->sched_map = isl_map_from_multi_aff(ma);
3112
220
  }
3113
636
3114
636
  return isl_map_copy(node->sched_map);
3115
636
}
3116
3117
/* Construct a map that can be used to update a dependence relation
3118
 * based on the current schedule.
3119
 * That is, construct a map expressing that source and sink
3120
 * are executed within the same iteration of the current schedule.
3121
 * This map can then be intersected with the dependence relation.
3122
 * This is not the most efficient way, but this shouldn't be a critical
3123
 * operation.
3124
 */
3125
static __isl_give isl_map *specializer(struct isl_sched_node *src,
3126
  struct isl_sched_node *dst)
3127
284
{
3128
284
  isl_map *src_sched, *dst_sched;
3129
284
3130
284
  src_sched = node_extract_schedule(src);
3131
284
  dst_sched = node_extract_schedule(dst);
3132
284
  return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
3133
284
}
3134
3135
/* Intersect the domains of the nested relations in domain and range
3136
 * of "umap" with "map".
3137
 */
3138
static __isl_give isl_union_map *intersect_domains(
3139
  __isl_take isl_union_map *umap, __isl_keep isl_map *map)
3140
81
{
3141
81
  isl_union_set *uset;
3142
81
3143
81
  umap = isl_union_map_zip(umap);
3144
81
  uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
3145
81
  umap = isl_union_map_intersect_domain(umap, uset);
3146
81
  umap = isl_union_map_zip(umap);
3147
81
  return umap;
3148
81
}
3149
3150
/* Update the dependence relation of the given edge based
3151
 * on the current schedule.
3152
 * If the dependence is carried completely by the current schedule, then
3153
 * it is removed from the edge_tables.  It is kept in the list of edges
3154
 * as otherwise all edge_tables would have to be recomputed.
3155
 *
3156
 * If the edge is of a type that can appear multiple times
3157
 * between the same pair of nodes, then it is added to
3158
 * the edge table (again).  This prevents the situation
3159
 * where none of these edges is referenced from the edge table
3160
 * because the one that was referenced turned out to be empty and
3161
 * was therefore removed from the table.
3162
 */
3163
static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph,
3164
  struct isl_sched_edge *edge)
3165
284
{
3166
284
  int empty;
3167
284
  isl_map *id;
3168
284
3169
284
  id = specializer(edge->src, edge->dst);
3170
284
  edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
3171
284
  if (!edge->map)
3172
0
    goto error;
3173
284
3174
284
  if (edge->tagged_condition) {
3175
38
    edge->tagged_condition =
3176
38
      intersect_domains(edge->tagged_condition, id);
3177
38
    if (!edge->tagged_condition)
3178
0
      goto error;
3179
284
  }
3180
284
  if (edge->tagged_validity) {
3181
43
    edge->tagged_validity =
3182
43
      intersect_domains(edge->tagged_validity, id);
3183
43
    if (!edge->tagged_validity)
3184
0
      goto error;
3185
284
  }
3186
284
3187
284
  empty = isl_map_plain_is_empty(edge->map);
3188
284
  if (empty < 0)
3189
0
    goto error;
3190
284
  if (empty) {
3191
172
    graph_remove_edge(graph, edge);
3192
172
  } else 
if (112
is_multi_edge_type(edge)112
) {
3193
33
    if (graph_edge_tables_add(ctx, graph, edge) < 0)
3194
0
      goto error;
3195
284
  }
3196
284
3197
284
  isl_map_free(id);
3198
284
  return isl_stat_ok;
3199
0
error:
3200
0
  isl_map_free(id);
3201
0
  return isl_stat_error;
3202
284
}
3203
3204
/* Does the domain of "umap" intersect "uset"?
3205
 */
3206
static int domain_intersects(__isl_keep isl_union_map *umap,
3207
  __isl_keep isl_union_set *uset)
3208
39
{
3209
39
  int empty;
3210
39
3211
39
  umap = isl_union_map_copy(umap);
3212
39
  umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
3213
39
  empty = isl_union_map_is_empty(umap);
3214
39
  isl_union_map_free(umap);
3215
39
3216
39
  return empty < 0 ? 
-10
: !empty;
3217
39
}
3218
3219
/* Does the range of "umap" intersect "uset"?
3220
 */
3221
static int range_intersects(__isl_keep isl_union_map *umap,
3222
  __isl_keep isl_union_set *uset)
3223
26
{
3224
26
  int empty;
3225
26
3226
26
  umap = isl_union_map_copy(umap);
3227
26
  umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
3228
26
  empty = isl_union_map_is_empty(umap);
3229
26
  isl_union_map_free(umap);
3230
26
3231
26
  return empty < 0 ? 
-10
: !empty;
3232
26
}
3233
3234
/* Are the condition dependences of "edge" local with respect to
3235
 * the current schedule?
3236
 *
3237
 * That is, are domain and range of the condition dependences mapped
3238
 * to the same point?
3239
 *
3240
 * In other words, is the condition false?
3241
 */
3242
static int is_condition_false(struct isl_sched_edge *edge)
3243
36
{
3244
36
  isl_union_map *umap;
3245
36
  isl_map *map, *sched, *test;
3246
36
  int empty, local;
3247
36
3248
36
  empty = isl_union_map_is_empty(edge->tagged_condition);
3249
36
  if (empty < 0 || empty)
3250
2
    return empty;
3251
34
3252
34
  umap = isl_union_map_copy(edge->tagged_condition);
3253
34
  umap = isl_union_map_zip(umap);
3254
34
  umap = isl_union_set_unwrap(isl_union_map_domain(umap));
3255
34
  map = isl_map_from_union_map(umap);
3256
34
3257
34
  sched = node_extract_schedule(edge->src);
3258
34
  map = isl_map_apply_domain(map, sched);
3259
34
  sched = node_extract_schedule(edge->dst);
3260
34
  map = isl_map_apply_range(map, sched);
3261
34
3262
34
  test = isl_map_identity(isl_map_get_space(map));
3263
34
  local = isl_map_is_subset(map, test);
3264
34
  isl_map_free(map);
3265
34
  isl_map_free(test);
3266
34
3267
34
  return local;
3268
34
}
3269
3270
/* For each conditional validity constraint that is adjacent
3271
 * to a condition with domain in condition_source or range in condition_sink,
3272
 * turn it into an unconditional validity constraint.
3273
 */
3274
static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
3275
  __isl_take isl_union_set *condition_source,
3276
  __isl_take isl_union_set *condition_sink)
3277
10
{
3278
10
  int i;
3279
10
3280
10
  condition_source = isl_union_set_coalesce(condition_source);
3281
10
  condition_sink = isl_union_set_coalesce(condition_sink);
3282
10
3283
51
  for (i = 0; i < graph->n_edge; 
++i41
) {
3284
41
    int adjacent;
3285
41
    isl_union_map *validity;
3286
41
3287
41
    if (!is_conditional_validity(&graph->edge[i]))
3288
18
      continue;
3289
23
    if (is_validity(&graph->edge[i]))
3290
4
      continue;
3291
19
3292
19
    validity = graph->edge[i].tagged_validity;
3293
19
    adjacent = domain_intersects(validity, condition_sink);
3294
19
    if (adjacent >= 0 && !adjacent)
3295
18
      adjacent = range_intersects(validity, condition_source);
3296
19
    if (adjacent < 0)
3297
0
      goto error;
3298
19
    if (!adjacent)
3299
17
      continue;
3300
2
3301
2
    set_validity(&graph->edge[i]);
3302
2
  }
3303
10
3304
10
  isl_union_set_free(condition_source);
3305
10
  isl_union_set_free(condition_sink);
3306
10
  return 0;
3307
0
error:
3308
0
  isl_union_set_free(condition_source);
3309
0
  isl_union_set_free(condition_sink);
3310
0
  return -1;
3311
10
}
3312
3313
/* Update the dependence relations of all edges based on the current schedule
3314
 * and enforce conditional validity constraints that are adjacent
3315
 * to satisfied condition constraints.
3316
 *
3317
 * First check if any of the condition constraints are satisfied
3318
 * (i.e., not local to the outer schedule) and keep track of
3319
 * their domain and range.
3320
 * Then update all dependence relations (which removes the non-local
3321
 * constraints).
3322
 * Finally, if any condition constraints turned out to be satisfied,
3323
 * then turn all adjacent conditional validity constraints into
3324
 * unconditional validity constraints.
3325
 */
3326
static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
3327
190
{
3328
190
  int i;
3329
190
  int any = 0;
3330
190
  isl_union_set *source, *sink;
3331
190
3332
190
  source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3333
190
  sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3334
474
  for (i = 0; i < graph->n_edge; 
++i284
) {
3335
284
    int local;
3336
284
    isl_union_set *uset;
3337
284
    isl_union_map *umap;
3338
284
3339
284
    if (!is_condition(&graph->edge[i]))
3340
246
      continue;
3341
38
    if (is_local(&graph->edge[i]))
3342
17
      continue;
3343
21
    local = is_condition_false(&graph->edge[i]);
3344
21
    if (local < 0)
3345
0
      goto error;
3346
21
    if (local)
3347
10
      continue;
3348
11
3349
11
    any = 1;
3350
11
3351
11
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3352
11
    uset = isl_union_map_domain(umap);
3353
11
    source = isl_union_set_union(source, uset);
3354
11
3355
11
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3356
11
    uset = isl_union_map_range(umap);
3357
11
    sink = isl_union_set_union(sink, uset);
3358
11
  }
3359
190
3360
474
  
for (i = 0; 190
i < graph->n_edge;
++i284
) {
3361
284
    if (update_edge(ctx, graph, &graph->edge[i]) < 0)
3362
0
      goto error;
3363
284
  }
3364
190
3365
190
  if (any)
3366
10
    return unconditionalize_adjacent_validity(graph, source, sink);
3367
180
3368
180
  isl_union_set_free(source);
3369
180
  isl_union_set_free(sink);
3370
180
  return 0;
3371
0
error:
3372
0
  isl_union_set_free(source);
3373
0
  isl_union_set_free(sink);
3374
0
  return -1;
3375
180
}
3376
3377
static void next_band(struct isl_sched_graph *graph)
3378
192
{
3379
192
  graph->band_start = graph->n_total_row;
3380
192
}
3381
3382
/* Return the union of the universe domains of the nodes in "graph"
3383
 * that satisfy "pred".
3384
 */
3385
static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
3386
  struct isl_sched_graph *graph,
3387
  int (*pred)(struct isl_sched_node *node, int data), int data)
3388
150
{
3389
150
  int i;
3390
150
  isl_set *set;
3391
150
  isl_union_set *dom;
3392
150
3393
252
  for (i = 0; i < graph->n; 
++i102
)
3394
252
    if (pred(&graph->node[i], data))
3395
150
      break;
3396
150
3397
150
  if (i >= graph->n)
3398
150
    
isl_die0
(ctx, isl_error_internal,
3399
150
      "empty component", return NULL);
3400
150
3401
150
  set = isl_set_universe(isl_space_copy(graph->node[i].space));
3402
150
  dom = isl_union_set_from_set(set);
3403
150
3404
275
  for (i = i + 1; i < graph->n; 
++i125
) {
3405
125
    if (!pred(&graph->node[i], data))
3406
112
      continue;
3407
13
    set = isl_set_universe(isl_space_copy(graph->node[i].space));
3408
13
    dom = isl_union_set_union(dom, isl_union_set_from_set(set));
3409
13
  }
3410
150
3411
150
  return dom;
3412
150
}
3413
3414
/* Return a list of unions of universe domains, where each element
3415
 * in the list corresponds to an SCC (or WCC) indexed by node->scc.
3416
 */
3417
static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx,
3418
  struct isl_sched_graph *graph)
3419
67
{
3420
67
  int i;
3421
67
  isl_union_set_list *filters;
3422
67
3423
67
  filters = isl_union_set_list_alloc(ctx, graph->scc);
3424
213
  for (i = 0; i < graph->scc; 
++i146
) {
3425
146
    isl_union_set *dom;
3426
146
3427
146
    dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i);
3428
146
    filters = isl_union_set_list_add(filters, dom);
3429
146
  }
3430
67
3431
67
  return filters;
3432
67
}
3433
3434
/* Return a list of two unions of universe domains, one for the SCCs up
3435
 * to and including graph->src_scc and another for the other SCCs.
3436
 */
3437
static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
3438
  struct isl_sched_graph *graph)
3439
2
{
3440
2
  isl_union_set *dom;
3441
2
  isl_union_set_list *filters;
3442
2
3443
2
  filters = isl_union_set_list_alloc(ctx, 2);
3444
2
  dom = isl_sched_graph_domain(ctx, graph,
3445
2
          &node_scc_at_most, graph->src_scc);
3446
2
  filters = isl_union_set_list_add(filters, dom);
3447
2
  dom = isl_sched_graph_domain(ctx, graph,
3448
2
          &node_scc_at_least, graph->src_scc + 1);
3449
2
  filters = isl_union_set_list_add(filters, dom);
3450
2
3451
2
  return filters;
3452
2
}
3453
3454
/* Copy nodes that satisfy node_pred from the src dependence graph
3455
 * to the dst dependence graph.
3456
 */
3457
static isl_stat copy_nodes(struct isl_sched_graph *dst,
3458
  struct isl_sched_graph *src,
3459
  int (*node_pred)(struct isl_sched_node *node, int data), int data)
3460
194
{
3461
194
  int i;
3462
194
3463
194
  dst->n = 0;
3464
694
  for (i = 0; i < src->n; 
++i500
) {
3465
500
    int j;
3466
500
3467
500
    if (!node_pred(&src->node[i], data))
3468
276
      continue;
3469
224
3470
224
    j = dst->n;
3471
224
    dst->node[j].space = isl_space_copy(src->node[i].space);
3472
224
    dst->node[j].compressed = src->node[i].compressed;
3473
224
    dst->node[j].hull = isl_set_copy(src->node[i].hull);
3474
224
    dst->node[j].compress =
3475
224
      isl_multi_aff_copy(src->node[i].compress);
3476
224
    dst->node[j].decompress =
3477
224
      isl_multi_aff_copy(src->node[i].decompress);
3478
224
    dst->node[j].nvar = src->node[i].nvar;
3479
224
    dst->node[j].nparam = src->node[i].nparam;
3480
224
    dst->node[j].sched = isl_mat_copy(src->node[i].sched);
3481
224
    dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
3482
224
    dst->node[j].coincident = src->node[i].coincident;
3483
224
    dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
3484
224
    dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds);
3485
224
    dst->node[j].max = isl_vec_copy(src->node[i].max);
3486
224
    dst->n++;
3487
224
3488
224
    if (!dst->node[j].space || !dst->node[j].sched)
3489
0
      return isl_stat_error;
3490
224
    if (dst->node[j].compressed &&
3491
224
        
(14
!dst->node[j].hull14
||
!dst->node[j].compress14
||
3492
14
         !dst->node[j].decompress))
3493
0
      return isl_stat_error;
3494
224
  }
3495
194
3496
194
  return isl_stat_ok;
3497
194
}
3498
3499
/* Copy non-empty edges that satisfy edge_pred from the src dependence graph
3500
 * to the dst dependence graph.
3501
 * If the source or destination node of the edge is not in the destination
3502
 * graph, then it must be a backward proximity edge and it should simply
3503
 * be ignored.
3504
 */
3505
static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
3506
  struct isl_sched_graph *src,
3507
  int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
3508
194
{
3509
194
  int i;
3510
194
3511
194
  dst->n_edge = 0;
3512
724
  for (i = 0; i < src->n_edge; 
++i530
) {
3513
530
    struct isl_sched_edge *edge = &src->edge[i];
3514
530
    isl_map *map;
3515
530
    isl_union_map *tagged_condition;
3516
530
    isl_union_map *tagged_validity;
3517
530
    struct isl_sched_node *dst_src, *dst_dst;
3518
530
3519
530
    if (!edge_pred(edge, data))
3520
418
      continue;
3521
112
3522
112
    if (isl_map_plain_is_empty(edge->map))
3523
31
      continue;
3524
81
3525
81
    dst_src = graph_find_node(ctx, dst, edge->src->space);
3526
81
    dst_dst = graph_find_node(ctx, dst, edge->dst->space);
3527
81
    if (!dst_src || !dst_dst)
3528
0
      return isl_stat_error;
3529
81
    if (!is_node(dst, dst_src) || !is_node(dst, dst_dst)) {
3530
0
      if (is_validity(edge) || is_conditional_validity(edge))
3531
0
        isl_die(ctx, isl_error_internal,
3532
0
          "backward (conditional) validity edge",
3533
0
          return isl_stat_error);
3534
0
      continue;
3535
81
    }
3536
81
3537
81
    map = isl_map_copy(edge->map);
3538
81
    tagged_condition = isl_union_map_copy(edge->tagged_condition);
3539
81
    tagged_validity = isl_union_map_copy(edge->tagged_validity);
3540
81
3541
81
    dst->edge[dst->n_edge].src = dst_src;
3542
81
    dst->edge[dst->n_edge].dst = dst_dst;
3543
81
    dst->edge[dst->n_edge].map = map;
3544
81
    dst->edge[dst->n_edge].tagged_condition = tagged_condition;
3545
81
    dst->edge[dst->n_edge].tagged_validity = tagged_validity;
3546
81
    dst->edge[dst->n_edge].types = edge->types;
3547
81
    dst->n_edge++;
3548
81
3549
81
    if (edge->tagged_condition && 
!tagged_condition11
)
3550
0
      return isl_stat_error;
3551
81
    if (edge->tagged_validity && 
!tagged_validity6
)
3552
0
      return isl_stat_error;
3553
81
3554
81
    if (graph_edge_tables_add(ctx, dst,
3555
81
              &dst->edge[dst->n_edge - 1]) < 0)
3556
0
      return isl_stat_error;
3557
81
  }
3558
194
3559
194
  return isl_stat_ok;
3560
194
}
3561
3562
/* Compute the maximal number of variables over all nodes.
3563
 * This is the maximal number of linearly independent schedule
3564
 * rows that we need to compute.
3565
 * Just in case we end up in a part of the dependence graph
3566
 * with only lower-dimensional domains, we make sure we will
3567
 * compute the required amount of extra linearly independent rows.
3568
 */
3569
static int compute_maxvar(struct isl_sched_graph *graph)
3570
446
{
3571
446
  int i;
3572
446
3573
446
  graph->maxvar = 0;
3574
1.07k
  for (i = 0; i < graph->n; 
++i628
) {
3575
628
    struct isl_sched_node *node = &graph->node[i];
3576
628
    int nvar;
3577
628
3578
628
    if (node_update_vmap(node) < 0)
3579
0
      return -1;
3580
628
    nvar = node->nvar + graph->n_row - node->rank;
3581
628
    if (nvar > graph->maxvar)
3582
439
      graph->maxvar = nvar;
3583
628
  }
3584
446
3585
446
  return 0;
3586
446
}
3587
3588
/* Extract the subgraph of "graph" that consists of the nodes satisfying
3589
 * "node_pred" and the edges satisfying "edge_pred" and store
3590
 * the result in "sub".
3591
 */
3592
static isl_stat extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
3593
  int (*node_pred)(struct isl_sched_node *node, int data),
3594
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3595
  int data, struct isl_sched_graph *sub)
3596
194
{
3597
194
  int i, n = 0, n_edge = 0;
3598
194
  int t;
3599
194
3600
694
  for (i = 0; i < graph->n; 
++i500
)
3601
500
    if (node_pred(&graph->node[i], data))
3602
224
      ++n;
3603
724
  for (i = 0; i < graph->n_edge; 
++i530
)
3604
530
    if (edge_pred(&graph->edge[i], data))
3605
112
      ++n_edge;
3606
194
  if (graph_alloc(ctx, sub, n, n_edge) < 0)
3607
0
    return isl_stat_error;
3608
194
  sub->root = graph->root;
3609
194
  if (copy_nodes(sub, graph, node_pred, data) < 0)
3610
0
    return isl_stat_error;
3611
194
  if (graph_init_table(ctx, sub) < 0)
3612
0
    return isl_stat_error;
3613
1.16k
  
for (t = 0; 194
t <= isl_edge_last;
++t970
)
3614
970
    sub->max_edge[t] = graph->max_edge[t];
3615
194
  if (graph_init_edge_tables(ctx, sub) < 0)
3616
0
    return isl_stat_error;
3617
194
  if (copy_edges(ctx, sub, graph, edge_pred, data) < 0)
3618
0
    return isl_stat_error;
3619
194
  sub->n_row = graph->n_row;
3620
194
  sub->max_row = graph->max_row;
3621
194
  sub->n_total_row = graph->n_total_row;
3622
194
  sub->band_start = graph->band_start;
3623
194
3624
194
  return isl_stat_ok;
3625
194
}
3626
3627
static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
3628
  struct isl_sched_graph *graph);
3629
static __isl_give isl_schedule_node *compute_schedule_wcc(
3630
  isl_schedule_node *node, struct isl_sched_graph *graph);
3631
3632
/* Compute a schedule for a subgraph of "graph".  In particular, for
3633
 * the graph composed of nodes that satisfy node_pred and edges that
3634
 * that satisfy edge_pred.
3635
 * If the subgraph is known to consist of a single component, then wcc should
3636
 * be set and then we call compute_schedule_wcc on the constructed subgraph.
3637
 * Otherwise, we call compute_schedule, which will check whether the subgraph
3638
 * is connected.
3639
 *
3640
 * The schedule is inserted at "node" and the updated schedule node
3641
 * is returned.
3642
 */
3643
static __isl_give isl_schedule_node *compute_sub_schedule(
3644
  __isl_take isl_schedule_node *node, isl_ctx *ctx,
3645
  struct isl_sched_graph *graph,
3646
  int (*node_pred)(struct isl_sched_node *node, int data),
3647
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3648
  int data, int wcc)
3649
76
{
3650
76
  struct isl_sched_graph split = { 0 };
3651
76
3652
76
  if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data,
3653
76
        &split) < 0)
3654
0
    goto error;
3655
76
3656
76
  if (wcc)
3657
72
    node = compute_schedule_wcc(node, &split);
3658
4
  else
3659
4
    node = compute_schedule(node, &split);
3660
76
3661
76
  graph_free(ctx, &split);
3662
76
  return node;
3663
0
error:
3664
0
  graph_free(ctx, &split);
3665
0
  return isl_schedule_node_free(node);
3666
76
}
3667
3668
static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
3669
766
{
3670
766
  return edge->src->scc == scc && 
edge->dst->scc == scc306
;
3671
766
}
3672
3673
static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
3674
14
{
3675
14
  return edge->dst->scc <= scc;
3676
14
}
3677
3678
static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
3679
14
{
3680
14
  return edge->src->scc >= scc;
3681
14
}
3682
3683
/* Reset the current band by dropping all its schedule rows.
3684
 */
3685
static isl_stat reset_band(struct isl_sched_graph *graph)
3686
8
{
3687
8
  int i;
3688
8
  int drop;
3689
8
3690
8
  drop = graph->n_total_row - graph->band_start;
3691
8
  graph->n_total_row -= drop;
3692
8
  graph->n_row -= drop;
3693
8
3694
28
  for (i = 0; i < graph->n; 
++i20
) {
3695
20
    struct isl_sched_node *node = &graph->node[i];
3696
20
3697
20
    isl_map_free(node->sched_map);
3698
20
    node->sched_map = NULL;
3699
20
3700
20
    node->sched = isl_mat_drop_rows(node->sched,
3701
20
            graph->band_start, drop);
3702
20
3703
20
    if (!node->sched)
3704
0
      return isl_stat_error;
3705
20
  }
3706
8
3707
8
  return isl_stat_ok;
3708
8
}
3709
3710
/* Split the current graph into two parts and compute a schedule for each
3711
 * part individually.  In particular, one part consists of all SCCs up
3712
 * to and including graph->src_scc, while the other part contains the other
3713
 * SCCs.  The split is enforced by a sequence node inserted at position "node"
3714
 * in the schedule tree.  Return the updated schedule node.
3715
 * If either of these two parts consists of a sequence, then it is spliced
3716
 * into the sequence containing the two parts.
3717
 *
3718
 * The current band is reset. It would be possible to reuse
3719
 * the previously computed rows as the first rows in the next
3720
 * band, but recomputing them may result in better rows as we are looking
3721
 * at a smaller part of the dependence graph.
3722
 */
3723
static __isl_give isl_schedule_node *compute_split_schedule(
3724
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3725
2
{
3726
2
  int is_seq;
3727
2
  isl_ctx *ctx;
3728
2
  isl_union_set_list *filters;
3729
2
3730
2
  if (!node)
3731
0
    return NULL;
3732
2
3733
2
  if (reset_band(graph) < 0)
3734
0
    return isl_schedule_node_free(node);
3735
2
3736
2
  next_band(graph);
3737
2
3738
2
  ctx = isl_schedule_node_get_ctx(node);
3739
2
  filters = extract_split(ctx, graph);
3740
2
  node = isl_schedule_node_insert_sequence(node, filters);
3741
2
  node = isl_schedule_node_child(node, 1);
3742
2
  node = isl_schedule_node_child(node, 0);
3743
2
3744
2
  node = compute_sub_schedule(node, ctx, graph,
3745
2
        &node_scc_at_least, &edge_src_scc_at_least,
3746
2
        graph->src_scc + 1, 0);
3747
2
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3748
2
  node = isl_schedule_node_parent(node);
3749
2
  node = isl_schedule_node_parent(node);
3750
2
  if (is_seq)
3751
0
    node = isl_schedule_node_sequence_splice_child(node, 1);
3752
2
  node = isl_schedule_node_child(node, 0);
3753
2
  node = isl_schedule_node_child(node, 0);
3754
2
  node = compute_sub_schedule(node, ctx, graph,
3755
2
        &node_scc_at_most, &edge_dst_scc_at_most,
3756
2
        graph->src_scc, 0);
3757
2
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3758
2
  node = isl_schedule_node_parent(node);
3759
2
  node = isl_schedule_node_parent(node);
3760
2
  if (is_seq)
3761
0
    node = isl_schedule_node_sequence_splice_child(node, 0);
3762
2
3763
2
  return node;
3764
2
}
3765
3766
/* Insert a band node at position "node" in the schedule tree corresponding
3767
 * to the current band in "graph".  Mark the band node permutable
3768
 * if "permutable" is set.
3769
 * The partial schedules and the coincidence property are extracted
3770
 * from the graph nodes.
3771
 * Return the updated schedule node.
3772
 */
3773
static __isl_give isl_schedule_node *insert_current_band(
3774
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3775
  int permutable)
3776
172
{
3777
172
  int i;
3778
172
  int start, end, n;
3779
172
  isl_multi_aff *ma;
3780
172
  isl_multi_pw_aff *mpa;
3781
172
  isl_multi_union_pw_aff *mupa;
3782
172
3783
172
  if (!node)
3784
0
    return NULL;
3785
172
3786
172
  if (graph->n < 1)
3787
172
    
isl_die0
(isl_schedule_node_get_ctx(node), isl_error_internal,
3788
172
      "graph should have at least one node",
3789
172
      return isl_schedule_node_free(node));
3790
172
3791
172
  start = graph->band_start;
3792
172
  end = graph->n_total_row;
3793
172
  n = end - start;
3794
172
3795
172
  ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n);
3796
172
  mpa = isl_multi_pw_aff_from_multi_aff(ma);
3797
172
  mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3798
172
3799
250
  for (i = 1; i < graph->n; 
++i78
) {
3800
78
    isl_multi_union_pw_aff *mupa_i;
3801
78
3802
78
    ma = node_extract_partial_schedule_multi_aff(&graph->node[i],
3803
78
                start, n);
3804
78
    mpa = isl_multi_pw_aff_from_multi_aff(ma);
3805
78
    mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3806
78
    mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
3807
78
  }
3808
172
  node = isl_schedule_node_insert_partial_schedule(node, mupa);
3809
172
3810
451
  for (i = 0; i < n; 
++i279
)
3811
279
    node = isl_schedule_node_band_member_set_coincident(node, i,
3812
279
          graph->node[0].coincident[start + i]);
3813
172
  node = isl_schedule_node_band_set_permutable(node, permutable);
3814
172
3815
172
  return node;
3816
172
}
3817
3818
/* Update the dependence relations based on the current schedule,
3819
 * add the current band to "node" and then continue with the computation
3820
 * of the next band.
3821
 * Return the updated schedule node.
3822
 */
3823
static __isl_give isl_schedule_node *compute_next_band(
3824
  __isl_take isl_schedule_node *node,
3825
  struct isl_sched_graph *graph, int permutable)
3826
172
{
3827
172
  isl_ctx *ctx;
3828
172
3829
172
  if (!node)
3830
0
    return NULL;
3831
172
3832
172
  ctx = isl_schedule_node_get_ctx(node);
3833
172
  if (update_edges(ctx, graph) < 0)
3834
0
    return isl_schedule_node_free(node);
3835
172
  node = insert_current_band(node, graph, permutable);
3836
172
  next_band(graph);
3837
172
3838
172
  node = isl_schedule_node_child(node, 0);
3839
172
  node = compute_schedule(node, graph);
3840
172
  node = isl_schedule_node_parent(node);
3841
172
3842
172
  return node;
3843
172
}
3844
3845
/* Add the constraints "coef" derived from an edge from "node" to itself
3846
 * to graph->lp in order to respect the dependences and to try and carry them.
3847
 * "pos" is the sequence number of the edge that needs to be carried.
3848
 * "coef" represents general constraints on coefficients (c_0, c_x)
3849
 * of valid constraints for (y - x) with x and y instances of the node.
3850
 *
3851
 * The constraints added to graph->lp need to enforce
3852
 *
3853
 *  (c_j_0 + c_j_x y) - (c_j_0 + c_j_x x)
3854
 *  = c_j_x (y - x) >= e_i
3855
 *
3856
 * for each (x,y) in the dependence relation of the edge.
3857
 * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x),
3858
 * taking into account that each coefficient in c_j_x is represented
3859
 * as a pair of non-negative coefficients.
3860
 */
3861
static isl_stat add_intra_constraints(struct isl_sched_graph *graph,
3862
  struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos)
3863
46
{
3864
46
  int offset;
3865
46
  isl_ctx *ctx;
3866
46
  isl_dim_map *dim_map;
3867
46
3868
46
  if (!coef)
3869
0
    return isl_stat_error;
3870
46
3871
46
  ctx = isl_basic_set_get_ctx(coef);
3872
46
  offset = coef_var_offset(coef);
3873
46
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
3874
46
  isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3875
46
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3876
46
3877
46
  return isl_stat_ok;
3878
46
}
3879
3880
/* Add the constraints "coef" derived from an edge from "src" to "dst"
3881
 * to graph->lp in order to respect the dependences and to try and carry them.
3882
 * "pos" is the sequence number of the edge that needs to be carried or
3883
 * -1 if no attempt should be made to carry the dependences.
3884
 * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
3885
 * of valid constraints for (x, y) with x and y instances of "src" and "dst".
3886
 *
3887
 * The constraints added to graph->lp need to enforce
3888
 *
3889
 *  (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
3890
 *
3891
 * for each (x,y) in the dependence relation of the edge or
3892
 *
3893
 *  (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0
3894
 *
3895
 * if pos is -1.
3896
 * That is,
3897
 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3898
 * or
3899
 * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3900
 * needs to be plugged in for (c_0, c_n, c_x, c_y),
3901
 * taking into account that each coefficient in c_j_x and c_k_x is represented
3902
 * as a pair of non-negative coefficients.
3903
 */
3904
static isl_stat add_inter_constraints(struct isl_sched_graph *graph,
3905
  struct isl_sched_node *src, struct isl_sched_node *dst,
3906
  __isl_take isl_basic_set *coef, int pos)
3907
18
{
3908
18
  int offset;
3909
18
  isl_ctx *ctx;
3910
18
  isl_dim_map *dim_map;
3911
18
3912
18
  if (!coef)
3913
0
    return isl_stat_error;
3914
18
3915
18
  ctx = isl_basic_set_get_ctx(coef);
3916
18
  offset = coef_var_offset(coef);
3917
18
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
3918
18
  if (pos >= 0)
3919
18
    isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3920
18
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3921
18
3922
18
  return isl_stat_ok;
3923
18
}
3924
3925
/* Data structure for keeping track of the data needed
3926
 * to exploit non-trivial lineality spaces.
3927
 *
3928
 * "any_non_trivial" is true if there are any non-trivial lineality spaces.
3929
 * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL.
3930
 * "equivalent" connects instances to other instances on the same line(s).
3931
 * "mask" contains the domain spaces of "equivalent".
3932
 * Any instance set not in "mask" does not have a non-trivial lineality space.
3933
 */
3934
struct isl_exploit_lineality_data {
3935
  isl_bool any_non_trivial;
3936
  isl_union_map *equivalent;
3937
  isl_union_set *mask;
3938
};
3939
3940
/* Data structure collecting information used during the construction
3941
 * of an LP for carrying dependences.
3942
 *
3943
 * "intra" is a sequence of coefficient constraints for intra-node edges.
3944
 * "inter" is a sequence of coefficient constraints for inter-node edges.
3945
 * "lineality" contains data used to exploit non-trivial lineality spaces.
3946
 */
3947
struct isl_carry {
3948
  isl_basic_set_list *intra;
3949
  isl_basic_set_list *inter;
3950
  struct isl_exploit_lineality_data lineality;
3951
};
3952
3953
/* Free all the data stored in "carry".
3954
 */
3955
static void isl_carry_clear(struct isl_carry *carry)
3956
32
{
3957
32
  isl_basic_set_list_free(carry->intra);
3958
32
  isl_basic_set_list_free(carry->inter);
3959
32
  isl_union_map_free(carry->lineality.equivalent);
3960
32
  isl_union_set_free(carry->lineality.mask);
3961
32
}
3962
3963
/* Return a pointer to the node in "graph" that lives in "space".
3964
 * If the requested node has been compressed, then "space"
3965
 * corresponds to the compressed space.
3966
 * The graph is assumed to have such a node.
3967
 * Return NULL in case of error.
3968
 *
3969
 * First try and see if "space" is the space of an uncompressed node.
3970
 * If so, return that node.
3971
 * Otherwise, "space" was constructed by construct_compressed_id and
3972
 * contains a user pointer pointing to the node in the tuple id.
3973
 * However, this node belongs to the original dependence graph.
3974
 * If "graph" is a subgraph of this original dependence graph,
3975
 * then the node with the same space still needs to be looked up
3976
 * in the current graph.
3977
 */
3978
static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx,
3979
  struct isl_sched_graph *graph, __isl_keep isl_space *space)
3980
106
{
3981
106
  isl_id *id;
3982
106
  struct isl_sched_node *node;
3983
106
3984
106
  if (!space)
3985
0
    return NULL;
3986
106
3987
106
  node = graph_find_node(ctx, graph, space);
3988
106
  if (!node)
3989
0
    return NULL;
3990
106
  if (is_node(graph, node))
3991
106
    return node;
3992
0
3993
0
  id = isl_space_get_tuple_id(space, isl_dim_set);
3994
0
  node = isl_id_get_user(id);
3995
0
  isl_id_free(id);
3996
0
3997
0
  if (!node)
3998
0
    return NULL;
3999
0
4000
0
  if (!is_node(graph->root, node))
4001
0
    isl_die(ctx, isl_error_internal,
4002
0
      "space points to invalid node", return NULL);
4003
0
  if (graph != graph->root)
4004
0
    node = graph_find_node(ctx, graph, node->space);
4005
0
  if (!is_node(graph, node))
4006
0
    isl_die(ctx, isl_error_internal,
4007
0
      "unable to find node", return NULL);
4008
0
4009
0
  return node;
4010
0
}
4011
4012
/* Internal data structure for add_all_constraints.
4013
 *
4014
 * "graph" is the schedule constraint graph for which an LP problem
4015
 * is being constructed.
4016
 * "carry_inter" indicates whether inter-node edges should be carried.
4017
 * "pos" is the position of the next edge that needs to be carried.
4018
 */
4019
struct isl_add_all_constraints_data {
4020
  isl_ctx *ctx;
4021
  struct isl_sched_graph *graph;
4022
  int carry_inter;
4023
  int pos;
4024
};
4025
4026
/* Add the constraints "coef" derived from an edge from a node to itself
4027
 * to data->graph->lp in order to respect the dependences and
4028
 * to try and carry them.
4029
 *
4030
 * The space of "coef" is of the form
4031
 *
4032
 *  coefficients[[c_cst] -> S[c_x]]
4033
 *
4034
 * with S[c_x] the (compressed) space of the node.
4035
 * Extract the node from the space and call add_intra_constraints.
4036
 */
4037
static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user)
4038
46
{
4039
46
  struct isl_add_all_constraints_data *data = user;
4040
46
  isl_space *space;
4041
46
  struct isl_sched_node *node;
4042
46
4043
46
  space = isl_basic_set_get_space(coef);
4044
46
  space = isl_space_range(isl_space_unwrap(space));
4045
46
  node = graph_find_compressed_node(data->ctx, data->graph, space);
4046
46
  isl_space_free(space);
4047
46
  return add_intra_constraints(data->graph, node, coef, data->pos++);
4048
46
}
4049
4050
/* Add the constraints "coef" derived from an edge from a node j
4051
 * to a node k to data->graph->lp in order to respect the dependences and
4052
 * to try and carry them (provided data->carry_inter is set).
4053
 *
4054
 * The space of "coef" is of the form
4055
 *
4056
 *  coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
4057
 *
4058
 * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
4059
 * Extract the nodes from the space and call add_inter_constraints.
4060
 */
4061
static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user)
4062
18
{
4063
18
  struct isl_add_all_constraints_data *data = user;
4064
18
  isl_space *space, *dom;
4065
18
  struct isl_sched_node *src, *dst;
4066
18
  int pos;
4067
18
4068
18
  space = isl_basic_set_get_space(coef);
4069
18
  space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space)));
4070
18
  dom = isl_space_domain(isl_space_copy(space));
4071
18
  src = graph_find_compressed_node(data->ctx, data->graph, dom);
4072
18
  isl_space_free(dom);
4073
18
  space = isl_space_range(space);
4074
18
  dst = graph_find_compressed_node(data->ctx, data->graph, space);
4075
18
  isl_space_free(space);
4076
18
4077
18
  pos = data->carry_inter ? data->pos++ : 
-10
;
4078
18
  return add_inter_constraints(data->graph, src, dst, coef, pos);
4079
18
}
4080
4081
/* Add constraints to graph->lp that force all (conditional) validity
4082
 * dependences to be respected and attempt to carry them.
4083
 * "intra" is the sequence of coefficient constraints for intra-node edges.
4084
 * "inter" is the sequence of coefficient constraints for inter-node edges.
4085
 * "carry_inter" indicates whether inter-node edges should be carried or
4086
 * only respected.
4087
 */
4088
static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph,
4089
  __isl_keep isl_basic_set_list *intra,
4090
  __isl_keep isl_basic_set_list *inter, int carry_inter)
4091
32
{
4092
32
  struct isl_add_all_constraints_data data = { ctx, graph, carry_inter };
4093
32
4094
32
  data.pos = 0;
4095
32
  if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0)
4096
0
    return isl_stat_error;
4097
32
  if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0)
4098
0
    return isl_stat_error;
4099
32
  return isl_stat_ok;
4100
32
}
4101
4102
/* Internal data structure for count_all_constraints
4103
 * for keeping track of the number of equality and inequality constraints.
4104
 */
4105
struct isl_sched_count {
4106
  int n_eq;
4107
  int n_ineq;
4108
};
4109
4110
/* Add the number of equality and inequality constraints of "bset"
4111
 * to data->n_eq and data->n_ineq.
4112
 */
4113
static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user)
4114
64
{
4115
64
  struct isl_sched_count *data = user;
4116
64
4117
64
  return update_count(bset, 1, &data->n_eq, &data->n_ineq);
4118
64
}
4119
4120
/* Count the number of equality and inequality constraints
4121
 * that will be added to the carry_lp problem.
4122
 * We count each edge exactly once.
4123
 * "intra" is the sequence of coefficient constraints for intra-node edges.
4124
 * "inter" is the sequence of coefficient constraints for inter-node edges.
4125
 */
4126
static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra,
4127
  __isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq)
4128
32
{
4129
32
  struct isl_sched_count data;
4130
32
4131
32
  data.n_eq = data.n_ineq = 0;
4132
32
  if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0)
4133
0
    return isl_stat_error;
4134
32
  if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0)
4135
0
    return isl_stat_error;
4136
32
4137
32
  *n_eq = data.n_eq;
4138
32
  *n_ineq = data.n_ineq;
4139
32
4140
32
  return isl_stat_ok;
4141
32
}
4142
4143
/* Construct an LP problem for finding schedule coefficients
4144
 * such that the schedule carries as many validity dependences as possible.
4145
 * In particular, for each dependence i, we bound the dependence distance
4146
 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
4147
 * of all e_i's.  Dependences with e_i = 0 in the solution are simply
4148
 * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
4149
 * "intra" is the sequence of coefficient constraints for intra-node edges.
4150
 * "inter" is the sequence of coefficient constraints for inter-node edges.
4151
 * "n_edge" is the total number of edges.
4152
 * "carry_inter" indicates whether inter-node edges should be carried or
4153
 * only respected.  That is, if "carry_inter" is not set, then
4154
 * no e_i variables are introduced for the inter-node edges.
4155
 *
4156
 * All variables of the LP are non-negative.  The actual coefficients
4157
 * may be negative, so each coefficient is represented as the difference
4158
 * of two non-negative variables.  The negative part always appears
4159
 * immediately before the positive part.
4160
 * Other than that, the variables have the following order
4161
 *
4162
 *  - sum of (1 - e_i) over all edges
4163
 *  - sum of all c_n coefficients
4164
 *    (unconstrained when computing non-parametric schedules)
4165
 *  - sum of positive and negative parts of all c_x coefficients
4166
 *  - for each edge
4167
 *    - e_i
4168
 *  - for each node
4169
 *    - positive and negative parts of c_i_x, in opposite order
4170
 *    - c_i_n (if parametric)
4171
 *    - c_i_0
4172
 *
4173
 * The constraints are those from the (validity) edges plus three equalities
4174
 * to express the sums and n_edge inequalities to express e_i <= 1.
4175
 */
4176
static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
4177
  int n_edge, __isl_keep isl_basic_set_list *intra,
4178
  __isl_keep isl_basic_set_list *inter, int carry_inter)
4179
32
{
4180
32
  int i;
4181
32
  int k;
4182
32
  isl_space *dim;
4183
32
  unsigned total;
4184
32
  int n_eq, n_ineq;
4185
32
4186
32
  total = 3 + n_edge;
4187
76
  for (i = 0; i < graph->n; 
++i44
) {
4188
44
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
4189
44
    node->start = total;
4190
44
    total += 1 + node->nparam + 2 * node->nvar;
4191
44
  }
4192
32
4193
32
  if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0)
4194
0
    return isl_stat_error;
4195
32
4196
32
  dim = isl_space_set_alloc(ctx, 0, total);
4197
32
  isl_basic_set_free(graph->lp);
4198
32
  n_eq += 3;
4199
32
  n_ineq += n_edge;
4200
32
  graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
4201
32
  graph->lp = isl_basic_set_set_rational(graph->lp);
4202
32
4203
32
  k = isl_basic_set_alloc_equality(graph->lp);
4204
32
  if (k < 0)
4205
0
    return isl_stat_error;
4206
32
  isl_seq_clr(graph->lp->eq[k], 1 + total);
4207
32
  isl_int_set_si(graph->lp->eq[k][0], -n_edge);
4208
32
  isl_int_set_si(graph->lp->eq[k][1], 1);
4209
96
  for (i = 0; i < n_edge; 
++i64
)
4210
64
    isl_int_set_si(graph->lp->eq[k][4 + i], 1);
4211
32
4212
32
  if (add_param_sum_constraint(graph, 1) < 0)
4213
0
    return isl_stat_error;
4214
32
  if (add_var_sum_constraint(graph, 2) < 0)
4215
0
    return isl_stat_error;
4216
32
4217
96
  
for (i = 0; 32
i < n_edge;
++i64
) {
4218
64
    k = isl_basic_set_alloc_inequality(graph->lp);
4219
64
    if (k < 0)
4220
0
      return isl_stat_error;
4221
64
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
4222
64
    isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
4223
64
    isl_int_set_si(graph->lp->ineq[k][0], 1);
4224
64
  }
4225
32
4226
32
  if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0)
4227
0
    return isl_stat_error;
4228
32
4229
32
  return isl_stat_ok;
4230
32
}
4231
4232
static __isl_give isl_schedule_node *compute_component_schedule(
4233
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4234
  int wcc);
4235
4236
/* If the schedule_split_scaled option is set and if the linear
4237
 * parts of the scheduling rows for all nodes in the graphs have
4238
 * a non-trivial common divisor, then remove this
4239
 * common divisor from the linear part.
4240
 * Otherwise, insert a band node directly and continue with
4241
 * the construction of the schedule.
4242
 *
4243
 * If a non-trivial common divisor is found, then
4244
 * the linear part is reduced and the remainder is ignored.
4245
 * The pieces of the graph that are assigned different remainders
4246
 * form (groups of) strongly connected components within
4247
 * the scaled down band.  If needed, they can therefore
4248
 * be ordered along this remainder in a sequence node.
4249
 * However, this ordering is not enforced here in order to allow
4250
 * the scheduler to combine some of the strongly connected components.
4251
 */
4252
static __isl_give isl_schedule_node *split_scaled(
4253
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4254
30
{
4255
30
  int i;
4256
30
  int row;
4257
30
  isl_ctx *ctx;
4258
30
  isl_int gcd, gcd_i;
4259
30
4260
30
  if (!node)
4261
0
    return NULL;
4262
30
4263
30
  ctx = isl_schedule_node_get_ctx(node);
4264
30
  if (!ctx->opt->schedule_split_scaled)
4265
0
    return compute_next_band(node, graph, 0);
4266
30
  if (graph->n <= 1)
4267
24
    return compute_next_band(node, graph, 0);
4268
6
4269
6
  isl_int_init(gcd);
4270
6
  isl_int_init(gcd_i);
4271
6
4272
6
  isl_int_set_si(gcd, 0);
4273
6
4274
6
  row = isl_mat_rows(graph->node[0].sched) - 1;
4275
6
4276
20
  for (i = 0; i < graph->n; 
++i14
) {
4277
14
    struct isl_sched_node *node = &graph->node[i];
4278
14
    int cols = isl_mat_cols(node->sched);
4279
14
4280
14
    isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
4281
14
    isl_int_gcd(gcd, gcd, gcd_i);
4282
14
  }
4283
6
4284
6
  isl_int_clear(gcd_i);
4285
6
4286
6
  if (isl_int_cmp_si(gcd, 1) <= 0) {
4287
2
    isl_int_clear(gcd);
4288
2
    return compute_next_band(node, graph, 0);
4289
2
  }
4290
4
4291
14
  
for (i = 0; 4
i < graph->n;
++i10
) {
4292
10
    struct isl_sched_node *node = &graph->node[i];
4293
10
4294
10
    isl_int_fdiv_q(node->sched->row[row][0],
4295
10
             node->sched->row[row][0], gcd);
4296
10
    isl_int_mul(node->sched->row[row][0],
4297
10
          node->sched->row[row][0], gcd);
4298
10
    node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
4299
10
    if (!node->sched)
4300
0
      goto error;
4301
10
  }
4302
4
4303
4
  isl_int_clear(gcd);
4304
4
4305
4
  return compute_next_band(node, graph, 0);
4306
0
error:
4307
0
  isl_int_clear(gcd);
4308
0
  return isl_schedule_node_free(node);
4309
4
}
4310
4311
/* Is the schedule row "sol" trivial on node "node"?
4312
 * That is, is the solution zero on the dimensions linearly independent of
4313
 * the previously found solutions?
4314
 * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
4315
 *
4316
 * Each coefficient is represented as the difference between
4317
 * two non-negative values in "sol".
4318
 * We construct the schedule row s and check if it is linearly
4319
 * independent of previously computed schedule rows
4320
 * by computing T s, with T the linear combinations that are zero
4321
 * on linearly dependent schedule rows.
4322
 * If the result consists of all zeros, then the solution is trivial.
4323
 */
4324
static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
4325
36
{
4326
36
  int trivial;
4327
36
  isl_vec *node_sol;
4328
36
4329
36
  if (!sol)
4330
0
    return -1;
4331
36
  if (node->nvar == node->rank)
4332
4
    return 0;
4333
32
4334
32
  node_sol = extract_var_coef(node, sol);
4335
32
  node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol);
4336
32
  if (!node_sol)
4337
0
    return -1;
4338
32
4339
32
  trivial = isl_seq_first_non_zero(node_sol->el,
4340
32
          node->nvar - node->rank) == -1;
4341
32
4342
32
  isl_vec_free(node_sol);
4343
32
4344
32
  return trivial;
4345
32
}
4346
4347
/* Is the schedule row "sol" trivial on any node where it should
4348
 * not be trivial?
4349
 * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
4350
 */
4351
static int is_any_trivial(struct isl_sched_graph *graph,
4352
  __isl_keep isl_vec *sol)
4353
32
{
4354
32
  int i;
4355
32
4356
64
  for (i = 0; i < graph->n; 
++i32
) {
4357
36
    struct isl_sched_node *node = &graph->node[i];
4358
36
    int trivial;
4359
36
4360
36
    if (!needs_row(graph, node))
4361
0
      continue;
4362
36
    trivial = is_trivial(node, sol);
4363
36
    if (trivial < 0 || trivial)
4364
4
      return trivial;
4365
36
  }
4366
32
4367
32
  
return 028
;
4368
32
}
4369
4370
/* Does the schedule represented by "sol" perform loop coalescing on "node"?
4371
 * If so, return the position of the coalesced dimension.
4372
 * Otherwise, return node->nvar or -1 on error.
4373
 *
4374
 * In particular, look for pairs of coefficients c_i and c_j such that
4375
 * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|.
4376
 * If any such pair is found, then return i.
4377
 * If size_i is infinity, then no check on c_i needs to be performed.
4378
 */
4379
static int find_node_coalescing(struct isl_sched_node *node,
4380
  __isl_keep isl_vec *sol)
4381
50
{
4382
50
  int i, j;
4383
50
  isl_int max;
4384
50
  isl_vec *csol;
4385
50
4386
50
  if (node->nvar <= 1)
4387
23
    return node->nvar;
4388
27
4389
27
  csol = extract_var_coef(node, sol);
4390
27
  if (!csol)
4391
0
    return -1;
4392
27
  isl_int_init(max);
4393
77
  for (i = 0; i < node->nvar; 
++i50
) {
4394
54
    isl_val *v;
4395
54
4396
54
    if (isl_int_is_zero(csol->el[i]))
4397
54
      
continue20
;
4398
34
    v = isl_multi_val_get_val(node->sizes, i);
4399
34
    if (!v)
4400
0
      goto error;
4401
34
    if (!isl_val_is_int(v)) {
4402
14
      isl_val_free(v);
4403
14
      continue;
4404
14
    }
4405
20
    v = isl_val_div_ui(v, 2);
4406
20
    v = isl_val_ceil(v);
4407
20
    if (!v)
4408
0
      goto error;
4409
20
    isl_int_mul(max, v->n, csol->el[i]);
4410
20
    isl_val_free(v);
4411
20
4412
52
    for (j = 0; j < node->nvar; 
++j32
) {
4413
36
      if (j == i)
4414
16
        continue;
4415
20
      if (isl_int_abs_gt(csol->el[j], max))
4416
20
        
break4
;
4417
20
    }
4418
20
    if (j < node->nvar)
4419
4
      break;
4420
20
  }
4421
27
4422
27
  isl_int_clear(max);
4423
27
  isl_vec_free(csol);
4424
27
  return i;
4425
0
error:
4426
0
  isl_int_clear(max);
4427
0
  isl_vec_free(csol);
4428
0
  return -1;
4429
27
}
4430
4431
/* Force the schedule coefficient at position "pos" of "node" to be zero
4432
 * in "tl".
4433
 * The coefficient is encoded as the difference between two non-negative
4434
 * variables.  Force these two variables to have the same value.
4435
 */
4436
static __isl_give isl_tab_lexmin *zero_out_node_coef(
4437
  __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
4438
4
{
4439
4
  int dim;
4440
4
  isl_ctx *ctx;
4441
4
  isl_vec *eq;
4442
4
4443
4
  ctx = isl_space_get_ctx(node->space);
4444
4
  dim = isl_tab_lexmin_dim(tl);
4445
4
  if (dim < 0)
4446
0
    return isl_tab_lexmin_free(tl);
4447
4
  eq = isl_vec_alloc(ctx, 1 + dim);
4448
4
  eq = isl_vec_clr(eq);
4449
4
  if (!eq)
4450
0
    return isl_tab_lexmin_free(tl);
4451
4
4452
4
  pos = 1 + node_var_coef_pos(node, pos);
4453
4
  isl_int_set_si(eq->el[pos], 1);
4454
4
  isl_int_set_si(eq->el[pos + 1], -1);
4455
4
  tl = isl_tab_lexmin_add_eq(tl, eq->el);
4456
4
  isl_vec_free(eq);
4457
4
4458
4
  return tl;
4459
4
}
4460
4461
/* Return the lexicographically smallest rational point in the basic set
4462
 * from which "tl" was constructed, double checking that this input set
4463
 * was not empty.
4464
 */
4465
static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
4466
38
{
4467
38
  isl_vec *sol;
4468
38
4469
38
  sol = isl_tab_lexmin_get_solution(tl);
4470
38
  if (!sol)
4471
0
    return NULL;
4472
38
  if (sol->size == 0)
4473
38
    
isl_die0
(isl_vec_get_ctx(sol), isl_error_internal,
4474
38
      "error in schedule construction",
4475
38
      return isl_vec_free(sol));
4476
38
  return sol;
4477
38
}
4478
4479
/* Does the solution "sol" of the LP problem constructed by setup_carry_lp
4480
 * carry any of the "n_edge" groups of dependences?
4481
 * The value in the first position is the sum of (1 - e_i) over all "n_edge"
4482
 * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
4483
 * by the edge are carried by the solution.
4484
 * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
4485
 * one of those is carried.
4486
 *
4487
 * Note that despite the fact that the problem is solved using a rational
4488
 * solver, the solution is guaranteed to be integral.
4489
 * Specifically, the dependence distance lower bounds e_i (and therefore
4490
 * also their sum) are integers.  See Lemma 5 of [1].
4491
 *
4492
 * Any potential denominator of the sum is cleared by this function.
4493
 * The denominator is not relevant for any of the other elements
4494
 * in the solution.
4495
 *
4496
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4497
 *     Problem, Part II: Multi-Dimensional Time.
4498
 *     In Intl. Journal of Parallel Programming, 1992.
4499
 */
4500
static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
4501
38
{
4502
38
  isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
4503
38
  isl_int_set_si(sol->el[0], 1);
4504
38
  return isl_int_cmp_si(sol->el[1], n_edge) < 0;
4505
38
}
4506
4507
/* Return the lexicographically smallest rational point in "lp",
4508
 * assuming that all variables are non-negative and performing some
4509
 * additional sanity checks.
4510
 * If "want_integral" is set, then compute the lexicographically smallest
4511
 * integer point instead.
4512
 * In particular, "lp" should not be empty by construction.
4513
 * Double check that this is the case.
4514
 * If dependences are not carried for any of the "n_edge" edges,
4515
 * then return an empty vector.
4516
 *
4517
 * If the schedule_treat_coalescing option is set and
4518
 * if the computed schedule performs loop coalescing on a given node,
4519
 * i.e., if it is of the form
4520
 *
4521
 *  c_i i + c_j j + ...
4522
 *
4523
 * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
4524
 * to cut out this solution.  Repeat this process until no more loop
4525
 * coalescing occurs or until no more dependences can be carried.
4526
 * In the latter case, revert to the previously computed solution.
4527
 *
4528
 * If the caller requests an integral solution and if coalescing should
4529
 * be treated, then perform the coalescing treatment first as
4530
 * an integral solution computed before coalescing treatment
4531
 * would carry the same number of edges and would therefore probably
4532
 * also be coalescing.
4533
 *
4534
 * To allow the coalescing treatment to be performed first,
4535
 * the initial solution is allowed to be rational and it is only
4536
 * cut out (if needed) in the next iteration, if no coalescing measures
4537
 * were taken.
4538
 */
4539
static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
4540
  __isl_take isl_basic_set *lp, int n_edge, int want_integral)
4541
32
{
4542
32
  int i, pos, cut;
4543
32
  isl_ctx *ctx;
4544
32
  isl_tab_lexmin *tl;
4545
32
  isl_vec *sol = NULL, *prev;
4546
32
  int treat_coalescing;
4547
32
  int try_again;
4548
32
4549
32
  if (!lp)
4550
0
    return NULL;
4551
32
  ctx = isl_basic_set_get_ctx(lp);
4552
32
  treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
4553
32
  tl = isl_tab_lexmin_from_basic_set(lp);
4554
32
4555
32
  cut = 0;
4556
38
  do {
4557
38
    int integral;
4558
38
4559
38
    try_again = 0;
4560
38
    if (cut)
4561
2
      tl = isl_tab_lexmin_cut_to_integer(tl);
4562
38
    prev = sol;
4563
38
    sol = non_empty_solution(tl);
4564
38
    if (!sol)
4565
0
      goto error;
4566
38
4567
38
    integral = isl_int_is_one(sol->el[0]);
4568
38
    if (!carries_dependences(sol, n_edge)) {
4569
2
      if (!prev)
4570
0
        prev = isl_vec_alloc(ctx, 0);
4571
2
      isl_vec_free(sol);
4572
2
      sol = prev;
4573
2
      break;
4574
2
    }
4575
36
    prev = isl_vec_free(prev);
4576
36
    cut = want_integral && 
!integral22
;
4577
36
    if (cut)
4578
2
      try_again = 1;
4579
36
    if (!treat_coalescing)
4580
0
      continue;
4581
82
    
for (i = 0; 36
i < graph->n;
++i46
) {
4582
50
      struct isl_sched_node *node = &graph->node[i];
4583
50
4584
50
      pos = find_node_coalescing(node, sol);
4585
50
      if (pos < 0)
4586
0
        goto error;
4587
50
      if (pos < node->nvar)
4588
4
        break;
4589
50
    }
4590
36
    if (i < graph->n) {
4591
4
      try_again = 1;
4592
4
      tl = zero_out_node_coef(tl, &graph->node[i], pos);
4593
4
      cut = 0;
4594
4
    }
4595
36
  } while (try_again);
4596
32
4597
32
  isl_tab_lexmin_free(tl);
4598
32
4599
32
  return sol;
4600
0
error:
4601
0
  isl_tab_lexmin_free(tl);
4602
0
  isl_vec_free(prev);
4603
0
  isl_vec_free(sol);
4604
0
  return NULL;
4605
32
}
4606
4607
/* If "edge" is an edge from a node to itself, then add the corresponding
4608
 * dependence relation to "umap".
4609
 * If "node" has been compressed, then the dependence relation
4610
 * is also compressed first.
4611
 */
4612
static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap,
4613
  struct isl_sched_edge *edge)
4614
46
{
4615
46
  isl_map *map;
4616
46
  struct isl_sched_node *node = edge->src;
4617
46
4618
46
  if (edge->src != edge->dst)
4619
20
    return umap;
4620
26
4621
26
  map = isl_map_copy(edge->map);
4622
26
  if (node->compressed) {
4623
0
    map = isl_map_preimage_domain_multi_aff(map,
4624
0
            isl_multi_aff_copy(node->decompress));
4625
0
    map = isl_map_preimage_range_multi_aff(map,
4626
0
            isl_multi_aff_copy(node->decompress));
4627
0
  }
4628
26
  umap = isl_union_map_add_map(umap, map);
4629
26
  return umap;
4630
26
}
4631
4632
/* If "edge" is an edge from a node to another node, then add the corresponding
4633
 * dependence relation to "umap".
4634
 * If the source or destination nodes of "edge" have been compressed,
4635
 * then the dependence relation is also compressed first.
4636
 */
4637
static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap,
4638
  struct isl_sched_edge *edge)
4639
46
{
4640
46
  isl_map *map;
4641
46
4642
46
  if (edge->src == edge->dst)
4643
26
    return umap;
4644
20
4645
20
  map = isl_map_copy(edge->map);
4646
20
  if (edge->src->compressed)
4647
0
    map = isl_map_preimage_domain_multi_aff(map,
4648
0
            isl_multi_aff_copy(edge->src->decompress));
4649
20
  if (edge->dst->compressed)
4650
0
    map = isl_map_preimage_range_multi_aff(map,
4651
0
            isl_multi_aff_copy(edge->dst->decompress));
4652
20
  umap = isl_union_map_add_map(umap, map);
4653
20
  return umap;
4654
20
}
4655
4656
/* Internal data structure used by union_drop_coalescing_constraints
4657
 * to collect bounds on all relevant statements.
4658
 *
4659
 * "graph" is the schedule constraint graph for which an LP problem
4660
 * is being constructed.
4661
 * "bounds" collects the bounds.
4662
 */
4663
struct isl_collect_bounds_data {
4664
  isl_ctx *ctx;
4665
  struct isl_sched_graph *graph;
4666
  isl_union_set *bounds;
4667
};
4668
4669
/* Add the size bounds for the node with instance deltas in "set"
4670
 * to data->bounds.
4671
 */
4672
static isl_stat collect_bounds(__isl_take isl_set *set, void *user)
4673
24
{
4674
24
  struct isl_collect_bounds_data *data = user;
4675
24
  struct isl_sched_node *node;
4676
24
  isl_space *space;
4677
24
  isl_set *bounds;
4678
24
4679
24
  space = isl_set_get_space(set);
4680
24
  isl_set_free(set);
4681
24
4682
24
  node = graph_find_compressed_node(data->ctx, data->graph, space);
4683
24
  isl_space_free(space);
4684
24
4685
24
  bounds = isl_set_from_basic_set(get_size_bounds(node));
4686
24
  data->bounds = isl_union_set_add_set(data->bounds, bounds);
4687
24
4688
24
  return isl_stat_ok;
4689
24
}
4690
4691
/* Drop some constraints from "delta" that could be exploited
4692
 * to construct loop coalescing schedules.
4693
 * In particular, drop those constraint that bound the difference
4694
 * to the size of the domain.
4695
 * Do this for each set/node in "delta" separately.
4696
 * The parameters are assumed to have been projected out by the caller.
4697
 */
4698
static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx,
4699
  struct isl_sched_graph *graph, __isl_take isl_union_set *delta)
4700
32
{
4701
32
  struct isl_collect_bounds_data data = { ctx, graph };
4702
32
4703
32
  data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4704
32
  if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0)
4705
0
    data.bounds = isl_union_set_free(data.bounds);
4706
32
  delta = isl_union_set_plain_gist(delta, data.bounds);
4707
32
4708
32
  return delta;
4709
32
}
4710
4711
/* Given a non-trivial lineality space "lineality", add the corresponding
4712
 * universe set to data->mask and add a map from elements to
4713
 * other elements along the lines in "lineality" to data->equivalent.
4714
 * If this is the first time this function gets called
4715
 * (data->any_non_trivial is still false), then set data->any_non_trivial and
4716
 * initialize data->mask and data->equivalent.
4717
 *
4718
 * In particular, if the lineality space is defined by equality constraints
4719
 *
4720
 *  E x = 0
4721
 *
4722
 * then construct an affine mapping
4723
 *
4724
 *  f : x -> E x
4725
 *
4726
 * and compute the equivalence relation of having the same image under f:
4727
 *
4728
 *  { x -> x' : E x = E x' }
4729
 */
4730
static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality,
4731
  struct isl_exploit_lineality_data *data)
4732
2
{
4733
2
  isl_mat *eq;
4734
2
  isl_space *space;
4735
2
  isl_set *univ;
4736
2
  isl_multi_aff *ma;
4737
2
  isl_multi_pw_aff *mpa;
4738
2
  isl_map *map;
4739
2
  int n;
4740
2
4741
2
  if (!lineality)
4742
0
    return isl_stat_error;
4743
2
  if (isl_basic_set_dim(lineality, isl_dim_div) != 0)
4744
2
    
isl_die0
(isl_basic_set_get_ctx(lineality), isl_error_internal,
4745
2
      "local variables not allowed", goto error);
4746
2
4747
2
  space = isl_basic_set_get_space(lineality);
4748
2
  if (!data->any_non_trivial) {
4749
2
    data->equivalent = isl_union_map_empty(isl_space_copy(space));
4750
2
    data->mask = isl_union_set_empty(isl_space_copy(space));
4751
2
  }
4752
2
  data->any_non_trivial = isl_bool_true;
4753
2
4754
2
  univ = isl_set_universe(isl_space_copy(space));
4755
2
  data->mask = isl_union_set_add_set(data->mask, univ);
4756
2
4757
2
  eq = isl_basic_set_extract_equalities(lineality);
4758
2
  n = isl_mat_rows(eq);
4759
2
  eq = isl_mat_insert_zero_rows(eq, 0, 1);
4760
2
  eq = isl_mat_set_element_si(eq, 0, 0, 1);
4761
2
  space = isl_space_from_domain(space);
4762
2
  space = isl_space_add_dims(space, isl_dim_out, n);
4763
2
  ma = isl_multi_aff_from_aff_mat(space, eq);
4764
2
  mpa = isl_multi_pw_aff_from_multi_aff(ma);
4765
2
  map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa));
4766
2
  data->equivalent = isl_union_map_add_map(data->equivalent, map);
4767
2
4768
2
  isl_basic_set_free(lineality);
4769
2
  return isl_stat_ok;
4770
0
error:
4771
0
  isl_basic_set_free(lineality);
4772
0
  return isl_stat_error;
4773
2
}
4774
4775
/* Check if the lineality space "set" is non-trivial (i.e., is not just
4776
 * the origin or, in other words, satisfies a number of equality constraints
4777
 * that is smaller than the dimension of the set).
4778
 * If so, extend data->mask and data->equivalent accordingly.
4779
 *
4780
 * The input should not have any local variables already, but
4781
 * isl_set_remove_divs is called to make sure it does not.
4782
 */
4783
static isl_stat add_lineality(__isl_take isl_set *set, void *user)
4784
24
{
4785
24
  struct isl_exploit_lineality_data *data = user;
4786
24
  isl_basic_set *hull;
4787
24
  int dim, n_eq;
4788
24
4789
24
  set = isl_set_remove_divs(set);
4790
24
  hull = isl_set_unshifted_simple_hull(set);
4791
24
  dim = isl_basic_set_dim(hull, isl_dim_set);
4792
24
  n_eq = isl_basic_set_n_equality(hull);
4793
24
  if (!hull)
4794
0
    return isl_stat_error;
4795
24
  if (dim != n_eq)
4796
2
    return add_non_trivial_lineality(hull, data);
4797
22
  isl_basic_set_free(hull);
4798
22
  return isl_stat_ok;
4799
22
}
4800
4801
/* Check if the difference set on intra-node schedule constraints "intra"
4802
 * has any non-trivial lineality space.
4803
 * If so, then extend the difference set to a difference set
4804
 * on equivalent elements.  That is, if "intra" is
4805
 *
4806
 *  { y - x : (x,y) \in V }
4807
 *
4808
 * and elements are equivalent if they have the same image under f,
4809
 * then return
4810
 *
4811
 *  { y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4812
 *
4813
 * or, since f is linear,
4814
 *
4815
 *  { y' - x' : (x,y) \in V and f(y - x) = f(y' - x') }
4816
 *
4817
 * The results of the search for non-trivial lineality spaces is stored
4818
 * in "data".
4819
 */
4820
static __isl_give isl_union_set *exploit_intra_lineality(
4821
  __isl_take isl_union_set *intra,
4822
  struct isl_exploit_lineality_data *data)
4823
32
{
4824
32
  isl_union_set *lineality;
4825
32
  isl_union_set *uset;
4826
32
4827
32
  data->any_non_trivial = isl_bool_false;
4828
32
  lineality = isl_union_set_copy(intra);
4829
32
  lineality = isl_union_set_combined_lineality_space(lineality);
4830
32
  if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0)
4831
0
    data->any_non_trivial = isl_bool_error;
4832
32
  isl_union_set_free(lineality);
4833
32
4834
32
  if (data->any_non_trivial < 0)
4835
0
    return isl_union_set_free(intra);
4836
32
  if (!data->any_non_trivial)
4837
30
    return intra;
4838
2
4839
2
  uset = isl_union_set_copy(intra);
4840
2
  intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask));
4841
2
  uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent));
4842
2
  intra = isl_union_set_union(intra, uset);
4843
2
4844
2
  intra = isl_union_set_remove_divs(intra);
4845
2
4846
2
  return intra;
4847
2
}
4848
4849
/* If the difference set on intra-node schedule constraints was found to have
4850
 * any non-trivial lineality space by exploit_intra_lineality,
4851
 * as recorded in "data", then extend the inter-node
4852
 * schedule constraints "inter" to schedule constraints on equivalent elements.
4853
 * That is, if "inter" is V and
4854
 * elements are equivalent if they have the same image under f, then return
4855
 *
4856
 *  { (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4857
 */
4858
static __isl_give isl_union_map *exploit_inter_lineality(
4859
  __isl_take isl_union_map *inter,
4860
  struct isl_exploit_lineality_data *data)
4861
32
{
4862
32
  isl_union_map *umap;
4863
32
4864
32
  if (data->any_non_trivial < 0)
4865
0
    return isl_union_map_free(inter);
4866
32
  if (!data->any_non_trivial)
4867
30
    return inter;
4868
2
4869
2
  umap = isl_union_map_copy(inter);
4870
2
  inter = isl_union_map_subtract_range(inter,
4871
2
        isl_union_set_copy(data->mask));
4872
2
  umap = isl_union_map_apply_range(umap,
4873
2
        isl_union_map_copy(data->equivalent));
4874
2
  inter = isl_union_map_union(inter, umap);
4875
2
  umap = isl_union_map_copy(inter);
4876
2
  inter = isl_union_map_subtract_domain(inter,
4877
2
        isl_union_set_copy(data->mask));
4878
2
  umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent),
4879
2
        umap);
4880
2
  inter = isl_union_map_union(inter, umap);
4881
2
4882
2
  inter = isl_union_map_remove_divs(inter);
4883
2
4884
2
  return inter;
4885
2
}
4886
4887
/* For each (conditional) validity edge in "graph",
4888
 * add the corresponding dependence relation using "add"
4889
 * to a collection of dependence relations and return the result.
4890
 * If "coincidence" is set, then coincidence edges are considered as well.
4891
 */
4892
static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph,
4893
  __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap,
4894
    struct isl_sched_edge *edge), int coincidence)
4895
64
{
4896
64
  int i;
4897
64
  isl_space *space;
4898
64
  isl_union_map *umap;
4899
64
4900
64
  space = isl_space_copy(graph->node[0].space);
4901
64
  umap = isl_union_map_empty(space);
4902
64
4903
156
  for (i = 0; i < graph->n_edge; 
++i92
) {
4904
92
    struct isl_sched_edge *edge = &graph->edge[i];
4905
92
4906
92
    if (!is_any_validity(edge) &&
4907
92
        
(0
!coincidence0
||
!is_coincidence(edge)0
))
4908
0
      continue;
4909
92
4910
92
    umap = add(umap, edge);
4911
92
  }
4912
64
4913
64
  return umap;
4914
64
}
4915
4916
/* Project out all parameters from "uset" and return the result.
4917
 */
4918
static __isl_give isl_union_set *union_set_drop_parameters(
4919
  __isl_take isl_union_set *uset)
4920
32
{
4921
32
  unsigned nparam;
4922
32
4923
32
  nparam = isl_union_set_dim(uset, isl_dim_param);
4924
32
  return isl_union_set_project_out(uset, isl_dim_param, 0, nparam);
4925
32
}
4926
4927
/* For each dependence relation on a (conditional) validity edge
4928
 * from a node to itself,
4929
 * construct the set of coefficients of valid constraints for elements
4930
 * in that dependence relation and collect the results.
4931
 * If "coincidence" is set, then coincidence edges are considered as well.
4932
 *
4933
 * In particular, for each dependence relation R, constraints
4934
 * on coefficients (c_0, c_x) are constructed such that
4935
 *
4936
 *  c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
4937
 *
4938
 * If the schedule_treat_coalescing option is set, then some constraints
4939
 * that could be exploited to construct coalescing schedules
4940
 * are removed before the dual is computed, but after the parameters
4941
 * have been projected out.
4942
 * The entire computation is essentially the same as that performed
4943
 * by intra_coefficients, except that it operates on multiple
4944
 * edges together and that the parameters are always projected out.
4945
 *
4946
 * Additionally, exploit any non-trivial lineality space
4947
 * in the difference set after removing coalescing constraints and
4948
 * store the results of the non-trivial lineality space detection in "data".
4949
 * The procedure is currently run unconditionally, but it is unlikely
4950
 * to find any non-trivial lineality spaces if no coalescing constraints
4951
 * have been removed.
4952
 *
4953
 * Note that if a dependence relation is a union of basic maps,
4954
 * then each basic map needs to be treated individually as it may only
4955
 * be possible to carry the dependences expressed by some of those
4956
 * basic maps and not all of them.
4957
 * The collected validity constraints are therefore not coalesced and
4958
 * it is assumed that they are not coalesced automatically.
4959
 * Duplicate basic maps can be removed, however.
4960
 * In particular, if the same basic map appears as a disjunct
4961
 * in multiple edges, then it only needs to be carried once.
4962
 */
4963
static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx,
4964
  struct isl_sched_graph *graph, int coincidence,
4965
  struct isl_exploit_lineality_data *data)
4966
32
{
4967
32
  isl_union_map *intra;
4968
32
  isl_union_set *delta;
4969
32
  isl_basic_set_list *list;
4970
32
4971
32
  intra = collect_validity(graph, &add_intra, coincidence);
4972
32
  delta = isl_union_map_deltas(intra);
4973
32
  delta = union_set_drop_parameters(delta);
4974
32
  delta = isl_union_set_remove_divs(delta);
4975
32
  if (isl_options_get_schedule_treat_coalescing(ctx))
4976
32
    delta = union_drop_coalescing_constraints(ctx, graph, delta);
4977
32
  delta = exploit_intra_lineality(delta, data);
4978
32
  list = isl_union_set_get_basic_set_list(delta);
4979
32
  isl_union_set_free(delta);
4980
32
4981
32
  return isl_basic_set_list_coefficients(list);
4982
32
}
4983
4984
/* For each dependence relation on a (conditional) validity edge
4985
 * from a node to some other node,
4986
 * construct the set of coefficients of valid constraints for elements
4987
 * in that dependence relation and collect the results.
4988
 * If "coincidence" is set, then coincidence edges are considered as well.
4989
 *
4990
 * In particular, for each dependence relation R, constraints
4991
 * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
4992
 *
4993
 *  c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
4994
 *
4995
 * This computation is essentially the same as that performed
4996
 * by inter_coefficients, except that it operates on multiple
4997
 * edges together.
4998
 *
4999
 * Additionally, exploit any non-trivial lineality space
5000
 * that may have been discovered by collect_intra_validity
5001
 * (as stored in "data").
5002
 *
5003
 * Note that if a dependence relation is a union of basic maps,
5004
 * then each basic map needs to be treated individually as it may only
5005
 * be possible to carry the dependences expressed by some of those
5006
 * basic maps and not all of them.
5007
 * The collected validity constraints are therefore not coalesced and
5008
 * it is assumed that they are not coalesced automatically.
5009
 * Duplicate basic maps can be removed, however.
5010
 * In particular, if the same basic map appears as a disjunct
5011
 * in multiple edges, then it only needs to be carried once.
5012
 */
5013
static __isl_give isl_basic_set_list *collect_inter_validity(
5014
  struct isl_sched_graph *graph, int coincidence,
5015
  struct isl_exploit_lineality_data *data)
5016
32
{
5017
32
  isl_union_map *inter;
5018
32
  isl_union_set *wrap;
5019
32
  isl_basic_set_list *list;
5020
32
5021
32
  inter = collect_validity(graph, &add_inter, coincidence);
5022
32
  inter = exploit_inter_lineality(inter, data);
5023
32
  inter = isl_union_map_remove_divs(inter);
5024
32
  wrap = isl_union_map_wrap(inter);
5025
32
  list = isl_union_set_get_basic_set_list(wrap);
5026
32
  isl_union_set_free(wrap);
5027
32
  return isl_basic_set_list_coefficients(list);
5028
32
}
5029
5030
/* Construct an LP problem for finding schedule coefficients
5031
 * such that the schedule carries as many of the "n_edge" groups of
5032
 * dependences as possible based on the corresponding coefficient
5033
 * constraints and return the lexicographically smallest non-trivial solution.
5034
 * "intra" is the sequence of coefficient constraints for intra-node edges.
5035
 * "inter" is the sequence of coefficient constraints for inter-node edges.
5036
 * If "want_integral" is set, then compute an integral solution
5037
 * for the coefficients rather than using the numerators
5038
 * of a rational solution.
5039
 * "carry_inter" indicates whether inter-node edges should be carried or
5040
 * only respected.
5041
 *
5042
 * If none of the "n_edge" groups can be carried
5043
 * then return an empty vector.
5044
 */
5045
static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx,
5046
  struct isl_sched_graph *graph, int n_edge,
5047
  __isl_keep isl_basic_set_list *intra,
5048
  __isl_keep isl_basic_set_list *inter, int want_integral,
5049
  int carry_inter)
5050
32
{
5051
32
  isl_basic_set *lp;
5052
32
5053
32
  if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0)
5054
0
    return NULL;
5055
32
5056
32
  lp = isl_basic_set_copy(graph->lp);
5057
32
  return non_neg_lexmin(graph, lp, n_edge, want_integral);
5058
32
}
5059
5060
/* Construct an LP problem for finding schedule coefficients
5061
 * such that the schedule carries as many of the validity dependences
5062
 * as possible and
5063
 * return the lexicographically smallest non-trivial solution.
5064
 * If "fallback" is set, then the carrying is performed as a fallback
5065
 * for the Pluto-like scheduler.
5066
 * If "coincidence" is set, then try and carry coincidence edges as well.
5067
 *
5068
 * The variable "n_edge" stores the number of groups that should be carried.
5069
 * If none of the "n_edge" groups can be carried
5070
 * then return an empty vector.
5071
 * If, moreover, "n_edge" is zero, then the LP problem does not even
5072
 * need to be constructed.
5073
 *
5074
 * If a fallback solution is being computed, then compute an integral solution
5075
 * for the coefficients rather than using the numerators
5076
 * of a rational solution.
5077
 *
5078
 * If a fallback solution is being computed, if there are any intra-node
5079
 * dependences, and if requested by the user, then first try
5080
 * to only carry those intra-node dependences.
5081
 * If this fails to carry any dependences, then try again
5082
 * with the inter-node dependences included.
5083
 */
5084
static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx,
5085
  struct isl_sched_graph *graph, int fallback, int coincidence)
5086
32
{
5087
32
  int n_intra, n_inter;
5088
32
  int n_edge;
5089
32
  struct isl_carry carry = { 0 };
5090
32
  isl_vec *sol;
5091
32
5092
32
  carry.intra = collect_intra_validity(ctx, graph, coincidence,
5093
32
            &carry.lineality);
5094
32
  carry.inter = collect_inter_validity(graph, coincidence,
5095
32
            &carry.lineality);
5096
32
  if (!carry.intra || !carry.inter)
5097
0
    goto error;
5098
32
  n_intra = isl_basic_set_list_n_basic_set(carry.intra);
5099
32
  n_inter = isl_basic_set_list_n_basic_set(carry.inter);
5100
32
5101
32
  if (fallback && 
n_intra > 018
&&
5102
32
      
isl_options_get_schedule_carry_self_first(ctx)14
) {
5103
14
    sol = compute_carrying_sol_coef(ctx, graph, n_intra,
5104
14
        carry.intra, carry.inter, fallback, 0);
5105
14
    if (!sol || sol->size != 0 || 
n_inter == 00
) {
5106
14
      isl_carry_clear(&carry);
5107
14
      return sol;
5108
14
    }
5109
0
    isl_vec_free(sol);
5110
0
  }
5111
32
5112
32
  n_edge = n_intra + n_inter;
5113
18
  if (n_edge == 0) {
5114
0
    isl_carry_clear(&carry);
5115
0
    return isl_vec_alloc(ctx, 0);
5116
0
  }
5117
18
5118
18
  sol = compute_carrying_sol_coef(ctx, graph, n_edge,
5119
18
        carry.intra, carry.inter, fallback, 1);
5120
18
  isl_carry_clear(&carry);
5121
18
  return sol;
5122
0
error:
5123
0
  isl_carry_clear(&carry);
5124
0
  return NULL;
5125
18
}
5126
5127
/* Construct a schedule row for each node such that as many validity dependences
5128
 * as possible are carried and then continue with the next band.
5129
 * If "fallback" is set, then the carrying is performed as a fallback
5130
 * for the Pluto-like scheduler.
5131
 * If "coincidence" is set, then try and carry coincidence edges as well.
5132
 *
5133
 * If there are no validity dependences, then no dependence can be carried and
5134
 * the procedure is guaranteed to fail.  If there is more than one component,
5135
 * then try computing a schedule on each component separately
5136
 * to prevent or at least postpone this failure.
5137
 *
5138
 * If a schedule row is computed, then check that dependences are carried
5139
 * for at least one of the edges.
5140
 *
5141
 * If the computed schedule row turns out to be trivial on one or
5142
 * more nodes where it should not be trivial, then we throw it away
5143
 * and try again on each component separately.
5144
 *
5145
 * If there is only one component, then we accept the schedule row anyway,
5146
 * but we do not consider it as a complete row and therefore do not
5147
 * increment graph->n_row.  Note that the ranks of the nodes that
5148
 * do get a non-trivial schedule part will get updated regardless and
5149
 * graph->maxvar is computed based on these ranks.  The test for
5150
 * whether more schedule rows are required in compute_schedule_wcc
5151
 * is therefore not affected.
5152
 *
5153
 * Insert a band corresponding to the schedule row at position "node"
5154
 * of the schedule tree and continue with the construction of the schedule.
5155
 * This insertion and the continued construction is performed by split_scaled
5156
 * after optionally checking for non-trivial common divisors.
5157
 */
5158
static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node,
5159
  struct isl_sched_graph *graph, int fallback, int coincidence)
5160
32
{
5161
32
  int trivial;
5162
32
  isl_ctx *ctx;
5163
32
  isl_vec *sol;
5164
32
5165
32
  if (!node)
5166
0
    return NULL;
5167
32
5168
32
  ctx = isl_schedule_node_get_ctx(node);
5169
32
  sol = compute_carrying_sol(ctx, graph, fallback, coincidence);
5170
32
  if (!sol)
5171
0
    return isl_schedule_node_free(node);
5172
32
  if (sol->size == 0) {
5173
0
    isl_vec_free(sol);
5174
0
    if (graph->scc > 1)
5175
0
      return compute_component_schedule(node, graph, 1);
5176
0
    isl_die(ctx, isl_error_unknown, "unable to carry dependences",
5177
0
      return isl_schedule_node_free(node));
5178
0
  }
5179
32
5180
32
  trivial = is_any_trivial(graph, sol);
5181
32
  if (trivial < 0) {
5182
0
    sol = isl_vec_free(sol);
5183
32
  } else if (trivial && 
graph->scc > 14
) {
5184
2
    isl_vec_free(sol);
5185
2
    return compute_component_schedule(node, graph, 1);
5186
2
  }
5187
30
5188
30
  if (update_schedule(graph, sol, 0) < 0)
5189
0
    return isl_schedule_node_free(node);
5190
30
  if (trivial)
5191
2
    graph->n_row--;
5192
30
5193
30
  return split_scaled(node, graph);
5194
30
}
5195
5196
/* Construct a schedule row for each node such that as many validity dependences
5197
 * as possible are carried and then continue with the next band.
5198
 * Do so as a fallback for the Pluto-like scheduler.
5199
 * If "coincidence" is set, then try and carry coincidence edges as well.
5200
 */
5201
static __isl_give isl_schedule_node *carry_fallback(
5202
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5203
  int coincidence)
5204
18
{
5205
18
  return carry(node, graph, 1, coincidence);
5206
18
}
5207
5208
/* Construct a schedule row for each node such that as many validity dependences
5209
 * as possible are carried and then continue with the next band.
5210
 * Do so for the case where the Feautrier scheduler was selected
5211
 * by the user.
5212
 */
5213
static __isl_give isl_schedule_node *carry_feautrier(
5214
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5215
14
{
5216
14
  return carry(node, graph, 0, 0);
5217
14
}
5218
5219
/* Construct a schedule row for each node such that as many validity dependences
5220
 * as possible are carried and then continue with the next band.
5221
 * Do so as a fallback for the Pluto-like scheduler.
5222
 */
5223
static __isl_give isl_schedule_node *carry_dependences(
5224
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5225
9
{
5226
9
  return carry_fallback(node, graph, 0);
5227
9
}
5228
5229
/* Construct a schedule row for each node such that as many validity or
5230
 * coincidence dependences as possible are carried and
5231
 * then continue with the next band.
5232
 * Do so as a fallback for the Pluto-like scheduler.
5233
 */
5234
static __