Coverage Report

Created: 2018-06-24 14:39

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/External/isl/isl_scheduler.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2011      INRIA Saclay
3
 * Copyright 2012-2014 Ecole Normale Superieure
4
 * Copyright 2015-2016 Sven Verdoolaege
5
 * Copyright 2016      INRIA Paris
6
 * Copyright 2017      Sven Verdoolaege
7
 *
8
 * Use of this software is governed by the MIT license
9
 *
10
 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
11
 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
12
 * 91893 Orsay, France
13
 * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
14
 * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
15
 * CS 42112, 75589 Paris Cedex 12, France
16
 */
17
18
#include <isl_ctx_private.h>
19
#include <isl_map_private.h>
20
#include <isl_space_private.h>
21
#include <isl_aff_private.h>
22
#include <isl/hash.h>
23
#include <isl/id.h>
24
#include <isl/constraint.h>
25
#include <isl/schedule.h>
26
#include <isl_schedule_constraints.h>
27
#include <isl/schedule_node.h>
28
#include <isl_mat_private.h>
29
#include <isl_vec_private.h>
30
#include <isl/set.h>
31
#include <isl_union_set_private.h>
32
#include <isl_seq.h>
33
#include <isl_tab.h>
34
#include <isl_dim_map.h>
35
#include <isl/map_to_basic_set.h>
36
#include <isl_sort.h>
37
#include <isl_options_private.h>
38
#include <isl_tarjan.h>
39
#include <isl_morph.h>
40
#include <isl/ilp.h>
41
#include <isl_val_private.h>
42
43
/*
44
 * The scheduling algorithm implemented in this file was inspired by
45
 * Bondhugula et al., "Automatic Transformations for Communication-Minimized
46
 * Parallelization and Locality Optimization in the Polyhedral Model".
47
 *
48
 * For a detailed description of the variant implemented in isl,
49
 * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017).
50
 */
51
52
53
/* Internal information about a node that is used during the construction
54
 * of a schedule.
55
 * space represents the original space in which the domain lives;
56
 *  that is, the space is not affected by compression
57
 * sched is a matrix representation of the schedule being constructed
58
 *  for this node; if compressed is set, then this schedule is
59
 *  defined over the compressed domain space
60
 * sched_map is an isl_map representation of the same (partial) schedule
61
 *  sched_map may be NULL; if compressed is set, then this map
62
 *  is defined over the uncompressed domain space
63
 * rank is the number of linearly independent rows in the linear part
64
 *  of sched
65
 * the rows of "vmap" represent a change of basis for the node
66
 *  variables; the first rank rows span the linear part of
67
 *  the schedule rows; the remaining rows are linearly independent
68
 * the rows of "indep" represent linear combinations of the schedule
69
 * coefficients that are non-zero when the schedule coefficients are
70
 * linearly independent of previously computed schedule rows.
71
 * start is the first variable in the LP problem in the sequences that
72
 *  represents the schedule coefficients of this node
73
 * nvar is the dimension of the (compressed) domain
74
 * nparam is the number of parameters or 0 if we are not constructing
75
 *  a parametric schedule
76
 *
77
 * If compressed is set, then hull represents the constraints
78
 * that were used to derive the compression, while compress and
79
 * decompress map the original space to the compressed space and
80
 * vice versa.
81
 *
82
 * scc is the index of SCC (or WCC) this node belongs to
83
 *
84
 * "cluster" is only used inside extract_clusters and identifies
85
 * the cluster of SCCs that the node belongs to.
86
 *
87
 * coincident contains a boolean for each of the rows of the schedule,
88
 * indicating whether the corresponding scheduling dimension satisfies
89
 * the coincidence constraints in the sense that the corresponding
90
 * dependence distances are zero.
91
 *
92
 * If the schedule_treat_coalescing option is set, then
93
 * "sizes" contains the sizes of the (compressed) instance set
94
 * in each direction.  If there is no fixed size in a given direction,
95
 * then the corresponding size value is set to infinity.
96
 * If the schedule_treat_coalescing option or the schedule_max_coefficient
97
 * option is set, then "max" contains the maximal values for
98
 * schedule coefficients of the (compressed) variables.  If no bound
99
 * needs to be imposed on a particular variable, then the corresponding
100
 * value is negative.
101
 * If not NULL, then "bounds" contains a non-parametric set
102
 * in the compressed space that is bounded by the size in each direction.
103
 */
104
struct isl_sched_node {
105
  isl_space *space;
106
  int compressed;
107
  isl_set *hull;
108
  isl_multi_aff *compress;
109
  isl_multi_aff *decompress;
110
  isl_mat *sched;
111
  isl_map *sched_map;
112
  int  rank;
113
  isl_mat *indep;
114
  isl_mat *vmap;
115
  int  start;
116
  int  nvar;
117
  int  nparam;
118
119
  int  scc;
120
  int  cluster;
121
122
  int *coincident;
123
124
  isl_multi_val *sizes;
125
  isl_basic_set *bounds;
126
  isl_vec *max;
127
};
128
129
static int node_has_tuples(const void *entry, const void *val)
130
1.73k
{
131
1.73k
  struct isl_sched_node *node = (struct isl_sched_node *)entry;
132
1.73k
  isl_space *space = (isl_space *) val;
133
1.73k
134
1.73k
  return isl_space_has_equal_tuples(node->space, space);
135
1.73k
}
136
137
static int node_scc_exactly(struct isl_sched_node *node, int scc)
138
1.10k
{
139
1.10k
  return node->scc == scc;
140
1.10k
}
141
142
static int node_scc_at_most(struct isl_sched_node *node, int scc)
143
18
{
144
18
  return node->scc <= scc;
145
18
}
146
147
static int node_scc_at_least(struct isl_sched_node *node, int scc)
148
18
{
149
18
  return node->scc >= scc;
150
18
}
151
152
/* An edge in the dependence graph.  An edge may be used to
153
 * ensure validity of the generated schedule, to minimize the dependence
154
 * distance or both
155
 *
156
 * map is the dependence relation, with i -> j in the map if j depends on i
157
 * tagged_condition and tagged_validity contain the union of all tagged
158
 *  condition or conditional validity dependence relations that
159
 *  specialize the dependence relation "map"; that is,
160
 *  if (i -> a) -> (j -> b) is an element of "tagged_condition"
161
 *  or "tagged_validity", then i -> j is an element of "map".
162
 *  If these fields are NULL, then they represent the empty relation.
163
 * src is the source node
164
 * dst is the sink node
165
 *
166
 * types is a bit vector containing the types of this edge.
167
 * validity is set if the edge is used to ensure correctness
168
 * coincidence is used to enforce zero dependence distances
169
 * proximity is set if the edge is used to minimize dependence distances
170
 * condition is set if the edge represents a condition
171
 *  for a conditional validity schedule constraint
172
 * local can only be set for condition edges and indicates that
173
 *  the dependence distance over the edge should be zero
174
 * conditional_validity is set if the edge is used to conditionally
175
 *  ensure correctness
176
 *
177
 * For validity edges, start and end mark the sequence of inequality
178
 * constraints in the LP problem that encode the validity constraint
179
 * corresponding to this edge.
180
 *
181
 * During clustering, an edge may be marked "no_merge" if it should
182
 * not be used to merge clusters.
183
 * The weight is also only used during clustering and it is
184
 * an indication of how many schedule dimensions on either side
185
 * of the schedule constraints can be aligned.
186
 * If the weight is negative, then this means that this edge was postponed
187
 * by has_bounded_distances or any_no_merge.  The original weight can
188
 * be retrieved by adding 1 + graph->max_weight, with "graph"
189
 * the graph containing this edge.
190
 */
191
struct isl_sched_edge {
192
  isl_map *map;
193
  isl_union_map *tagged_condition;
194
  isl_union_map *tagged_validity;
195
196
  struct isl_sched_node *src;
197
  struct isl_sched_node *dst;
198
199
  unsigned types;
200
201
  int start;
202
  int end;
203
204
  int no_merge;
205
  int weight;
206
};
207
208
/* Is "edge" marked as being of type "type"?
209
 */
210
static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type)
211
12.8k
{
212
12.8k
  return ISL_FL_ISSET(edge->types, 1 << type);
213
12.8k
}
214
215
/* Mark "edge" as being of type "type".
216
 */
217
static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
218
680
{
219
680
  ISL_FL_SET(edge->types, 1 << type);
220
680
}
221
222
/* No longer mark "edge" as being of type "type"?
223
 */
224
static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
225
44
{
226
44
  ISL_FL_CLR(edge->types, 1 << type);
227
44
}
228
229
/* Is "edge" marked as a validity edge?
230
 */
231
static int is_validity(struct isl_sched_edge *edge)
232
3.43k
{
233
3.43k
  return is_type(edge, isl_edge_validity);
234
3.43k
}
235
236
/* Mark "edge" as a validity edge.
237
 */
238
static void set_validity(struct isl_sched_edge *edge)
239
2
{
240
2
  set_type(edge, isl_edge_validity);
241
2
}
242
243
/* Is "edge" marked as a proximity edge?
244
 */
245
static int is_proximity(struct isl_sched_edge *edge)
246
1.79k
{
247
1.79k
  return is_type(edge, isl_edge_proximity);
248
1.79k
}
249
250
/* Is "edge" marked as a local edge?
251
 */
252
static int is_local(struct isl_sched_edge *edge)
253
2.23k
{
254
2.23k
  return is_type(edge, isl_edge_local);
255
2.23k
}
256
257
/* Mark "edge" as a local edge.
258
 */
259
static void set_local(struct isl_sched_edge *edge)
260
15
{
261
15
  set_type(edge, isl_edge_local);
262
15
}
263
264
/* No longer mark "edge" as a local edge.
265
 */
266
static void clear_local(struct isl_sched_edge *edge)
267
44
{
268
44
  clear_type(edge, isl_edge_local);
269
44
}
270
271
/* Is "edge" marked as a coincidence edge?
272
 */
273
static int is_coincidence(struct isl_sched_edge *edge)
274
1.03k
{
275
1.03k
  return is_type(edge, isl_edge_coincidence);
276
1.03k
}
277
278
/* Is "edge" marked as a condition edge?
279
 */
280
static int is_condition(struct isl_sched_edge *edge)
281
2.02k
{
282
2.02k
  return is_type(edge, isl_edge_condition);
283
2.02k
}
284
285
/* Is "edge" marked as a conditional validity edge?
286
 */
287
static int is_conditional_validity(struct isl_sched_edge *edge)
288
1.48k
{
289
1.48k
  return is_type(edge, isl_edge_conditional_validity);
290
1.48k
}
291
292
/* Is "edge" of a type that can appear multiple times between
293
 * the same pair of nodes?
294
 *
295
 * Condition edges and conditional validity edges may have tagged
296
 * dependence relations, in which case an edge is added for each
297
 * pair of tags.
298
 */
299
static int is_multi_edge_type(struct isl_sched_edge *edge)
300
112
{
301
112
  return is_condition(edge) || 
is_conditional_validity(edge)89
;
302
112
}
303
304
/* Internal information about the dependence graph used during
305
 * the construction of the schedule.
306
 *
307
 * intra_hmap is a cache, mapping dependence relations to their dual,
308
 *  for dependences from a node to itself, possibly without
309
 *  coefficients for the parameters
310
 * intra_hmap_param is a cache, mapping dependence relations to their dual,
311
 *  for dependences from a node to itself, including coefficients
312
 *  for the parameters
313
 * inter_hmap is a cache, mapping dependence relations to their dual,
314
 *  for dependences between distinct nodes
315
 * if compression is involved then the key for these maps
316
 * is the original, uncompressed dependence relation, while
317
 * the value is the dual of the compressed dependence relation.
318
 *
319
 * n is the number of nodes
320
 * node is the list of nodes
321
 * maxvar is the maximal number of variables over all nodes
322
 * max_row is the allocated number of rows in the schedule
323
 * n_row is the current (maximal) number of linearly independent
324
 *  rows in the node schedules
325
 * n_total_row is the current number of rows in the node schedules
326
 * band_start is the starting row in the node schedules of the current band
327
 * root is set to the original dependence graph from which this graph
328
 *  is derived through splitting.  If this graph is not the result of
329
 *  splitting, then the root field points to the graph itself.
330
 *
331
 * sorted contains a list of node indices sorted according to the
332
 *  SCC to which a node belongs
333
 *
334
 * n_edge is the number of edges
335
 * edge is the list of edges
336
 * max_edge contains the maximal number of edges of each type;
337
 *  in particular, it contains the number of edges in the inital graph.
338
 * edge_table contains pointers into the edge array, hashed on the source
339
 *  and sink spaces; there is one such table for each type;
340
 *  a given edge may be referenced from more than one table
341
 *  if the corresponding relation appears in more than one of the
342
 *  sets of dependences; however, for each type there is only
343
 *  a single edge between a given pair of source and sink space
344
 *  in the entire graph
345
 *
346
 * node_table contains pointers into the node array, hashed on the space tuples
347
 *
348
 * region contains a list of variable sequences that should be non-trivial
349
 *
350
 * lp contains the (I)LP problem used to obtain new schedule rows
351
 *
352
 * src_scc and dst_scc are the source and sink SCCs of an edge with
353
 *  conflicting constraints
354
 *
355
 * scc represents the number of components
356
 * weak is set if the components are weakly connected
357
 *
358
 * max_weight is used during clustering and represents the maximal
359
 * weight of the relevant proximity edges.
360
 */
361
struct isl_sched_graph {
362
  isl_map_to_basic_set *intra_hmap;
363
  isl_map_to_basic_set *intra_hmap_param;
364
  isl_map_to_basic_set *inter_hmap;
365
366
  struct isl_sched_node *node;
367
  int n;
368
  int maxvar;
369
  int max_row;
370
  int n_row;
371
372
  int *sorted;
373
374
  int n_total_row;
375
  int band_start;
376
377
  struct isl_sched_graph *root;
378
379
  struct isl_sched_edge *edge;
380
  int n_edge;
381
  int max_edge[isl_edge_last + 1];
382
  struct isl_hash_table *edge_table[isl_edge_last + 1];
383
384
  struct isl_hash_table *node_table;
385
  struct isl_trivial_region *region;
386
387
  isl_basic_set *lp;
388
389
  int src_scc;
390
  int dst_scc;
391
392
  int scc;
393
  int weak;
394
395
  int max_weight;
396
};
397
398
/* Initialize node_table based on the list of nodes.
399
 */
400
static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
401
349
{
402
349
  int i;
403
349
404
349
  graph->node_table = isl_hash_table_alloc(ctx, graph->n);
405
349
  if (!graph->node_table)
406
0
    return -1;
407
349
408
856
  
for (i = 0; 349
i < graph->n;
++i507
) {
409
507
    struct isl_hash_table_entry *entry;
410
507
    uint32_t hash;
411
507
412
507
    hash = isl_space_get_tuple_hash(graph->node[i].space);
413
507
    entry = isl_hash_table_find(ctx, graph->node_table, hash,
414
507
              &node_has_tuples,
415
507
              graph->node[i].space, 1);
416
507
    if (!entry)
417
0
      return -1;
418
507
    entry->data = &graph->node[i];
419
507
  }
420
349
421
349
  return 0;
422
349
}
423
424
/* Return a pointer to the node that lives within the given space,
425
 * an invalid node if there is no such node, or NULL in case of error.
426
 */
427
static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
428
  struct isl_sched_graph *graph, __isl_keep isl_space *space)
429
1.75k
{
430
1.75k
  struct isl_hash_table_entry *entry;
431
1.75k
  uint32_t hash;
432
1.75k
433
1.75k
  if (!space)
434
0
    return NULL;
435
1.75k
436
1.75k
  hash = isl_space_get_tuple_hash(space);
437
1.75k
  entry = isl_hash_table_find(ctx, graph->node_table, hash,
438
1.75k
            &node_has_tuples, space, 0);
439
1.75k
440
1.75k
  return entry ? 
entry->data1.73k
:
graph->node + graph->n24
;
441
1.75k
}
442
443
/* Is "node" a node in "graph"?
444
 */
445
static int is_node(struct isl_sched_graph *graph,
446
  struct isl_sched_node *node)
447
1.75k
{
448
1.75k
  return node && node >= &graph->node[0] && node < &graph->node[graph->n];
449
1.75k
}
450
451
static int edge_has_src_and_dst(const void *entry, const void *val)
452
1.21k
{
453
1.21k
  const struct isl_sched_edge *edge = entry;
454
1.21k
  const struct isl_sched_edge *temp = val;
455
1.21k
456
1.21k
  return edge->src == temp->src && edge->dst == temp->dst;
457
1.21k
}
458
459
/* Add the given edge to graph->edge_table[type].
460
 */
461
static isl_stat graph_edge_table_add(isl_ctx *ctx,
462
  struct isl_sched_graph *graph, enum isl_edge_type type,
463
  struct isl_sched_edge *edge)
464
947
{
465
947
  struct isl_hash_table_entry *entry;
466
947
  uint32_t hash;
467
947
468
947
  hash = isl_hash_init();
469
947
  hash = isl_hash_builtin(hash, edge->src);
470
947
  hash = isl_hash_builtin(hash, edge->dst);
471
947
  entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
472
947
            &edge_has_src_and_dst, edge, 1);
473
947
  if (!entry)
474
0
    return isl_stat_error;
475
947
  entry->data = edge;
476
947
477
947
  return isl_stat_ok;
478
947
}
479
480
/* Add "edge" to all relevant edge tables.
481
 * That is, for every type of the edge, add it to the corresponding table.
482
 */
483
static isl_stat graph_edge_tables_add(isl_ctx *ctx,
484
  struct isl_sched_graph *graph, struct isl_sched_edge *edge)
485
114
{
486
114
  enum isl_edge_type t;
487
114
488
684
  for (t = isl_edge_first; t <= isl_edge_last; 
++t570
) {
489
570
    if (!is_type(edge, t))
490
286
      continue;
491
284
    if (graph_edge_table_add(ctx, graph, t, edge) < 0)
492
0
      return isl_stat_error;
493
284
  }
494
114
495
114
  return isl_stat_ok;
496
114
}
497
498
/* Allocate the edge_tables based on the maximal number of edges of
499
 * each type.
500
 */
501
static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
502
349
{
503
349
  int i;
504
349
505
2.09k
  for (i = 0; i <= isl_edge_last; 
++i1.74k
) {
506
1.74k
    graph->edge_table[i] = isl_hash_table_alloc(ctx,
507
1.74k
                  graph->max_edge[i]);
508
1.74k
    if (!graph->edge_table[i])
509
0
      return -1;
510
1.74k
  }
511
349
512
349
  return 0;
513
349
}
514
515
/* If graph->edge_table[type] contains an edge from the given source
516
 * to the given destination, then return the hash table entry of this edge.
517
 * Otherwise, return NULL.
518
 */
519
static struct isl_hash_table_entry *graph_find_edge_entry(
520
  struct isl_sched_graph *graph,
521
  enum isl_edge_type type,
522
  struct isl_sched_node *src, struct isl_sched_node *dst)
523
4.87k
{
524
4.87k
  isl_ctx *ctx = isl_space_get_ctx(src->space);
525
4.87k
  uint32_t hash;
526
4.87k
  struct isl_sched_edge temp = { .src = src, .dst = dst };
527
4.87k
528
4.87k
  hash = isl_hash_init();
529
4.87k
  hash = isl_hash_builtin(hash, temp.src);
530
4.87k
  hash = isl_hash_builtin(hash, temp.dst);
531
4.87k
  return isl_hash_table_find(ctx, graph->edge_table[type], hash,
532
4.87k
            &edge_has_src_and_dst, &temp, 0);
533
4.87k
}
534
535
536
/* If graph->edge_table[type] contains an edge from the given source
537
 * to the given destination, then return this edge.
538
 * Otherwise, return NULL.
539
 */
540
static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
541
  enum isl_edge_type type,
542
  struct isl_sched_node *src, struct isl_sched_node *dst)
543
4.02k
{
544
4.02k
  struct isl_hash_table_entry *entry;
545
4.02k
546
4.02k
  entry = graph_find_edge_entry(graph, type, src, dst);
547
4.02k
  if (!entry)
548
3.21k
    return NULL;
549
806
550
806
  return entry->data;
551
806
}
552
553
/* Check whether the dependence graph has an edge of the given type
554
 * between the given two nodes.
555
 */
556
static isl_bool graph_has_edge(struct isl_sched_graph *graph,
557
  enum isl_edge_type type,
558
  struct isl_sched_node *src, struct isl_sched_node *dst)
559
2.28k
{
560
2.28k
  struct isl_sched_edge *edge;
561
2.28k
  isl_bool empty;
562
2.28k
563
2.28k
  edge = graph_find_edge(graph, type, src, dst);
564
2.28k
  if (!edge)
565
1.89k
    return isl_bool_false;
566
393
567
393
  empty = isl_map_plain_is_empty(edge->map);
568
393
  if (empty < 0)
569
0
    return isl_bool_error;
570
393
571
393
  return !empty;
572
393
}
573
574
/* Look for any edge with the same src, dst and map fields as "model".
575
 *
576
 * Return the matching edge if one can be found.
577
 * Return "model" if no matching edge is found.
578
 * Return NULL on error.
579
 */
580
static struct isl_sched_edge *graph_find_matching_edge(
581
  struct isl_sched_graph *graph, struct isl_sched_edge *model)
582
663
{
583
663
  enum isl_edge_type i;
584
663
  struct isl_sched_edge *edge;
585
663
586
2.00k
  for (i = isl_edge_first; i <= isl_edge_last; 
++i1.34k
) {
587
1.73k
    int is_equal;
588
1.73k
589
1.73k
    edge = graph_find_edge(graph, i, model->src, model->dst);
590
1.73k
    if (!edge)
591
1.32k
      continue;
592
413
    is_equal = isl_map_plain_is_equal(model->map, edge->map);
593
413
    if (is_equal < 0)
594
0
      return NULL;
595
413
    if (is_equal)
596
395
      return edge;
597
413
  }
598
663
599
663
  
return model268
;
600
663
}
601
602
/* Remove the given edge from all the edge_tables that refer to it.
603
 */
604
static void graph_remove_edge(struct isl_sched_graph *graph,
605
  struct isl_sched_edge *edge)
606
171
{
607
171
  isl_ctx *ctx = isl_map_get_ctx(edge->map);
608
171
  enum isl_edge_type i;
609
171
610
1.02k
  for (i = isl_edge_first; i <= isl_edge_last; 
++i855
) {
611
855
    struct isl_hash_table_entry *entry;
612
855
613
855
    entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
614
855
    if (!entry)
615
513
      continue;
616
342
    if (entry->data != edge)
617
9
      continue;
618
333
    isl_hash_table_remove(ctx, graph->edge_table[i], entry);
619
333
  }
620
171
}
621
622
/* Check whether the dependence graph has any edge
623
 * between the given two nodes.
624
 */
625
static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
626
  struct isl_sched_node *src, struct isl_sched_node *dst)
627
497
{
628
497
  enum isl_edge_type i;
629
497
  isl_bool r;
630
497
631
1.86k
  for (i = isl_edge_first; i <= isl_edge_last; 
++i1.37k
) {
632
1.62k
    r = graph_has_edge(graph, i, src, dst);
633
1.62k
    if (r < 0 || r)
634
248
      return r;
635
1.62k
  }
636
497
637
497
  
return r249
;
638
497
}
639
640
/* Check whether the dependence graph has a validity edge
641
 * between the given two nodes.
642
 *
643
 * Conditional validity edges are essentially validity edges that
644
 * can be ignored if the corresponding condition edges are iteration private.
645
 * Here, we are only checking for the presence of validity
646
 * edges, so we need to consider the conditional validity edges too.
647
 * In particular, this function is used during the detection
648
 * of strongly connected components and we cannot ignore
649
 * conditional validity edges during this detection.
650
 */
651
static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph,
652
  struct isl_sched_node *src, struct isl_sched_node *dst)
653
397
{
654
397
  isl_bool r;
655
397
656
397
  r = graph_has_edge(graph, isl_edge_validity, src, dst);
657
397
  if (r < 0 || r)
658
126
    return r;
659
271
660
271
  return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
661
271
}
662
663
/* Perform all the required memory allocations for a schedule graph "graph"
664
 * with "n_node" nodes and "n_edge" edge and initialize the corresponding
665
 * fields.
666
 */
667
static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
668
  int n_node, int n_edge)
669
349
{
670
349
  int i;
671
349
672
349
  graph->n = n_node;
673
349
  graph->n_edge = n_edge;
674
349
  graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
675
349
  graph->sorted = isl_calloc_array(ctx, int, graph->n);
676
349
  graph->region = isl_alloc_array(ctx,
677
349
          struct isl_trivial_region, graph->n);
678
349
  graph->edge = isl_calloc_array(ctx,
679
349
          struct isl_sched_edge, graph->n_edge);
680
349
681
349
  graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
682
349
  graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
683
349
  graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
684
349
685
349
  if (!graph->node || !graph->region || (graph->n_edge && 
!graph->edge200
) ||
686
349
      !graph->sorted)
687
0
    return isl_stat_error;
688
349
689
858
  
for(i = 0; 349
i < graph->n;
++i509
)
690
509
    graph->sorted[i] = i;
691
349
692
349
  return isl_stat_ok;
693
349
}
694
695
/* Free the memory associated to node "node" in "graph".
696
 * The "coincident" field is shared by nodes in a graph and its subgraph.
697
 * It therefore only needs to be freed for the original dependence graph,
698
 * i.e., one that is not the result of splitting.
699
 */
700
static void clear_node(struct isl_sched_graph *graph,
701
  struct isl_sched_node *node)
702
507
{
703
507
  isl_space_free(node->space);
704
507
  isl_set_free(node->hull);
705
507
  isl_multi_aff_free(node->compress);
706
507
  isl_multi_aff_free(node->decompress);
707
507
  isl_mat_free(node->sched);
708
507
  isl_map_free(node->sched_map);
709
507
  isl_mat_free(node->indep);
710
507
  isl_mat_free(node->vmap);
711
507
  if (graph->root == graph)
712
283
    free(node->coincident);
713
507
  isl_multi_val_free(node->sizes);
714
507
  isl_basic_set_free(node->bounds);
715
507
  isl_vec_free(node->max);
716
507
}
717
718
static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
719
367
{
720
367
  int i;
721
367
722
367
  isl_map_to_basic_set_free(graph->intra_hmap);
723
367
  isl_map_to_basic_set_free(graph->intra_hmap_param);
724
367
  isl_map_to_basic_set_free(graph->inter_hmap);
725
367
726
367
  if (graph->node)
727
856
    
for (i = 0; 349
i < graph->n;
++i507
)
728
507
      clear_node(graph, &graph->node[i]);
729
367
  free(graph->node);
730
367
  free(graph->sorted);
731
367
  if (graph->edge)
732
698
    
for (i = 0; 349
i < graph->n_edge;
++i349
) {
733
349
      isl_map_free(graph->edge[i].map);
734
349
      isl_union_map_free(graph->edge[i].tagged_condition);
735
349
      isl_union_map_free(graph->edge[i].tagged_validity);
736
349
    }
737
367
  free(graph->edge);
738
367
  free(graph->region);
739
2.20k
  for (i = 0; i <= isl_edge_last; 
++i1.83k
)
740
1.83k
    isl_hash_table_free(ctx, graph->edge_table[i]);
741
367
  isl_hash_table_free(ctx, graph->node_table);
742
367
  isl_basic_set_free(graph->lp);
743
367
}
744
745
/* For each "set" on which this function is called, increment
746
 * graph->n by one and update graph->maxvar.
747
 */
748
static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
749
285
{
750
285
  struct isl_sched_graph *graph = user;
751
285
  int nvar = isl_set_dim(set, isl_dim_set);
752
285
753
285
  graph->n++;
754
285
  if (nvar > graph->maxvar)
755
159
    graph->maxvar = nvar;
756
285
757
285
  isl_set_free(set);
758
285
759
285
  return isl_stat_ok;
760
285
}
761
762
/* Compute the number of rows that should be allocated for the schedule.
763
 * In particular, we need one row for each variable or one row
764
 * for each basic map in the dependences.
765
 * Note that it is practically impossible to exhaust both
766
 * the number of dependences and the number of variables.
767
 */
768
static isl_stat compute_max_row(struct isl_sched_graph *graph,
769
  __isl_keep isl_schedule_constraints *sc)
770
155
{
771
155
  int n_edge;
772
155
  isl_stat r;
773
155
  isl_union_set *domain;
774
155
775
155
  graph->n = 0;
776
155
  graph->maxvar = 0;
777
155
  domain = isl_schedule_constraints_get_domain(sc);
778
155
  r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
779
155
  isl_union_set_free(domain);
780
155
  if (r < 0)
781
0
    return isl_stat_error;
782
155
  n_edge = isl_schedule_constraints_n_basic_map(sc);
783
155
  if (n_edge < 0)
784
0
    return isl_stat_error;
785
155
  graph->max_row = n_edge + graph->maxvar;
786
155
787
155
  return isl_stat_ok;
788
155
}
789
790
/* Does "bset" have any defining equalities for its set variables?
791
 */
792
static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
793
283
{
794
283
  int i, n;
795
283
796
283
  if (!bset)
797
0
    return isl_bool_error;
798
283
799
283
  n = isl_basic_set_dim(bset, isl_dim_set);
800
664
  for (i = 0; i < n; 
++i381
) {
801
389
    isl_bool has;
802
389
803
389
    has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
804
389
              NULL);
805
389
    if (has < 0 || has)
806
8
      return has;
807
389
  }
808
283
809
283
  
return isl_bool_false275
;
810
283
}
811
812
/* Set the entries of node->max to the value of the schedule_max_coefficient
813
 * option, if set.
814
 */
815
static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
816
4
{
817
4
  int max;
818
4
819
4
  max = isl_options_get_schedule_max_coefficient(ctx);
820
4
  if (max == -1)
821
4
    return isl_stat_ok;
822
0
823
0
  node->max = isl_vec_alloc(ctx, node->nvar);
824
0
  node->max = isl_vec_set_si(node->max, max);
825
0
  if (!node->max)
826
0
    return isl_stat_error;
827
0
828
0
  return isl_stat_ok;
829
0
}
830
831
/* Set the entries of node->max to the minimum of the schedule_max_coefficient
832
 * option (if set) and half of the minimum of the sizes in the other
833
 * dimensions.  Round up when computing the half such that
834
 * if the minimum of the sizes is one, half of the size is taken to be one
835
 * rather than zero.
836
 * If the global minimum is unbounded (i.e., if both
837
 * the schedule_max_coefficient is not set and the sizes in the other
838
 * dimensions are unbounded), then store a negative value.
839
 * If the schedule coefficient is close to the size of the instance set
840
 * in another dimension, then the schedule may represent a loop
841
 * coalescing transformation (especially if the coefficient
842
 * in that other dimension is one).  Forcing the coefficient to be
843
 * smaller than or equal to half the minimal size should avoid this
844
 * situation.
845
 */
846
static isl_stat compute_max_coefficient(isl_ctx *ctx,
847
  struct isl_sched_node *node)
848
279
{
849
279
  int max;
850
279
  int i, j;
851
279
  isl_vec *v;
852
279
853
279
  max = isl_options_get_schedule_max_coefficient(ctx);
854
279
  v = isl_vec_alloc(ctx, node->nvar);
855
279
  if (!v)
856
0
    return isl_stat_error;
857
279
858
650
  
for (i = 0; 279
i < node->nvar;
++i371
) {
859
371
    isl_int_set_si(v->el[i], max);
860
371
    isl_int_mul_si(v->el[i], v->el[i], 2);
861
371
  }
862
279
863
650
  for (i = 0; i < node->nvar; 
++i371
) {
864
371
    isl_val *size;
865
371
866
371
    size = isl_multi_val_get_val(node->sizes, i);
867
371
    if (!size)
868
0
      goto error;
869
371
    if (!isl_val_is_int(size)) {
870
204
      isl_val_free(size);
871
204
      continue;
872
204
    }
873
533
    
for (j = 0; 167
j < node->nvar;
++j366
) {
874
366
      if (j == i)
875
167
        continue;
876
199
      if (isl_int_is_neg(v->el[j]) ||
877
199
          
isl_int_gt177
(v->el[j], size->n))
878
199
        
isl_int_set24
(v->el[j], size->n);
879
199
    }
880
167
    isl_val_free(size);
881
167
  }
882
279
883
650
  
for (i = 0; 279
i < node->nvar;
++i371
)
884
371
    isl_int_cdiv_q_ui(v->el[i], v->el[i], 2);
885
279
886
279
  node->max = v;
887
279
  return isl_stat_ok;
888
0
error:
889
0
  isl_vec_free(v);
890
0
  return isl_stat_error;
891
279
}
892
893
/* Compute and return the size of "set" in dimension "dim".
894
 * The size is taken to be the difference in values for that variable
895
 * for fixed values of the other variables.
896
 * This assumes that "set" is convex.
897
 * In particular, the variable is first isolated from the other variables
898
 * in the range of a map
899
 *
900
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
901
 *
902
 * and then duplicated
903
 *
904
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
905
 *
906
 * The shared variables are then projected out and the maximal value
907
 * of i_dim' - i_dim is computed.
908
 */
909
static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
910
371
{
911
371
  isl_map *map;
912
371
  isl_local_space *ls;
913
371
  isl_aff *obj;
914
371
  isl_val *v;
915
371
916
371
  map = isl_set_project_onto_map(set, isl_dim_set, dim, 1);
917
371
  map = isl_map_project_out(map, isl_dim_in, dim, 1);
918
371
  map = isl_map_range_product(map, isl_map_copy(map));
919
371
  map = isl_set_unwrap(isl_map_range(map));
920
371
  set = isl_map_deltas(map);
921
371
  ls = isl_local_space_from_space(isl_set_get_space(set));
922
371
  obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
923
371
  v = isl_set_max_val(set, obj);
924
371
  isl_aff_free(obj);
925
371
  isl_set_free(set);
926
371
927
371
  return v;
928
371
}
929
930
/* Compute the size of the instance set "set" of "node", after compression,
931
 * as well as bounds on the corresponding coefficients, if needed.
932
 *
933
 * The sizes are needed when the schedule_treat_coalescing option is set.
934
 * The bounds are needed when the schedule_treat_coalescing option or
935
 * the schedule_max_coefficient option is set.
936
 *
937
 * If the schedule_treat_coalescing option is not set, then at most
938
 * the bounds need to be set and this is done in set_max_coefficient.
939
 * Otherwise, compress the domain if needed, compute the size
940
 * in each direction and store the results in node->size.
941
 * If the domain is not convex, then the sizes are computed
942
 * on a convex superset in order to avoid picking up sizes
943
 * that are valid for the individual disjuncts, but not for
944
 * the domain as a whole.
945
 * Finally, set the bounds on the coefficients based on the sizes
946
 * and the schedule_max_coefficient option in compute_max_coefficient.
947
 */
948
static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
949
  __isl_take isl_set *set)
950
283
{
951
283
  int j, n;
952
283
  isl_multi_val *mv;
953
283
954
283
  if (!isl_options_get_schedule_treat_coalescing(ctx)) {
955
4
    isl_set_free(set);
956
4
    return set_max_coefficient(ctx, node);
957
4
  }
958
279
959
279
  if (node->compressed)
960
8
    set = isl_set_preimage_multi_aff(set,
961
8
          isl_multi_aff_copy(node->decompress));
962
279
  set = isl_set_from_basic_set(isl_set_simple_hull(set));
963
279
  mv = isl_multi_val_zero(isl_set_get_space(set));
964
279
  n = isl_set_dim(set, isl_dim_set);
965
650
  for (j = 0; j < n; 
++j371
) {
966
371
    isl_val *v;
967
371
968
371
    v = compute_size(isl_set_copy(set), j);
969
371
    mv = isl_multi_val_set_val(mv, j, v);
970
371
  }
971
279
  node->sizes = mv;
972
279
  isl_set_free(set);
973
279
  if (!node->sizes)
974
0
    return isl_stat_error;
975
279
  return compute_max_coefficient(ctx, node);
976
279
}
977
978
/* Add a new node to the graph representing the given instance set.
979
 * "nvar" is the (possibly compressed) number of variables and
980
 * may be smaller than then number of set variables in "set"
981
 * if "compressed" is set.
982
 * If "compressed" is set, then "hull" represents the constraints
983
 * that were used to derive the compression, while "compress" and
984
 * "decompress" map the original space to the compressed space and
985
 * vice versa.
986
 * If "compressed" is not set, then "hull", "compress" and "decompress"
987
 * should be NULL.
988
 *
989
 * Compute the size of the instance set and bounds on the coefficients,
990
 * if needed.
991
 */
992
static isl_stat add_node(struct isl_sched_graph *graph,
993
  __isl_take isl_set *set, int nvar, int compressed,
994
  __isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
995
  __isl_take isl_multi_aff *decompress)
996
283
{
997
283
  int nparam;
998
283
  isl_ctx *ctx;
999
283
  isl_mat *sched;
1000
283
  isl_space *space;
1001
283
  int *coincident;
1002
283
  struct isl_sched_node *node;
1003
283
1004
283
  if (!set)
1005
0
    return isl_stat_error;
1006
283
1007
283
  ctx = isl_set_get_ctx(set);
1008
283
  nparam = isl_set_dim(set, isl_dim_param);
1009
283
  if (!ctx->opt->schedule_parametric)
1010
28
    nparam = 0;
1011
283
  sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
1012
283
  node = &graph->node[graph->n];
1013
283
  graph->n++;
1014
283
  space = isl_set_get_space(set);
1015
283
  node->space = space;
1016
283
  node->nvar = nvar;
1017
283
  node->nparam = nparam;
1018
283
  node->sched = sched;
1019
283
  node->sched_map = NULL;
1020
283
  coincident = isl_calloc_array(ctx, int, graph->max_row);
1021
283
  node->coincident = coincident;
1022
283
  node->compressed = compressed;
1023
283
  node->hull = hull;
1024
283
  node->compress = compress;
1025
283
  node->decompress = decompress;
1026
283
  if (compute_sizes_and_max(ctx, node, set) < 0)
1027
0
    return isl_stat_error;
1028
283
1029
283
  if (!space || !sched || (graph->max_row && 
!coincident281
))
1030
0
    return isl_stat_error;
1031
283
  if (compressed && 
(8
!hull8
||
!compress8
||
!decompress8
))
1032
0
    return isl_stat_error;
1033
283
1034
283
  return isl_stat_ok;
1035
283
}
1036
1037
/* Construct an identifier for node "node", which will represent "set".
1038
 * The name of the identifier is either "compressed" or
1039
 * "compressed_<name>", with <name> the name of the space of "set".
1040
 * The user pointer of the identifier points to "node".
1041
 */
1042
static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set,
1043
  struct isl_sched_node *node)
1044
8
{
1045
8
  isl_bool has_name;
1046
8
  isl_ctx *ctx;
1047
8
  isl_id *id;
1048
8
  isl_printer *p;
1049
8
  const char *name;
1050
8
  char *id_name;
1051
8
1052
8
  has_name = isl_set_has_tuple_name(set);
1053
8
  if (has_name < 0)
1054
0
    return NULL;
1055
8
1056
8
  ctx = isl_set_get_ctx(set);
1057
8
  if (!has_name)
1058
0
    return isl_id_alloc(ctx, "compressed", node);
1059
8
1060
8
  p = isl_printer_to_str(ctx);
1061
8
  name = isl_set_get_tuple_name(set);
1062
8
  p = isl_printer_print_str(p, "compressed_");
1063
8
  p = isl_printer_print_str(p, name);
1064
8
  id_name = isl_printer_get_str(p);
1065
8
  isl_printer_free(p);
1066
8
1067
8
  id = isl_id_alloc(ctx, id_name, node);
1068
8
  free(id_name);
1069
8
1070
8
  return id;
1071
8
}
1072
1073
/* Add a new node to the graph representing the given set.
1074
 *
1075
 * If any of the set variables is defined by an equality, then
1076
 * we perform variable compression such that we can perform
1077
 * the scheduling on the compressed domain.
1078
 * In this case, an identifier is used that references the new node
1079
 * such that each compressed space is unique and
1080
 * such that the node can be recovered from the compressed space.
1081
 */
1082
static isl_stat extract_node(__isl_take isl_set *set, void *user)
1083
283
{
1084
283
  int nvar;
1085
283
  isl_bool has_equality;
1086
283
  isl_id *id;
1087
283
  isl_basic_set *hull;
1088
283
  isl_set *hull_set;
1089
283
  isl_morph *morph;
1090
283
  isl_multi_aff *compress, *decompress;
1091
283
  struct isl_sched_graph *graph = user;
1092
283
1093
283
  hull = isl_set_affine_hull(isl_set_copy(set));
1094
283
  hull = isl_basic_set_remove_divs(hull);
1095
283
  nvar = isl_set_dim(set, isl_dim_set);
1096
283
  has_equality = has_any_defining_equality(hull);
1097
283
1098
283
  if (has_equality < 0)
1099
0
    goto error;
1100
283
  if (!has_equality) {
1101
275
    isl_basic_set_free(hull);
1102
275
    return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
1103
275
  }
1104
8
1105
8
  id = construct_compressed_id(set, &graph->node[graph->n]);
1106
8
  morph = isl_basic_set_variable_compression_with_id(hull,
1107
8
                  isl_dim_set, id);
1108
8
  isl_id_free(id);
1109
8
  nvar = isl_morph_ran_dim(morph, isl_dim_set);
1110
8
  compress = isl_morph_get_var_multi_aff(morph);
1111
8
  morph = isl_morph_inverse(morph);
1112
8
  decompress = isl_morph_get_var_multi_aff(morph);
1113
8
  isl_morph_free(morph);
1114
8
1115
8
  hull_set = isl_set_from_basic_set(hull);
1116
8
  return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
1117
0
error:
1118
0
  isl_basic_set_free(hull);
1119
0
  isl_set_free(set);
1120
0
  return isl_stat_error;
1121
8
}
1122
1123
struct isl_extract_edge_data {
1124
  enum isl_edge_type type;
1125
  struct isl_sched_graph *graph;
1126
};
1127
1128
/* Merge edge2 into edge1, freeing the contents of edge2.
1129
 * Return 0 on success and -1 on failure.
1130
 *
1131
 * edge1 and edge2 are assumed to have the same value for the map field.
1132
 */
1133
static int merge_edge(struct isl_sched_edge *edge1,
1134
  struct isl_sched_edge *edge2)
1135
395
{
1136
395
  edge1->types |= edge2->types;
1137
395
  isl_map_free(edge2->map);
1138
395
1139
395
  if (is_condition(edge2)) {
1140
23
    if (!edge1->tagged_condition)
1141
23
      edge1->tagged_condition = edge2->tagged_condition;
1142
0
    else
1143
0
      edge1->tagged_condition =
1144
0
        isl_union_map_union(edge1->tagged_condition,
1145
0
                edge2->tagged_condition);
1146
23
  }
1147
395
1148
395
  if (is_conditional_validity(edge2)) {
1149
3
    if (!edge1->tagged_validity)
1150
3
      edge1->tagged_validity = edge2->tagged_validity;
1151
0
    else
1152
0
      edge1->tagged_validity =
1153
0
        isl_union_map_union(edge1->tagged_validity,
1154
0
                edge2->tagged_validity);
1155
3
  }
1156
395
1157
395
  if (is_condition(edge2) && 
!edge1->tagged_condition23
)
1158
0
    return -1;
1159
395
  if (is_conditional_validity(edge2) && 
!edge1->tagged_validity3
)
1160
0
    return -1;
1161
395
1162
395
  return 0;
1163
395
}
1164
1165
/* Insert dummy tags in domain and range of "map".
1166
 *
1167
 * In particular, if "map" is of the form
1168
 *
1169
 *  A -> B
1170
 *
1171
 * then return
1172
 *
1173
 *  [A -> dummy_tag] -> [B -> dummy_tag]
1174
 *
1175
 * where the dummy_tags are identical and equal to any dummy tags
1176
 * introduced by any other call to this function.
1177
 */
1178
static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
1179
12
{
1180
12
  static char dummy;
1181
12
  isl_ctx *ctx;
1182
12
  isl_id *id;
1183
12
  isl_space *space;
1184
12
  isl_set *domain, *range;
1185
12
1186
12
  ctx = isl_map_get_ctx(map);
1187
12
1188
12
  id = isl_id_alloc(ctx, NULL, &dummy);
1189
12
  space = isl_space_params(isl_map_get_space(map));
1190
12
  space = isl_space_set_from_params(space);
1191
12
  space = isl_space_set_tuple_id(space, isl_dim_set, id);
1192
12
  space = isl_space_map_from_set(space);
1193
12
1194
12
  domain = isl_map_wrap(map);
1195
12
  range = isl_map_wrap(isl_map_universe(space));
1196
12
  map = isl_map_from_domain_and_range(domain, range);
1197
12
  map = isl_map_zip(map);
1198
12
1199
12
  return map;
1200
12
}
1201
1202
/* Given that at least one of "src" or "dst" is compressed, return
1203
 * a map between the spaces of these nodes restricted to the affine
1204
 * hull that was used in the compression.
1205
 */
1206
static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
1207
  struct isl_sched_node *dst)
1208
22
{
1209
22
  isl_set *dom, *ran;
1210
22
1211
22
  if (src->compressed)
1212
14
    dom = isl_set_copy(src->hull);
1213
8
  else
1214
8
    dom = isl_set_universe(isl_space_copy(src->space));
1215
22
  if (dst->compressed)
1216
14
    ran = isl_set_copy(dst->hull);
1217
8
  else
1218
8
    ran = isl_set_universe(isl_space_copy(dst->space));
1219
22
1220
22
  return isl_map_from_domain_and_range(dom, ran);
1221
22
}
1222
1223
/* Intersect the domains of the nested relations in domain and range
1224
 * of "tagged" with "map".
1225
 */
1226
static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
1227
  __isl_keep isl_map *map)
1228
14
{
1229
14
  isl_set *set;
1230
14
1231
14
  tagged = isl_map_zip(tagged);
1232
14
  set = isl_map_wrap(isl_map_copy(map));
1233
14
  tagged = isl_map_intersect_domain(tagged, set);
1234
14
  tagged = isl_map_zip(tagged);
1235
14
  return tagged;
1236
14
}
1237
1238
/* Return a pointer to the node that lives in the domain space of "map",
1239
 * an invalid node if there is no such node, or NULL in case of error.
1240
 */
1241
static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
1242
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1243
687
{
1244
687
  struct isl_sched_node *node;
1245
687
  isl_space *space;
1246
687
1247
687
  space = isl_space_domain(isl_map_get_space(map));
1248
687
  node = graph_find_node(ctx, graph, space);
1249
687
  isl_space_free(space);
1250
687
1251
687
  return node;
1252
687
}
1253
1254
/* Return a pointer to the node that lives in the range space of "map",
1255
 * an invalid node if there is no such node, or NULL in case of error.
1256
 */
1257
static struct isl_sched_node *find_range_node(isl_ctx *ctx,
1258
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1259
687
{
1260
687
  struct isl_sched_node *node;
1261
687
  isl_space *space;
1262
687
1263
687
  space = isl_space_range(isl_map_get_space(map));
1264
687
  node = graph_find_node(ctx, graph, space);
1265
687
  isl_space_free(space);
1266
687
1267
687
  return node;
1268
687
}
1269
1270
/* Refrain from adding a new edge based on "map".
1271
 * Instead, just free the map.
1272
 * "tagged" is either a copy of "map" with additional tags or NULL.
1273
 */
1274
static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged)
1275
24
{
1276
24
  isl_map_free(map);
1277
24
  isl_map_free(tagged);
1278
24
1279
24
  return isl_stat_ok;
1280
24
}
1281
1282
/* Add a new edge to the graph based on the given map
1283
 * and add it to data->graph->edge_table[data->type].
1284
 * If a dependence relation of a given type happens to be identical
1285
 * to one of the dependence relations of a type that was added before,
1286
 * then we don't create a new edge, but instead mark the original edge
1287
 * as also representing a dependence of the current type.
1288
 *
1289
 * Edges of type isl_edge_condition or isl_edge_conditional_validity
1290
 * may be specified as "tagged" dependence relations.  That is, "map"
1291
 * may contain elements (i -> a) -> (j -> b), where i -> j denotes
1292
 * the dependence on iterations and a and b are tags.
1293
 * edge->map is set to the relation containing the elements i -> j,
1294
 * while edge->tagged_condition and edge->tagged_validity contain
1295
 * the union of all the "map" relations
1296
 * for which extract_edge is called that result in the same edge->map.
1297
 *
1298
 * If the source or the destination node is compressed, then
1299
 * intersect both "map" and "tagged" with the constraints that
1300
 * were used to construct the compression.
1301
 * This ensures that there are no schedule constraints defined
1302
 * outside of these domains, while the scheduler no longer has
1303
 * any control over those outside parts.
1304
 */
1305
static isl_stat extract_edge(__isl_take isl_map *map, void *user)
1306
687
{
1307
687
  isl_bool empty;
1308
687
  isl_ctx *ctx = isl_map_get_ctx(map);
1309
687
  struct isl_extract_edge_data *data = user;
1310
687
  struct isl_sched_graph *graph = data->graph;
1311
687
  struct isl_sched_node *src, *dst;
1312
687
  struct isl_sched_edge *edge;
1313
687
  isl_map *tagged = NULL;
1314
687
1315
687
  if (data->type == isl_edge_condition ||
1316
687
      
data->type == isl_edge_conditional_validity658
) {
1317
61
    if (isl_map_can_zip(map)) {
1318
49
      tagged = isl_map_copy(map);
1319
49
      map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
1320
49
    } else {
1321
12
      tagged = insert_dummy_tags(isl_map_copy(map));
1322
12
    }
1323
61
  }
1324
687
1325
687
  src = find_domain_node(ctx, graph, map);
1326
687
  dst = find_range_node(ctx, graph, map);
1327
687
1328
687
  if (!src || !dst)
1329
0
    goto error;
1330
687
  if (!is_node(graph, src) || !is_node(graph, dst))
1331
24
    return skip_edge(map, tagged);
1332
663
1333
663
  if (src->compressed || 
dst->compressed649
) {
1334
22
    isl_map *hull;
1335
22
    hull = extract_hull(src, dst);
1336
22
    if (tagged)
1337
14
      tagged = map_intersect_domains(tagged, hull);
1338
22
    map = isl_map_intersect(map, hull);
1339
22
  }
1340
663
1341
663
  empty = isl_map_plain_is_empty(map);
1342
663
  if (empty < 0)
1343
0
    goto error;
1344
663
  if (empty)
1345
0
    return skip_edge(map, tagged);
1346
663
1347
663
  graph->edge[graph->n_edge].src = src;
1348
663
  graph->edge[graph->n_edge].dst = dst;
1349
663
  graph->edge[graph->n_edge].map = map;
1350
663
  graph->edge[graph->n_edge].types = 0;
1351
663
  graph->edge[graph->n_edge].tagged_condition = NULL;
1352
663
  graph->edge[graph->n_edge].tagged_validity = NULL;
1353
663
  set_type(&graph->edge[graph->n_edge], data->type);
1354
663
  if (data->type == isl_edge_condition)
1355
29
    graph->edge[graph->n_edge].tagged_condition =
1356
29
          isl_union_map_from_map(tagged);
1357
663
  if (data->type == isl_edge_conditional_validity)
1358
32
    graph->edge[graph->n_edge].tagged_validity =
1359
32
          isl_union_map_from_map(tagged);
1360
663
1361
663
  edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
1362
663
  if (!edge) {
1363
0
    graph->n_edge++;
1364
0
    return isl_stat_error;
1365
0
  }
1366
663
  if (edge == &graph->edge[graph->n_edge])
1367
268
    return graph_edge_table_add(ctx, graph, data->type,
1368
268
            &graph->edge[graph->n_edge++]);
1369
395
1370
395
  if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0)
1371
0
    return isl_stat_error;
1372
395
1373
395
  return graph_edge_table_add(ctx, graph, data->type, edge);
1374
0
error:
1375
0
  isl_map_free(map);
1376
0
  isl_map_free(tagged);
1377
0
  return isl_stat_error;
1378
395
}
1379
1380
/* Initialize the schedule graph "graph" from the schedule constraints "sc".
1381
 *
1382
 * The context is included in the domain before the nodes of
1383
 * the graphs are extracted in order to be able to exploit
1384
 * any possible additional equalities.
1385
 * Note that this intersection is only performed locally here.
1386
 */
1387
static isl_stat graph_init(struct isl_sched_graph *graph,
1388
  __isl_keep isl_schedule_constraints *sc)
1389
155
{
1390
155
  isl_ctx *ctx;
1391
155
  isl_union_set *domain;
1392
155
  isl_union_map *c;
1393
155
  struct isl_extract_edge_data data;
1394
155
  enum isl_edge_type i;
1395
155
  isl_stat r;
1396
155
1397
155
  if (!sc)
1398
0
    return isl_stat_error;
1399
155
1400
155
  ctx = isl_schedule_constraints_get_ctx(sc);
1401
155
1402
155
  domain = isl_schedule_constraints_get_domain(sc);
1403
155
  graph->n = isl_union_set_n_set(domain);
1404
155
  isl_union_set_free(domain);
1405
155
1406
155
  if (graph_alloc(ctx, graph, graph->n,
1407
155
      isl_schedule_constraints_n_map(sc)) < 0)
1408
0
    return isl_stat_error;
1409
155
1410
155
  if (compute_max_row(graph, sc) < 0)
1411
0
    return isl_stat_error;
1412
155
  graph->root = graph;
1413
155
  graph->n = 0;
1414
155
  domain = isl_schedule_constraints_get_domain(sc);
1415
155
  domain = isl_union_set_intersect_params(domain,
1416
155
            isl_schedule_constraints_get_context(sc));
1417
155
  r = isl_union_set_foreach_set(domain, &extract_node, graph);
1418
155
  isl_union_set_free(domain);
1419
155
  if (r < 0)
1420
0
    return isl_stat_error;
1421
155
  if (graph_init_table(ctx, graph) < 0)
1422
0
    return isl_stat_error;
1423
930
  
for (i = isl_edge_first; 155
i <= isl_edge_last;
++i775
) {
1424
775
    c = isl_schedule_constraints_get(sc, i);
1425
775
    graph->max_edge[i] = isl_union_map_n_map(c);
1426
775
    isl_union_map_free(c);
1427
775
    if (!c)
1428
0
      return isl_stat_error;
1429
775
  }
1430
155
  if (graph_init_edge_tables(ctx, graph) < 0)
1431
0
    return isl_stat_error;
1432
155
  graph->n_edge = 0;
1433
155
  data.graph = graph;
1434
930
  for (i = isl_edge_first; i <= isl_edge_last; 
++i775
) {
1435
775
    isl_stat r;
1436
775
1437
775
    data.type = i;
1438
775
    c = isl_schedule_constraints_get(sc, i);
1439
775
    r = isl_union_map_foreach_map(c, &extract_edge, &data);
1440
775
    isl_union_map_free(c);
1441
775
    if (r < 0)
1442
0
      return isl_stat_error;
1443
775
  }
1444
155
1445
155
  return isl_stat_ok;
1446
155
}
1447
1448
/* Check whether there is any dependence from node[j] to node[i]
1449
 * or from node[i] to node[j].
1450
 */
1451
static isl_bool node_follows_weak(int i, int j, void *user)
1452
323
{
1453
323
  isl_bool f;
1454
323
  struct isl_sched_graph *graph = user;
1455
323
1456
323
  f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
1457
323
  if (f < 0 || f)
1458
149
    return f;
1459
174
  return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
1460
174
}
1461
1462
/* Check whether there is a (conditional) validity dependence from node[j]
1463
 * to node[i], forcing node[i] to follow node[j].
1464
 */
1465
static isl_bool node_follows_strong(int i, int j, void *user)
1466
313
{
1467
313
  struct isl_sched_graph *graph = user;
1468
313
1469
313
  return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
1470
313
}
1471
1472
/* Use Tarjan's algorithm for computing the strongly connected components
1473
 * in the dependence graph only considering those edges defined by "follows".
1474
 */
1475
static isl_stat detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph,
1476
  isl_bool (*follows)(int i, int j, void *user))
1477
666
{
1478
666
  int i, n;
1479
666
  struct isl_tarjan_graph *g = NULL;
1480
666
1481
666
  g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
1482
666
  if (!g)
1483
0
    return isl_stat_error;
1484
666
1485
666
  graph->scc = 0;
1486
666
  i = 0;
1487
666
  n = graph->n;
1488
1.51k
  while (n) {
1489
1.87k
    while (g->order[i] != -1) {
1490
1.02k
      graph->node[g->order[i]].scc = graph->scc;
1491
1.02k
      --n;
1492
1.02k
      ++i;
1493
1.02k
    }
1494
848
    ++i;
1495
848
    graph->scc++;
1496
848
  }
1497
666
1498
666
  isl_tarjan_graph_free(g);
1499
666
1500
666
  return isl_stat_ok;
1501
666
}
1502
1503
/* Apply Tarjan's algorithm to detect the strongly connected components
1504
 * in the dependence graph.
1505
 * Only consider the (conditional) validity dependences and clear "weak".
1506
 */
1507
static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1508
434
{
1509
434
  graph->weak = 0;
1510
434
  return detect_ccs(ctx, graph, &node_follows_strong);
1511
434
}
1512
1513
/* Apply Tarjan's algorithm to detect the (weakly) connected components
1514
 * in the dependence graph.
1515
 * Consider all dependences and set "weak".
1516
 */
1517
static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1518
203
{
1519
203
  graph->weak = 1;
1520
203
  return detect_ccs(ctx, graph, &node_follows_weak);
1521
203
}
1522
1523
static int cmp_scc(const void *a, const void *b, void *data)
1524
125
{
1525
125
  struct isl_sched_graph *graph = data;
1526
125
  const int *i1 = a;
1527
125
  const int *i2 = b;
1528
125
1529
125
  return graph->node[*i1].scc - graph->node[*i2].scc;
1530
125
}
1531
1532
/* Sort the elements of graph->sorted according to the corresponding SCCs.
1533
 */
1534
static int sort_sccs(struct isl_sched_graph *graph)
1535
385
{
1536
385
  return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
1537
385
}
1538
1539
/* Return a non-parametric set in the compressed space of "node" that is
1540
 * bounded by the size in each direction
1541
 *
1542
 *  { [x] : -S_i <= x_i <= S_i }
1543
 *
1544
 * If S_i is infinity in direction i, then there are no constraints
1545
 * in that direction.
1546
 *
1547
 * Cache the result in node->bounds.
1548
 */
1549
static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node)
1550
128
{
1551
128
  isl_space *space;
1552
128
  isl_basic_set *bounds;
1553
128
  int i;
1554
128
  unsigned nparam;
1555
128
1556
128
  if (node->bounds)
1557
32
    return isl_basic_set_copy(node->bounds);
1558
96
1559
96
  if (node->compressed)
1560
0
    space = isl_multi_aff_get_domain_space(node->decompress);
1561
96
  else
1562
96
    space = isl_space_copy(node->space);
1563
96
  nparam = isl_space_dim(space, isl_dim_param);
1564
96
  space = isl_space_drop_dims(space, isl_dim_param, 0, nparam);
1565
96
  bounds = isl_basic_set_universe(space);
1566
96
1567
285
  for (i = 0; i < node->nvar; 
++i189
) {
1568
189
    isl_val *size;
1569
189
1570
189
    size = isl_multi_val_get_val(node->sizes, i);
1571
189
    if (!size)
1572
0
      return isl_basic_set_free(bounds);
1573
189
    if (!isl_val_is_int(size)) {
1574
122
      isl_val_free(size);
1575
122
      continue;
1576
122
    }
1577
67
    bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i,
1578
67
              isl_val_copy(size));
1579
67
    bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i,
1580
67
              isl_val_neg(size));
1581
67
  }
1582
96
1583
96
  node->bounds = isl_basic_set_copy(bounds);
1584
96
  return bounds;
1585
96
}
1586
1587
/* Drop some constraints from "delta" that could be exploited
1588
 * to construct loop coalescing schedules.
1589
 * In particular, drop those constraint that bound the difference
1590
 * to the size of the domain.
1591
 * First project out the parameters to improve the effectiveness.
1592
 */
1593
static __isl_give isl_set *drop_coalescing_constraints(
1594
  __isl_take isl_set *delta, struct isl_sched_node *node)
1595
104
{
1596
104
  unsigned nparam;
1597
104
  isl_basic_set *bounds;
1598
104
1599
104
  bounds = get_size_bounds(node);
1600
104
1601
104
  nparam = isl_set_dim(delta, isl_dim_param);
1602
104
  delta = isl_set_project_out(delta, isl_dim_param, 0, nparam);
1603
104
  delta = isl_set_remove_divs(delta);
1604
104
  delta = isl_set_plain_gist_basic_set(delta, bounds);
1605
104
  return delta;
1606
104
}
1607
1608
/* Given a dependence relation R from "node" to itself,
1609
 * construct the set of coefficients of valid constraints for elements
1610
 * in that dependence relation.
1611
 * In particular, the result contains tuples of coefficients
1612
 * c_0, c_n, c_x such that
1613
 *
1614
 *  c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
1615
 *
1616
 * or, equivalently,
1617
 *
1618
 *  c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
1619
 *
1620
 * We choose here to compute the dual of delta R.
1621
 * Alternatively, we could have computed the dual of R, resulting
1622
 * in a set of tuples c_0, c_n, c_x, c_y, and then
1623
 * plugged in (c_0, c_n, c_x, -c_x).
1624
 *
1625
 * If "need_param" is set, then the resulting coefficients effectively
1626
 * include coefficients for the parameters c_n.  Otherwise, they may
1627
 * have been projected out already.
1628
 * Since the constraints may be different for these two cases,
1629
 * they are stored in separate caches.
1630
 * In particular, if no parameter coefficients are required and
1631
 * the schedule_treat_coalescing option is set, then the parameters
1632
 * are projected out and some constraints that could be exploited
1633
 * to construct coalescing schedules are removed before the dual
1634
 * is computed.
1635
 *
1636
 * If "node" has been compressed, then the dependence relation
1637
 * is also compressed before the set of coefficients is computed.
1638
 */
1639
static __isl_give isl_basic_set *intra_coefficients(
1640
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1641
  __isl_take isl_map *map, int need_param)
1642
992
{
1643
992
  isl_ctx *ctx;
1644
992
  isl_set *delta;
1645
992
  isl_map *key;
1646
992
  isl_basic_set *coef;
1647
992
  isl_maybe_isl_basic_set m;
1648
992
  isl_map_to_basic_set **hmap = &graph->intra_hmap;
1649
992
  int treat;
1650
992
1651
992
  if (!map)
1652
0
    return NULL;
1653
992
1654
992
  ctx = isl_map_get_ctx(map);
1655
992
  treat = !need_param && 
isl_options_get_schedule_treat_coalescing(ctx)708
;
1656
992
  if (!treat)
1657
316
    hmap = &graph->intra_hmap_param;
1658
992
  m = isl_map_to_basic_set_try_get(*hmap, map);
1659
992
  if (m.valid < 0 || m.valid) {
1660
796
    isl_map_free(map);
1661
796
    return m.value;
1662
796
  }
1663
196
1664
196
  key = isl_map_copy(map);
1665
196
  if (node->compressed) {
1666
0
    map = isl_map_preimage_domain_multi_aff(map,
1667
0
            isl_multi_aff_copy(node->decompress));
1668
0
    map = isl_map_preimage_range_multi_aff(map,
1669
0
            isl_multi_aff_copy(node->decompress));
1670
0
  }
1671
196
  delta = isl_map_deltas(map);
1672
196
  if (treat)
1673
104
    delta = drop_coalescing_constraints(delta, node);
1674
196
  delta = isl_set_remove_divs(delta);
1675
196
  coef = isl_set_coefficients(delta);
1676
196
  *hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef));
1677
196
1678
196
  return coef;
1679
196
}
1680
1681
/* Given a dependence relation R, construct the set of coefficients
1682
 * of valid constraints for elements in that dependence relation.
1683
 * In particular, the result contains tuples of coefficients
1684
 * c_0, c_n, c_x, c_y such that
1685
 *
1686
 *  c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
1687
 *
1688
 * If the source or destination nodes of "edge" have been compressed,
1689
 * then the dependence relation is also compressed before
1690
 * the set of coefficients is computed.
1691
 */
1692
static __isl_give isl_basic_set *inter_coefficients(
1693
  struct isl_sched_graph *graph, struct isl_sched_edge *edge,
1694
  __isl_take isl_map *map)
1695
616
{
1696
616
  isl_set *set;
1697
616
  isl_map *key;
1698
616
  isl_basic_set *coef;
1699
616
  isl_maybe_isl_basic_set m;
1700
616
1701
616
  m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
1702
616
  if (m.valid < 0 || m.valid) {
1703
529
    isl_map_free(map);
1704
529
    return m.value;
1705
529
  }
1706
87
1707
87
  key = isl_map_copy(map);
1708
87
  if (edge->src->compressed)
1709
3
    map = isl_map_preimage_domain_multi_aff(map,
1710
3
            isl_multi_aff_copy(edge->src->decompress));
1711
87
  if (edge->dst->compressed)
1712
3
    map = isl_map_preimage_range_multi_aff(map,
1713
3
            isl_multi_aff_copy(edge->dst->decompress));
1714
87
  set = isl_map_wrap(isl_map_remove_divs(map));
1715
87
  coef = isl_set_coefficients(set);
1716
87
  graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
1717
87
          isl_basic_set_copy(coef));
1718
87
1719
87
  return coef;
1720
87
}
1721
1722
/* Return the position of the coefficients of the variables in
1723
 * the coefficients constraints "coef".
1724
 *
1725
 * The space of "coef" is of the form
1726
 *
1727
 *  { coefficients[[cst, params] -> S] }
1728
 *
1729
 * Return the position of S.
1730
 */
1731
static int coef_var_offset(__isl_keep isl_basic_set *coef)
1732
1.04k
{
1733
1.04k
  int offset;
1734
1.04k
  isl_space *space;
1735
1.04k
1736
1.04k
  space = isl_space_unwrap(isl_basic_set_get_space(coef));
1737
1.04k
  offset = isl_space_dim(space, isl_dim_in);
1738
1.04k
  isl_space_free(space);
1739
1.04k
1740
1.04k
  return offset;
1741
1.04k
}
1742
1743
/* Return the offset of the coefficient of the constant term of "node"
1744
 * within the (I)LP.
1745
 *
1746
 * Within each node, the coefficients have the following order:
1747
 *  - positive and negative parts of c_i_x
1748
 *  - c_i_n (if parametric)
1749
 *  - c_i_0
1750
 */
1751
static int node_cst_coef_offset(struct isl_sched_node *node)
1752
1.46k
{
1753
1.46k
  return node->start + 2 * node->nvar + node->nparam;
1754
1.46k
}
1755
1756
/* Return the offset of the coefficients of the parameters of "node"
1757
 * within the (I)LP.
1758
 *
1759
 * Within each node, the coefficients have the following order:
1760
 *  - positive and negative parts of c_i_x
1761
 *  - c_i_n (if parametric)
1762
 *  - c_i_0
1763
 */
1764
static int node_par_coef_offset(struct isl_sched_node *node)
1765
1.93k
{
1766
1.93k
  return node->start + 2 * node->nvar;
1767
1.93k
}
1768
1769
/* Return the offset of the coefficients of the variables of "node"
1770
 * within the (I)LP.
1771
 *
1772
 * Within each node, the coefficients have the following order:
1773
 *  - positive and negative parts of c_i_x
1774
 *  - c_i_n (if parametric)
1775
 *  - c_i_0
1776
 */
1777
static int node_var_coef_offset(struct isl_sched_node *node)
1778
4.17k
{
1779
4.17k
  return node->start;
1780
4.17k
}
1781
1782
/* Return the position of the pair of variables encoding
1783
 * coefficient "i" of "node".
1784
 *
1785
 * The order of these variable pairs is the opposite of
1786
 * that of the coefficients, with 2 variables per coefficient.
1787
 */
1788
static int node_var_coef_pos(struct isl_sched_node *node, int i)
1789
2.45k
{
1790
2.45k
  return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i);
1791
2.45k
}
1792
1793
/* Construct an isl_dim_map for mapping constraints on coefficients
1794
 * for "node" to the corresponding positions in graph->lp.
1795
 * "offset" is the offset of the coefficients for the variables
1796
 * in the input constraints.
1797
 * "s" is the sign of the mapping.
1798
 *
1799
 * The input constraints are given in terms of the coefficients
1800
 * (c_0, c_x) or (c_0, c_n, c_x).
1801
 * The mapping produced by this function essentially plugs in
1802
 * (0, c_i_x^+ - c_i_x^-) if s = 1 and
1803
 * (0, -c_i_x^+ + c_i_x^-) if s = -1 or
1804
 * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
1805
 * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
1806
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1807
 * Furthermore, the order of these pairs is the opposite of that
1808
 * of the corresponding coefficients.
1809
 *
1810
 * The caller can extend the mapping to also map the other coefficients
1811
 * (and therefore not plug in 0).
1812
 */
1813
static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
1814
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1815
  int offset, int s)
1816
616
{
1817
616
  int pos;
1818
616
  unsigned total;
1819
616
  isl_dim_map *dim_map;
1820
616
1821
616
  if (!node || !graph->lp)
1822
0
    return NULL;
1823
616
1824
616
  total = isl_basic_set_total_dim(graph->lp);
1825
616
  pos = node_var_coef_pos(node, 0);
1826
616
  dim_map = isl_dim_map_alloc(ctx, total);
1827
616
  isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s);
1828
616
  isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s);
1829
616
1830
616
  return dim_map;
1831
616
}
1832
1833
/* Construct an isl_dim_map for mapping constraints on coefficients
1834
 * for "src" (node i) and "dst" (node j) to the corresponding positions
1835
 * in graph->lp.
1836
 * "offset" is the offset of the coefficients for the variables of "src"
1837
 * in the input constraints.
1838
 * "s" is the sign of the mapping.
1839
 *
1840
 * The input constraints are given in terms of the coefficients
1841
 * (c_0, c_n, c_x, c_y).
1842
 * The mapping produced by this function essentially plugs in
1843
 * (c_j_0 - c_i_0, c_j_n - c_i_n,
1844
 *  -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
1845
 * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
1846
 *  c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
1847
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1848
 * Furthermore, the order of these pairs is the opposite of that
1849
 * of the corresponding coefficients.
1850
 *
1851
 * The caller can further extend the mapping.
1852
 */
1853
static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
1854
  struct isl_sched_graph *graph, struct isl_sched_node *src,
1855
  struct isl_sched_node *dst, int offset, int s)
1856
424
{
1857
424
  int pos;
1858
424
  unsigned total;
1859
424
  isl_dim_map *dim_map;
1860
424
1861
424
  if (!src || !dst || !graph->lp)
1862
0
    return NULL;
1863
424
1864
424
  total = isl_basic_set_total_dim(graph->lp);
1865
424
  dim_map = isl_dim_map_alloc(ctx, total);
1866
424
1867
424
  pos = node_cst_coef_offset(dst);
1868
424
  isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s);
1869
424
  pos = node_par_coef_offset(dst);
1870
424
  isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s);
1871
424
  pos = node_var_coef_pos(dst, 0);
1872
424
  isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1,
1873
424
        dst->nvar, -s);
1874
424
  isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1,
1875
424
        dst->nvar, s);
1876
424
1877
424
  pos = node_cst_coef_offset(src);
1878
424
  isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s);
1879
424
  pos = node_par_coef_offset(src);
1880
424
  isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s);
1881
424
  pos = node_var_coef_pos(src, 0);
1882
424
  isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s);
1883
424
  isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s);
1884
424
1885
424
  return dim_map;
1886
424
}
1887
1888
/* Add the constraints from "src" to "dst" using "dim_map",
1889
 * after making sure there is enough room in "dst" for the extra constraints.
1890
 */
1891
static __isl_give isl_basic_set *add_constraints_dim_map(
1892
  __isl_take isl_basic_set *dst, __isl_take isl_basic_set *src,
1893
  __isl_take isl_dim_map *dim_map)
1894
1.04k
{
1895
1.04k
  int n_eq, n_ineq;
1896
1.04k
1897
1.04k
  n_eq = isl_basic_set_n_equality(src);
1898
1.04k
  n_ineq = isl_basic_set_n_inequality(src);
1899
1.04k
  dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq);
1900
1.04k
  dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map);
1901
1.04k
  return dst;
1902
1.04k
}
1903
1904
/* Add constraints to graph->lp that force validity for the given
1905
 * dependence from a node i to itself.
1906
 * That is, add constraints that enforce
1907
 *
1908
 *  (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
1909
 *  = c_i_x (y - x) >= 0
1910
 *
1911
 * for each (x,y) in R.
1912
 * We obtain general constraints on coefficients (c_0, c_x)
1913
 * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-),
1914
 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
1915
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1916
 * Note that the result of intra_coefficients may also contain
1917
 * parameter coefficients c_n, in which case 0 is plugged in for them as well.
1918
 */
1919
static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
1920
  struct isl_sched_edge *edge)
1921
284
{
1922
284
  int offset;
1923
284
  isl_map *map = isl_map_copy(edge->map);
1924
284
  isl_ctx *ctx = isl_map_get_ctx(map);
1925
284
  isl_dim_map *dim_map;
1926
284
  isl_basic_set *coef;
1927
284
  struct isl_sched_node *node = edge->src;
1928
284
1929
284
  coef = intra_coefficients(graph, node, map, 0);
1930
284
1931
284
  offset = coef_var_offset(coef);
1932
284
1933
284
  if (!coef)
1934
0
    return isl_stat_error;
1935
284
1936
284
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
1937
284
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1938
284
1939
284
  return isl_stat_ok;
1940
284
}
1941
1942
/* Add constraints to graph->lp that force validity for the given
1943
 * dependence from node i to node j.
1944
 * That is, add constraints that enforce
1945
 *
1946
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
1947
 *
1948
 * for each (x,y) in R.
1949
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1950
 * of valid constraints for R and then plug in
1951
 * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
1952
 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
1953
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1954
 */
1955
static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
1956
  struct isl_sched_edge *edge)
1957
201
{
1958
201
  int offset;
1959
201
  isl_map *map;
1960
201
  isl_ctx *ctx;
1961
201
  isl_dim_map *dim_map;
1962
201
  isl_basic_set *coef;
1963
201
  struct isl_sched_node *src = edge->src;
1964
201
  struct isl_sched_node *dst = edge->dst;
1965
201
1966
201
  if (!graph->lp)
1967
0
    return isl_stat_error;
1968
201
1969
201
  map = isl_map_copy(edge->map);
1970
201
  ctx = isl_map_get_ctx(map);
1971
201
  coef = inter_coefficients(graph, edge, map);
1972
201
1973
201
  offset = coef_var_offset(coef);
1974
201
1975
201
  if (!coef)
1976
0
    return isl_stat_error;
1977
201
1978
201
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
1979
201
1980
201
  edge->start = graph->lp->n_ineq;
1981
201
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1982
201
  if (!graph->lp)
1983
0
    return isl_stat_error;
1984
201
  edge->end = graph->lp->n_ineq;
1985
201
1986
201
  return isl_stat_ok;
1987
201
}
1988
1989
/* Add constraints to graph->lp that bound the dependence distance for the given
1990
 * dependence from a node i to itself.
1991
 * If s = 1, we add the constraint
1992
 *
1993
 *  c_i_x (y - x) <= m_0 + m_n n
1994
 *
1995
 * or
1996
 *
1997
 *  -c_i_x (y - x) + m_0 + m_n n >= 0
1998
 *
1999
 * for each (x,y) in R.
2000
 * If s = -1, we add the constraint
2001
 *
2002
 *  -c_i_x (y - x) <= m_0 + m_n n
2003
 *
2004
 * or
2005
 *
2006
 *  c_i_x (y - x) + m_0 + m_n n >= 0
2007
 *
2008
 * for each (x,y) in R.
2009
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
2010
 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
2011
 * with each coefficient (except m_0) represented as a pair of non-negative
2012
 * coefficients.
2013
 *
2014
 *
2015
 * If "local" is set, then we add constraints
2016
 *
2017
 *  c_i_x (y - x) <= 0
2018
 *
2019
 * or
2020
 *
2021
 *  -c_i_x (y - x) <= 0
2022
 *
2023
 * instead, forcing the dependence distance to be (less than or) equal to 0.
2024
 * That is, we plug in (0, 0, -s * c_i_x),
2025
 * intra_coefficients is not required to have c_n in its result when
2026
 * "local" is set.  If they are missing, then (0, -s * c_i_x) is plugged in.
2027
 * Note that dependences marked local are treated as validity constraints
2028
 * by add_all_validity_constraints and therefore also have
2029
 * their distances bounded by 0 from below.
2030
 */
2031
static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
2032
  struct isl_sched_edge *edge, int s, int local)
2033
286
{
2034
286
  int offset;
2035
286
  unsigned nparam;
2036
286
  isl_map *map = isl_map_copy(edge->map);
2037
286
  isl_ctx *ctx = isl_map_get_ctx(map);
2038
286
  isl_dim_map *dim_map;
2039
286
  isl_basic_set *coef;
2040
286
  struct isl_sched_node *node = edge->src;
2041
286
2042
286
  coef = intra_coefficients(graph, node, map, !local);
2043
286
2044
286
  offset = coef_var_offset(coef);
2045
286
2046
286
  if (!coef)
2047
0
    return isl_stat_error;
2048
286
2049
286
  nparam = isl_space_dim(node->space, isl_dim_param);
2050
286
  dim_map = intra_dim_map(ctx, graph, node, offset, -s);
2051
286
2052
286
  if (!local) {
2053
146
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2054
146
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2055
146
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2056
146
  }
2057
286
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2058
286
2059
286
  return isl_stat_ok;
2060
286
}
2061
2062
/* Add constraints to graph->lp that bound the dependence distance for the given
2063
 * dependence from node i to node j.
2064
 * If s = 1, we add the constraint
2065
 *
2066
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
2067
 *    <= m_0 + m_n n
2068
 *
2069
 * or
2070
 *
2071
 *  -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
2072
 *    m_0 + m_n n >= 0
2073
 *
2074
 * for each (x,y) in R.
2075
 * If s = -1, we add the constraint
2076
 *
2077
 *  -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
2078
 *    <= m_0 + m_n n
2079
 *
2080
 * or
2081
 *
2082
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
2083
 *    m_0 + m_n n >= 0
2084
 *
2085
 * for each (x,y) in R.
2086
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2087
 * of valid constraints for R and then plug in
2088
 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
2089
 *  s*c_i_x, -s*c_j_x)
2090
 * with each coefficient (except m_0, c_*_0 and c_*_n)
2091
 * represented as a pair of non-negative coefficients.
2092
 *
2093
 *
2094
 * If "local" is set (and s = 1), then we add constraints
2095
 *
2096
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
2097
 *
2098
 * or
2099
 *
2100
 *  -((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0
2101
 *
2102
 * instead, forcing the dependence distance to be (less than or) equal to 0.
2103
 * That is, we plug in
2104
 * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
2105
 * Note that dependences marked local are treated as validity constraints
2106
 * by add_all_validity_constraints and therefore also have
2107
 * their distances bounded by 0 from below.
2108
 */
2109
static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
2110
  struct isl_sched_edge *edge, int s, int local)
2111
205
{
2112
205
  int offset;
2113
205
  unsigned nparam;
2114
205
  isl_map *map = isl_map_copy(edge->map);
2115
205
  isl_ctx *ctx = isl_map_get_ctx(map);
2116
205
  isl_dim_map *dim_map;
2117
205
  isl_basic_set *coef;
2118
205
  struct isl_sched_node *src = edge->src;
2119
205
  struct isl_sched_node *dst = edge->dst;
2120
205
2121
205
  coef = inter_coefficients(graph, edge, map);
2122
205
2123
205
  offset = coef_var_offset(coef);
2124
205
2125
205
  if (!coef)
2126
0
    return isl_stat_error;
2127
205
2128
205
  nparam = isl_space_dim(src->space, isl_dim_param);
2129
205
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
2130
205
2131
205
  if (!local) {
2132
121
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2133
121
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2134
121
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2135
121
  }
2136
205
2137
205
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2138
205
2139
205
  return isl_stat_ok;
2140
205
}
2141
2142
/* Should the distance over "edge" be forced to zero?
2143
 * That is, is it marked as a local edge?
2144
 * If "use_coincidence" is set, then coincidence edges are treated
2145
 * as local edges.
2146
 */
2147
static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
2148
2.17k
{
2149
2.17k
  return is_local(edge) || 
(2.09k
use_coincidence2.09k
&&
is_coincidence(edge)731
);
2150
2.17k
}
2151
2152
/* Add all validity constraints to graph->lp.
2153
 *
2154
 * An edge that is forced to be local needs to have its dependence
2155
 * distances equal to zero.  We take care of bounding them by 0 from below
2156
 * here.  add_all_proximity_constraints takes care of bounding them by 0
2157
 * from above.
2158
 *
2159
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2160
 * Otherwise, we ignore them.
2161
 */
2162
static int add_all_validity_constraints(struct isl_sched_graph *graph,
2163
  int use_coincidence)
2164
408
{
2165
408
  int i;
2166
408
2167
1.00k
  for (i = 0; i < graph->n_edge; 
++i593
) {
2168
593
    struct isl_sched_edge *edge = &graph->edge[i];
2169
593
    int zero;
2170
593
2171
593
    zero = force_zero(edge, use_coincidence);
2172
593
    if (!is_validity(edge) && 
!zero112
)
2173
108
      continue;
2174
485
    if (edge->src != edge->dst)
2175
201
      continue;
2176
284
    if (add_intra_validity_constraints(graph, edge) < 0)
2177
0
      return -1;
2178
284
  }
2179
408
2180
1.00k
  
for (i = 0; 408
i < graph->n_edge;
++i593
) {
2181
593
    struct isl_sched_edge *edge = &graph->edge[i];
2182
593
    int zero;
2183
593
2184
593
    zero = force_zero(edge, use_coincidence);
2185
593
    if (!is_validity(edge) && 
!zero112
)
2186
108
      continue;
2187
485
    if (edge->src == edge->dst)
2188
284
      continue;
2189
201
    if (add_inter_validity_constraints(graph, edge) < 0)
2190
0
      return -1;
2191
201
  }
2192
408
2193
408
  return 0;
2194
408
}
2195
2196
/* Add constraints to graph->lp that bound the dependence distance
2197
 * for all dependence relations.
2198
 * If a given proximity dependence is identical to a validity
2199
 * dependence, then the dependence distance is already bounded
2200
 * from below (by zero), so we only need to bound the distance
2201
 * from above.  (This includes the case of "local" dependences
2202
 * which are treated as validity dependence by add_all_validity_constraints.)
2203
 * Otherwise, we need to bound the distance both from above and from below.
2204
 *
2205
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2206
 * Otherwise, we ignore them.
2207
 */
2208
static int add_all_proximity_constraints(struct isl_sched_graph *graph,
2209
  int use_coincidence)
2210
408
{
2211
408
  int i;
2212
408
2213
1.00k
  for (i = 0; i < graph->n_edge; 
++i593
) {
2214
593
    struct isl_sched_edge *edge = &graph->edge[i];
2215
593
    int zero;
2216
593
2217
593
    zero = force_zero(edge, use_coincidence);
2218
593
    if (!is_proximity(edge) && 
!zero127
)
2219
119
      continue;
2220
474
    if (edge->src == edge->dst &&
2221
474
        
add_intra_proximity_constraints(graph, edge, 1, zero) < 0278
)
2222
0
      return -1;
2223
474
    if (edge->src != edge->dst &&
2224
474
        
add_inter_proximity_constraints(graph, edge, 1, zero) < 0196
)
2225
0
      return -1;
2226
474
    if (is_validity(edge) || 
zero21
)
2227
457
      continue;
2228
17
    if (edge->src == edge->dst &&
2229
17
        
add_intra_proximity_constraints(graph, edge, -1, 0) < 08
)
2230
0
      return -1;
2231
17
    if (edge->src != edge->dst &&
2232
17
        
add_inter_proximity_constraints(graph, edge, -1, 0) < 09
)
2233
0
      return -1;
2234
17
  }
2235
408
2236
408
  return 0;
2237
408
}
2238
2239
/* Normalize the rows of "indep" such that all rows are lexicographically
2240
 * positive and such that each row contains as many final zeros as possible,
2241
 * given the choice for the previous rows.
2242
 * Do this by performing elementary row operations.
2243
 */
2244
static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep)
2245
1.28k
{
2246
1.28k
  indep = isl_mat_reverse_gauss(indep);
2247
1.28k
  indep = isl_mat_lexnonneg_rows(indep);
2248
1.28k
  return indep;
2249
1.28k
}
2250
2251
/* Compute a basis for the rows in the linear part of the schedule
2252
 * and extend this basis to a full basis.  The remaining rows
2253
 * can then be used to force linear independence from the rows
2254
 * in the schedule.
2255
 *
2256
 * In particular, given the schedule rows S, we compute
2257
 *
2258
 *  S   = H Q
2259
 *  S U = H
2260
 *
2261
 * with H the Hermite normal form of S.  That is, all but the
2262
 * first rank columns of H are zero and so each row in S is
2263
 * a linear combination of the first rank rows of Q.
2264
 * The matrix Q can be used as a variable transformation
2265
 * that isolates the directions of S in the first rank rows.
2266
 * Transposing S U = H yields
2267
 *
2268
 *  U^T S^T = H^T
2269
 *
2270
 * with all but the first rank rows of H^T zero.
2271
 * The last rows of U^T are therefore linear combinations
2272
 * of schedule coefficients that are all zero on schedule
2273
 * coefficients that are linearly dependent on the rows of S.
2274
 * At least one of these combinations is non-zero on
2275
 * linearly independent schedule coefficients.
2276
 * The rows are normalized to involve as few of the last
2277
 * coefficients as possible and to have a positive initial value.
2278
 */
2279
static int node_update_vmap(struct isl_sched_node *node)
2280
1.28k
{
2281
1.28k
  isl_mat *H, *U, *Q;
2282
1.28k
  int n_row = isl_mat_rows(node->sched);
2283
1.28k
2284
1.28k
  H = isl_mat_sub_alloc(node->sched, 0, n_row,
2285
1.28k
            1 + node->nparam, node->nvar);
2286
1.28k
2287
1.28k
  H = isl_mat_left_hermite(H, 0, &U, &Q);
2288
1.28k
  isl_mat_free(node->indep);
2289
1.28k
  isl_mat_free(node->vmap);
2290
1.28k
  node->vmap = Q;
2291
1.28k
  node->indep = isl_mat_transpose(U);
2292
1.28k
  node->rank = isl_mat_initial_non_zero_cols(H);
2293
1.28k
  node->indep = isl_mat_drop_rows(node->indep, 0, node->rank);
2294
1.28k
  node->indep = normalize_independent(node->indep);
2295
1.28k
  isl_mat_free(H);
2296
1.28k
2297
1.28k
  if (!node->indep || !node->vmap || node->rank < 0)
2298
0
    return -1;
2299
1.28k
  return 0;
2300
1.28k
}
2301
2302
/* Is "edge" marked as a validity or a conditional validity edge?
2303
 */
2304
static int is_any_validity(struct isl_sched_edge *edge)
2305
108
{
2306
108
  return is_validity(edge) || 
is_conditional_validity(edge)2
;
2307
108
}
2308
2309
/* How many times should we count the constraints in "edge"?
2310
 *
2311
 * We count as follows
2312
 * validity   -> 1 (>= 0)
2313
 * validity+proximity -> 2 (>= 0 and upper bound)
2314
 * proximity    -> 2 (lower and upper bound)
2315
 * local(+any)    -> 2 (>= 0 and <= 0)
2316
 *
2317
 * If an edge is only marked conditional_validity then it counts
2318
 * as zero since it is only checked afterwards.
2319
 *
2320
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2321
 * Otherwise, we ignore them.
2322
 */
2323
static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
2324
593
{
2325
593
  if (is_proximity(edge) || 
force_zero(edge, use_coincidence)127
)
2326
474
    return 2;
2327
119
  if (is_validity(edge))
2328
28
    return 1;
2329
91
  return 0;
2330
91
}
2331
2332
/* How many times should the constraints in "edge" be counted
2333
 * as a parametric intra-node constraint?
2334
 *
2335
 * Only proximity edges that are not forced zero need
2336
 * coefficient constraints that include coefficients for parameters.
2337
 * If the edge is also a validity edge, then only
2338
 * an upper bound is introduced.  Otherwise, both lower and upper bounds
2339
 * are introduced.
2340
 */
2341
static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge,
2342
  int use_coincidence)
2343
593
{
2344
593
  if (edge->src != edge->dst)
2345
272
    return 0;
2346
321
  if (!is_proximity(edge))
2347
48
    return 0;
2348
273
  if (force_zero(edge, use_coincidence))
2349
135
    return 0;
2350
138
  if (is_validity(edge))
2351
130
    return 1;
2352
8
  else
2353
8
    return 2;
2354
138
}
2355
2356
/* Add "f" times the number of equality and inequality constraints of "bset"
2357
 * to "n_eq" and "n_ineq" and free "bset".
2358
 */
2359
static isl_stat update_count(__isl_take isl_basic_set *bset,
2360
  int f, int *n_eq, int *n_ineq)
2361
696
{
2362
696
  if (!bset)
2363
0
    return isl_stat_error;
2364
696
2365
696
  *n_eq += isl_basic_set_n_equality(bset);
2366
696
  *n_ineq += isl_basic_set_n_inequality(bset);
2367
696
  isl_basic_set_free(bset);
2368
696
2369
696
  return isl_stat_ok;
2370
696
}
2371
2372
/* Count the number of equality and inequality constraints
2373
 * that will be added for the given map.
2374
 *
2375
 * The edges that require parameter coefficients are counted separately.
2376
 *
2377
 * "use_coincidence" is set if we should take into account coincidence edges.
2378
 */
2379
static isl_stat count_map_constraints(struct isl_sched_graph *graph,
2380
  struct isl_sched_edge *edge, __isl_take isl_map *map,
2381
  int *n_eq, int *n_ineq, int use_coincidence)
2382
593
{
2383
593
  isl_map *copy;
2384
593
  isl_basic_set *coef;
2385
593
  int f = edge_multiplicity(edge, use_coincidence);
2386
593
  int fp = parametric_intra_edge_multiplicity(edge, use_coincidence);
2387
593
2388
593
  if (f == 0) {
2389
91
    isl_map_free(map);
2390
91
    return isl_stat_ok;
2391
91
  }
2392
502
2393
502
  if (edge->src != edge->dst) {
2394
210
    coef = inter_coefficients(graph, edge, map);
2395
210
    return update_count(coef, f, n_eq, n_ineq);
2396
210
  }
2397
292
2398
292
  if (fp > 0) {
2399
138
    copy = isl_map_copy(map);
2400
138
    coef = intra_coefficients(graph, edge->src, copy, 1);
2401
138
    if (update_count(coef, fp, n_eq, n_ineq) < 0)
2402
0
      goto error;
2403
292
  }
2404
292
2405
292
  if (f > fp) {
2406
284
    copy = isl_map_copy(map);
2407
284
    coef = intra_coefficients(graph, edge->src, copy, 0);
2408
284
    if (update_count(coef, f - fp, n_eq, n_ineq) < 0)
2409
0
      goto error;
2410
292
  }
2411
292
2412
292
  isl_map_free(map);
2413
292
  return isl_stat_ok;
2414
0
error:
2415
0
  isl_map_free(map);
2416
0
  return isl_stat_error;
2417
292
}
2418
2419
/* Count the number of equality and inequality constraints
2420
 * that will be added to the main lp problem.
2421
 * We count as follows
2422
 * validity   -> 1 (>= 0)
2423
 * validity+proximity -> 2 (>= 0 and upper bound)
2424
 * proximity    -> 2 (lower and upper bound)
2425
 * local(+any)    -> 2 (>= 0 and <= 0)
2426
 *
2427
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2428
 * Otherwise, we ignore them.
2429
 */
2430
static int count_constraints(struct isl_sched_graph *graph,
2431
  int *n_eq, int *n_ineq, int use_coincidence)
2432
408
{
2433
408
  int i;
2434
408
2435
408
  *n_eq = *n_ineq = 0;
2436
1.00k
  for (i = 0; i < graph->n_edge; 
++i593
) {
2437
593
    struct isl_sched_edge *edge = &graph->edge[i];
2438
593
    isl_map *map = isl_map_copy(edge->map);
2439
593
2440
593
    if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
2441
593
              use_coincidence) < 0)
2442
0
      return -1;
2443
593
  }
2444
408
2445
408
  return 0;
2446
408
}
2447
2448
/* Count the number of constraints that will be added by
2449
 * add_bound_constant_constraints to bound the values of the constant terms
2450
 * and increment *n_eq and *n_ineq accordingly.
2451
 *
2452
 * In practice, add_bound_constant_constraints only adds inequalities.
2453
 */
2454
static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
2455
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2456
408
{
2457
408
  if (isl_options_get_schedule_max_constant_term(ctx) == -1)
2458
257
    return isl_stat_ok;
2459
151
2460
151
  *n_ineq += graph->n;
2461
151
2462
151
  return isl_stat_ok;
2463
151
}
2464
2465
/* Add constraints to bound the values of the constant terms in the schedule,
2466
 * if requested by the user.
2467
 *
2468
 * The maximal value of the constant terms is defined by the option
2469
 * "schedule_max_constant_term".
2470
 */
2471
static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
2472
  struct isl_sched_graph *graph)
2473
408
{
2474
408
  int i, k;
2475
408
  int max;
2476
408
  int total;
2477
408
2478
408
  max = isl_options_get_schedule_max_constant_term(ctx);
2479
408
  if (max == -1)
2480
257
    return isl_stat_ok;
2481
151
2482
151
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2483
151
2484
318
  for (i = 0; i < graph->n; 
++i167
) {
2485
167
    struct isl_sched_node *node = &graph->node[i];
2486
167
    int pos;
2487
167
2488
167
    k = isl_basic_set_alloc_inequality(graph->lp);
2489
167
    if (k < 0)
2490
0
      return isl_stat_error;
2491
167
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
2492
167
    pos = node_cst_coef_offset(node);
2493
167
    isl_int_set_si(graph->lp->ineq[k][1 + pos], -1);
2494
167
    isl_int_set_si(graph->lp->ineq[k][0], max);
2495
167
  }
2496
151
2497
151
  return isl_stat_ok;
2498
151
}
2499
2500
/* Count the number of constraints that will be added by
2501
 * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
2502
 * accordingly.
2503
 *
2504
 * In practice, add_bound_coefficient_constraints only adds inequalities.
2505
 */
2506
static int count_bound_coefficient_constraints(isl_ctx *ctx,
2507
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2508
408
{
2509
408
  int i;
2510
408
2511
408
  if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
2512
408
      
!isl_options_get_schedule_treat_coalescing(ctx)255
)
2513
8
    return 0;
2514
400
2515
968
  
for (i = 0; 400
i < graph->n;
++i568
)
2516
568
    *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
2517
400
2518
400
  return 0;
2519
400
}
2520
2521
/* Add constraints to graph->lp that bound the values of
2522
 * the parameter schedule coefficients of "node" to "max" and
2523
 * the variable schedule coefficients to the corresponding entry
2524
 * in node->max.
2525
 * In either case, a negative value means that no bound needs to be imposed.
2526
 *
2527
 * For parameter coefficients, this amounts to adding a constraint
2528
 *
2529
 *  c_n <= max
2530
 *
2531
 * i.e.,
2532
 *
2533
 *  -c_n + max >= 0
2534
 *
2535
 * The variables coefficients are, however, not represented directly.
2536
 * Instead, the variable coefficients c_x are written as differences
2537
 * c_x = c_x^+ - c_x^-.
2538
 * That is,
2539
 *
2540
 *  -max_i <= c_x_i <= max_i
2541
 *
2542
 * is encoded as
2543
 *
2544
 *  -max_i <= c_x_i^+ - c_x_i^- <= max_i
2545
 *
2546
 * or
2547
 *
2548
 *  -(c_x_i^+ - c_x_i^-) + max_i >= 0
2549
 *  c_x_i^+ - c_x_i^- + max_i >= 0
2550
 */
2551
static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
2552
  struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
2553
568
{
2554
568
  int i, j, k;
2555
568
  int total;
2556
568
  isl_vec *ineq;
2557
568
2558
568
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2559
568
2560
880
  for (j = 0; j < node->nparam; 
++j312
) {
2561
312
    int dim;
2562
312
2563
312
    if (max < 0)
2564
245
      continue;
2565
67
2566
67
    k = isl_basic_set_alloc_inequality(graph->lp);
2567
67
    if (k < 0)
2568
0
      return isl_stat_error;
2569
67
    dim = 1 + node_par_coef_offset(node) + j;
2570
67
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
2571
67
    isl_int_set_si(graph->lp->ineq[k][dim], -1);
2572
67
    isl_int_set_si(graph->lp->ineq[k][0], max);
2573
67
  }
2574
568
2575
568
  ineq = isl_vec_alloc(ctx, 1 + total);
2576
568
  ineq = isl_vec_clr(ineq);
2577
568
  if (!ineq)
2578
0
    return isl_stat_error;
2579
1.55k
  
for (i = 0; 568
i < node->nvar;
++i991
) {
2580
991
    int pos = 1 + node_var_coef_pos(node, i);
2581
991
2582
991
    if (isl_int_is_neg(node->max->el[i]))
2583
991
      
continue526
;
2584
465
2585
465
    isl_int_set_si(ineq->el[pos], 1);
2586
465
    isl_int_set_si(ineq->el[pos + 1], -1);
2587
465
    isl_int_set(ineq->el[0], node->max->el[i]);
2588
465
2589
465
    k = isl_basic_set_alloc_inequality(graph->lp);
2590
465
    if (k < 0)
2591
0
      goto error;
2592
465
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2593
465
2594
465
    isl_seq_neg(ineq->el + pos, ineq->el + pos, 2);
2595
465
    k = isl_basic_set_alloc_inequality(graph->lp);
2596
465
    if (k < 0)
2597
0
      goto error;
2598
465
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2599
465
2600
465
    isl_seq_clr(ineq->el + pos, 2);
2601
465
  }
2602
568
  isl_vec_free(ineq);
2603
568
2604
568
  return isl_stat_ok;
2605
0
error:
2606
0
  isl_vec_free(ineq);
2607
0
  return isl_stat_error;
2608
568
}
2609
2610
/* Add constraints that bound the values of the variable and parameter
2611
 * coefficients of the schedule.
2612
 *
2613
 * The maximal value of the coefficients is defined by the option
2614
 * 'schedule_max_coefficient' and the entries in node->max.
2615
 * These latter entries are only set if either the schedule_max_coefficient
2616
 * option or the schedule_treat_coalescing option is set.
2617
 */
2618
static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
2619
  struct isl_sched_graph *graph)
2620
408
{
2621
408
  int i;
2622
408
  int max;
2623
408
2624
408
  max = isl_options_get_schedule_max_coefficient(ctx);
2625
408
2626
408
  if (max == -1 && 
!isl_options_get_schedule_treat_coalescing(ctx)255
)
2627
8
    return isl_stat_ok;
2628
400
2629
968
  
for (i = 0; 400
i < graph->n;
++i568
) {
2630
568
    struct isl_sched_node *node = &graph->node[i];
2631
568
2632
568
    if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
2633
0
      return isl_stat_error;
2634
568
  }
2635
400
2636
400
  return isl_stat_ok;
2637
400
}
2638
2639
/* Add a constraint to graph->lp that equates the value at position
2640
 * "sum_pos" to the sum of the "n" values starting at "first".
2641
 */
2642
static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
2643
  int sum_pos, int first, int n)
2644
408
{
2645
408
  int i, k;
2646
408
  int total;
2647
408
2648
408
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2649
408
2650
408
  k = isl_basic_set_alloc_equality(graph->lp);
2651
408
  if (k < 0)
2652
0
    return isl_stat_error;
2653
408
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2654
408
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2655
928
  for (i = 0; i < n; 
++i520
)
2656
520
    isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
2657
408
2658
408
  return isl_stat_ok;
2659
408
}
2660
2661
/* Add a constraint to graph->lp that equates the value at position
2662
 * "sum_pos" to the sum of the parameter coefficients of all nodes.
2663
 */
2664
static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
2665
  int sum_pos)
2666
408
{
2667
408
  int i, j, k;
2668
408
  int total;
2669
408
2670
408
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2671
408
2672
408
  k = isl_basic_set_alloc_equality(graph->lp);
2673
408
  if (k < 0)
2674
0
    return isl_stat_error;
2675
408
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2676
408
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2677
974
  for (i = 0; i < graph->n; 
++i566
) {
2678
566
    int pos = 1 + node_par_coef_offset(&graph->node[i]);
2679
566
2680
893
    for (j = 0; j < graph->node[i].nparam; 
++j327
)
2681
566
      
isl_int_set_si327
(graph->lp->eq[k][pos + j], 1);
2682
566
  }
2683
408
2684
408
  return isl_stat_ok;
2685
408
}
2686
2687
/* Add a constraint to graph->lp that equates the value at position
2688
 * "sum_pos" to the sum of the variable coefficients of all nodes.
2689
 */
2690
static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
2691
  int sum_pos)
2692
440
{
2693
440
  int i, j, k;
2694
440
  int total;
2695
440
2696
440
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2697
440
2698
440
  k = isl_basic_set_alloc_equality(graph->lp);
2699
440
  if (k < 0)
2700
0
    return isl_stat_error;
2701
440
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2702
440
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2703
1.06k
  for (i = 0; i < graph->n; 
++i628
) {
2704
628
    struct isl_sched_node *node = &graph->node[i];
2705
628
    int pos = 1 + node_var_coef_offset(node);
2706
628
2707
2.84k
    for (j = 0; j < 2 * node->nvar; 
++j2.22k
)
2708
2.22k
      isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2709
628
  }
2710
440
2711
440
  return isl_stat_ok;
2712
440
}
2713
2714
/* Construct an ILP problem for finding schedule coefficients
2715
 * that result in non-negative, but small dependence distances
2716
 * over all dependences.
2717
 * In particular, the dependence distances over proximity edges
2718
 * are bounded by m_0 + m_n n and we compute schedule coefficients
2719
 * with small values (preferably zero) of m_n and m_0.
2720
 *
2721
 * All variables of the ILP are non-negative.  The actual coefficients
2722
 * may be negative, so each coefficient is represented as the difference
2723
 * of two non-negative variables.  The negative part always appears
2724
 * immediately before the positive part.
2725
 * Other than that, the variables have the following order
2726
 *
2727
 *  - sum of positive and negative parts of m_n coefficients
2728
 *  - m_0
2729
 *  - sum of all c_n coefficients
2730
 *    (unconstrained when computing non-parametric schedules)
2731
 *  - sum of positive and negative parts of all c_x coefficients
2732
 *  - positive and negative parts of m_n coefficients
2733
 *  - for each node
2734
 *    - positive and negative parts of c_i_x, in opposite order
2735
 *    - c_i_n (if parametric)
2736
 *    - c_i_0
2737
 *
2738
 * The constraints are those from the edges plus two or three equalities
2739
 * to express the sums.
2740
 *
2741
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2742
 * Otherwise, we ignore them.
2743
 */
2744
static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
2745
  int use_coincidence)
2746
408
{
2747
408
  int i;
2748
408
  unsigned nparam;
2749
408
  unsigned total;
2750
408
  isl_space *space;
2751
408
  int parametric;
2752
408
  int param_pos;
2753
408
  int n_eq, n_ineq;
2754
408
2755
408
  parametric = ctx->opt->schedule_parametric;
2756
408
  nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
2757
408
  param_pos = 4;
2758
408
  total = param_pos + 2 * nparam;
2759
992
  for (i = 0; i < graph->n; 
++i584
) {
2760
584
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2761
584
    if (node_update_vmap(node) < 0)
2762
0
      return isl_stat_error;
2763
584
    node->start = total;
2764
584
    total += 1 + node->nparam + 2 * node->nvar;
2765
584
  }
2766
408
2767
408
  if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0)
2768
0
    return isl_stat_error;
2769
408
  if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2770
0
    return isl_stat_error;
2771
408
  if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2772
0
    return isl_stat_error;
2773
408
2774
408
  space = isl_space_set_alloc(ctx, 0, total);
2775
408
  isl_basic_set_free(graph->lp);
2776
408
  n_eq += 2 + parametric;
2777
408
2778
408
  graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
2779
408
2780
408
  if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
2781
0
    return isl_stat_error;
2782
408
  if (parametric && 
add_param_sum_constraint(graph, 2) < 0376
)
2783
0
    return isl_stat_error;
2784
408
  if (add_var_sum_constraint(graph, 3) < 0)
2785
0
    return isl_stat_error;
2786
408
  if (add_bound_constant_constraints(ctx, graph) < 0)
2787
0
    return isl_stat_error;
2788
408
  if (add_bound_coefficient_constraints(ctx, graph) < 0)
2789
0
    return isl_stat_error;
2790
408
  if (add_all_validity_constraints(graph, use_coincidence) < 0)
2791
0
    return isl_stat_error;
2792
408
  if (add_all_proximity_constraints(graph, use_coincidence) < 0)
2793
0
    return isl_stat_error;
2794
408
2795
408
  return isl_stat_ok;
2796
408
}
2797
2798
/* Analyze the conflicting constraint found by
2799
 * isl_tab_basic_set_non_trivial_lexmin.  If it corresponds to the validity
2800
 * constraint of one of the edges between distinct nodes, living, moreover
2801
 * in distinct SCCs, then record the source and sink SCC as this may
2802
 * be a good place to cut between SCCs.
2803
 */
2804
static int check_conflict(int con, void *user)
2805
1.16k
{
2806
1.16k
  int i;
2807
1.16k
  struct isl_sched_graph *graph = user;
2808
1.16k
2809
1.16k
  if (graph->src_scc >= 0)
2810
38
    return 0;
2811
1.12k
2812
1.12k
  con -= graph->lp->n_eq;
2813
1.12k
2814
1.12k
  if (con >= graph->lp->n_ineq)
2815
526
    return 0;
2816
602
2817
1.98k
  
for (i = 0; 602
i < graph->n_edge;
++i1.38k
) {
2818
1.38k
    if (!is_validity(&graph->edge[i]))
2819
135
      continue;
2820
1.25k
    if (graph->edge[i].src == graph->edge[i].dst)
2821
680
      continue;
2822
572
    if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
2823
497
      continue;
2824
75
    if (graph->edge[i].start > con)
2825
44
      continue;
2826
31
    if (graph->edge[i].end <= con)
2827
22
      continue;
2828
9
    graph->src_scc = graph->edge[i].src->scc;
2829
9
    graph->dst_scc = graph->edge[i].dst->scc;
2830
9
  }
2831
602
2832
602
  return 0;
2833
602
}
2834
2835
/* Check whether the next schedule row of the given node needs to be
2836
 * non-trivial.  Lower-dimensional domains may have some trivial rows,
2837
 * but as soon as the number of remaining required non-trivial rows
2838
 * is as large as the number or remaining rows to be computed,
2839
 * all remaining rows need to be non-trivial.
2840
 */
2841
static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
2842
620
{
2843
620
  return node->nvar - node->rank >= graph->maxvar - graph->n_row;
2844
620
}
2845
2846
/* Construct a non-triviality region with triviality directions
2847
 * corresponding to the rows of "indep".
2848
 * The rows of "indep" are expressed in terms of the schedule coefficients c_i,
2849
 * while the triviality directions are expressed in terms of
2850
 * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing
2851
 * before c^+_i.  Furthermore,
2852
 * the pairs of non-negative variables representing the coefficients
2853
 * are stored in the opposite order.
2854
 */
2855
static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep)
2856
478
{
2857
478
  isl_ctx *ctx;
2858
478
  isl_mat *mat;
2859
478
  int i, j, n, n_var;
2860
478
2861
478
  if (!indep)
2862
0
    return NULL;
2863
478
2864
478
  ctx = isl_mat_get_ctx(indep);
2865
478
  n = isl_mat_rows(indep);
2866
478
  n_var = isl_mat_cols(indep);
2867
478
  mat = isl_mat_alloc(ctx, n, 2 * n_var);
2868
478
  if (!mat)
2869
0
    return NULL;
2870
1.16k
  
for (i = 0; 478
i < n;
++i690
) {
2871
2.19k
    for (j = 0; j < n_var; 
++j1.50k
) {
2872
1.50k
      int nj = n_var - 1 - j;
2873
1.50k
      isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]);
2874
1.50k
      isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]);
2875
1.50k
    }
2876
690
  }
2877
478
2878
478
  return mat;
2879
478
}
2880
2881
/* Solve the ILP problem constructed in setup_lp.
2882
 * For each node such that all the remaining rows of its schedule
2883
 * need to be non-trivial, we construct a non-triviality region.
2884
 * This region imposes that the next row is independent of previous rows.
2885
 * In particular, the non-triviality region enforces that at least
2886
 * one of the linear combinations in the rows of node->indep is non-zero.
2887
 */
2888
static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
2889
408
{
2890
408
  int i;
2891
408
  isl_vec *sol;
2892
408
  isl_basic_set *lp;
2893
408
2894
992
  for (i = 0; i < graph->n; 
++i584
) {
2895
584
    struct isl_sched_node *node = &graph->node[i];
2896
584
    isl_mat *trivial;
2897
584
2898
584
    graph->region[i].pos = node_var_coef_offset(node);
2899
584
    if (needs_row(graph, node))
2900
478
      trivial = construct_trivial(node->indep);
2901
106
    else
2902
106
      trivial = isl_mat_zero(ctx, 0, 0);
2903
584
    graph->region[i].trivial = trivial;
2904
584
  }
2905
408
  lp = isl_basic_set_copy(graph->lp);
2906
408
  sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
2907
408
               graph->region, &check_conflict, graph);
2908
992
  for (i = 0; i < graph->n; 
++i584
)
2909
584
    isl_mat_free(graph->region[i].trivial);
2910
408
  return sol;
2911
408
}
2912
2913
/* Extract the coefficients for the variables of "node" from "sol".
2914
 *
2915
 * Each schedule coefficient c_i_x is represented as the difference
2916
 * between two non-negative variables c_i_x^+ - c_i_x^-.
2917
 * The c_i_x^- appear before their c_i_x^+ counterpart.
2918
 * Furthermore, the order of these pairs is the opposite of that
2919
 * of the corresponding coefficients.
2920
 *
2921
 * Return c_i_x = c_i_x^+ - c_i_x^-
2922
 */
2923
static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
2924
  __isl_keep isl_vec *sol)
2925
508
{
2926
508
  int i;
2927
508
  int pos;
2928
508
  isl_vec *csol;
2929
508
2930
508
  if (!sol)
2931
0
    return NULL;
2932
508
  csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
2933
508
  if (!csol)
2934
0
    return NULL;
2935
508
2936
508
  pos = 1 + node_var_coef_offset(node);
2937
1.46k
  for (i = 0; i < node->nvar; 
++i957
)
2938
957
    isl_int_sub(csol->el[node->nvar - 1 - i],
2939
508
          sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
2940
508
2941
508
  return csol;
2942
508
}
2943
2944
/* Update the schedules of all nodes based on the given solution
2945
 * of the LP problem.
2946
 * The new row is added to the current band.
2947
 * All possibly negative coefficients are encoded as a difference
2948
 * of two non-negative variables, so we need to perform the subtraction
2949
 * here.
2950
 *
2951
 * If coincident is set, then the caller guarantees that the new
2952
 * row satisfies the coincidence constraints.
2953
 */
2954
static int update_schedule(struct isl_sched_graph *graph,
2955
  __isl_take isl_vec *sol, int coincident)
2956
328
{
2957
328
  int i, j;
2958
328
  isl_vec *csol = NULL;
2959
328
2960
328
  if (!sol)
2961
0
    goto error;
2962
328
  if (sol->size == 0)
2963
328
    
isl_die0
(sol->ctx, isl_error_internal,
2964
328
      "no solution found", goto error);
2965
328
  if (graph->n_total_row >= graph->max_row)
2966
328
    
isl_die0
(sol->ctx, isl_error_internal,
2967
328
      "too many schedule rows", goto error);
2968
328
2969
777
  
for (i = 0; 328
i < graph->n;
++i449
) {
2970
449
    struct isl_sched_node *node = &graph->node[i];
2971
449
    int pos;
2972
449
    int row = isl_mat_rows(node->sched);
2973
449
2974
449
    isl_vec_free(csol);
2975
449
    csol = extract_var_coef(node, sol);
2976
449
    if (!csol)
2977
0
      goto error;
2978
449
2979
449
    isl_map_free(node->sched_map);
2980
449
    node->sched_map = NULL;
2981
449
    node->sched = isl_mat_add_rows(node->sched, 1);
2982
449
    if (!node->sched)
2983
0
      goto error;
2984
449
    pos = node_cst_coef_offset(node);
2985
449
    node->sched = isl_mat_set_element(node->sched,
2986
449
          row, 0, sol->el[1 + pos]);
2987
449
    pos = node_par_coef_offset(node);
2988
679
    for (j = 0; j < node->nparam; 
++j230
)
2989
230
      node->sched = isl_mat_set_element(node->sched,
2990
230
          row, 1 + j, sol->el[1 + pos + j]);
2991
1.29k
    for (j = 0; j < node->nvar; 
++j850
)
2992
850
      node->sched = isl_mat_set_element(node->sched,
2993
850
          row, 1 + node->nparam + j, csol->el[j]);
2994
449
    node->coincident[graph->n_total_row] = coincident;
2995
449
  }
2996
328
  isl_vec_free(sol);
2997
328
  isl_vec_free(csol);
2998
328
2999
328
  graph->n_row++;
3000
328
  graph->n_total_row++;
3001
328
3002
328
  return 0;
3003
0
error:
3004
0
  isl_vec_free(sol);
3005
0
  isl_vec_free(csol);
3006
0
  return -1;
3007
328
}
3008
3009
/* Convert row "row" of node->sched into an isl_aff living in "ls"
3010
 * and return this isl_aff.
3011
 */
3012
static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
3013
  struct isl_sched_node *node, int row)
3014
1.04k
{
3015
1.04k
  int j;
3016
1.04k
  isl_int v;
3017
1.04k
  isl_aff *aff;
3018
1.04k
3019
1.04k
  isl_int_init(v);
3020
1.04k
3021
1.04k
  aff = isl_aff_zero_on_domain(ls);
3022
1.04k
  if (isl_mat_get_element(node->sched, row, 0, &v) < 0)
3023
0
    goto error;
3024
1.04k
  aff = isl_aff_set_constant(aff, v);
3025
1.58k
  for (j = 0; j < node->nparam; 
++j546
) {
3026
546
    if (isl_mat_get_element(node->sched, row, 1 + j, &v) < 0)
3027
0
      goto error;
3028
546
    aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
3029
546
  }
3030
2.94k
  
for (j = 0; 1.04k
j < node->nvar;
++j1.89k
) {
3031
1.89k
    if (isl_mat_get_element(node->sched, row,
3032
1.89k
          1 + node->nparam + j, &v) < 0)
3033
0
      goto error;
3034
1.89k
    aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
3035
1.89k
  }
3036
1.04k
3037
1.04k
  isl_int_clear(v);
3038
1.04k
3039
1.04k
  return aff;
3040
0
error:
3041
0
  isl_int_clear(v);
3042
0
  isl_aff_free(aff);
3043
0
  return NULL;
3044
1.04k
}
3045
3046
/* Convert the "n" rows starting at "first" of node->sched into a multi_aff
3047
 * and return this multi_aff.
3048
 *
3049
 * The result is defined over the uncompressed node domain.
3050
 */
3051
static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff(
3052
  struct isl_sched_node *node, int first, int n)
3053
760
{
3054
760
  int i;
3055
760
  isl_space *space;
3056
760
  isl_local_space *ls;
3057
760
  isl_aff *aff;
3058
760
  isl_multi_aff *ma;
3059
760
  int nrow;
3060
760
3061
760
  if (!node)
3062
0
    return NULL;
3063
760
  nrow = isl_mat_rows(node->sched);
3064
760
  if (node->compressed)
3065
66
    space = isl_multi_aff_get_domain_space(node->decompress);
3066
694
  else
3067
694
    space = isl_space_copy(node->space);
3068
760
  ls = isl_local_space_from_space(isl_space_copy(space));
3069
760
  space = isl_space_from_domain(space);
3070
760
  space = isl_space_add_dims(space, isl_dim_out, n);
3071
760
  ma = isl_multi_aff_zero(space);
3072
760
3073
1.80k
  for (i = first; i < first + n; 
++i1.04k
) {
3074
1.04k
    aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
3075
1.04k
    ma = isl_multi_aff_set_aff(ma, i - first, aff);
3076
1.04k
  }
3077
760
3078
760
  isl_local_space_free(ls);
3079
760
3080
760
  if (node->compressed)
3081
66
    ma = isl_multi_aff_pullback_multi_aff(ma,
3082
66
          isl_multi_aff_copy(node->compress));
3083
760
3084
760
  return ma;
3085
760
}
3086
3087
/* Convert node->sched into a multi_aff and return this multi_aff.
3088
 *
3089
 * The result is defined over the uncompressed node domain.
3090
 */
3091
static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
3092
  struct isl_sched_node *node)
3093
219
{
3094
219
  int nrow;
3095
219
3096
219
  nrow = isl_mat_rows(node->sched);
3097
219
  return node_extract_partial_schedule_multi_aff(node, 0, nrow);
3098
219
}
3099
3100
/* Convert node->sched into a map and return this map.
3101
 *
3102
 * The result is cached in node->sched_map, which needs to be released
3103
 * whenever node->sched is updated.
3104
 * It is defined over the uncompressed node domain.
3105
 */
3106
static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
3107
634
{
3108
634
  if (!node->sched_map) {
3109
219
    isl_multi_aff *ma;
3110
219
3111
219
    ma = node_extract_schedule_multi_aff(node);
3112
219
    node->sched_map = isl_map_from_multi_aff(ma);
3113
219
  }
3114
634
3115
634
  return isl_map_copy(node->sched_map);
3116
634
}
3117
3118
/* Construct a map that can be used to update a dependence relation
3119
 * based on the current schedule.
3120
 * That is, construct a map expressing that source and sink
3121
 * are executed within the same iteration of the current schedule.
3122
 * This map can then be intersected with the dependence relation.
3123
 * This is not the most efficient way, but this shouldn't be a critical
3124
 * operation.
3125
 */
3126
static __isl_give isl_map *specializer(struct isl_sched_node *src,
3127
  struct isl_sched_node *dst)
3128
283
{
3129
283
  isl_map *src_sched, *dst_sched;
3130
283
3131
283
  src_sched = node_extract_schedule(src);
3132
283
  dst_sched = node_extract_schedule(dst);
3133
283
  return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
3134
283
}
3135
3136
/* Intersect the domains of the nested relations in domain and range
3137
 * of "umap" with "map".
3138
 */
3139
static __isl_give isl_union_map *intersect_domains(
3140
  __isl_take isl_union_map *umap, __isl_keep isl_map *map)
3141
81
{
3142
81
  isl_union_set *uset;
3143
81
3144
81
  umap = isl_union_map_zip(umap);
3145
81
  uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
3146
81
  umap = isl_union_map_intersect_domain(umap, uset);
3147
81
  umap = isl_union_map_zip(umap);
3148
81
  return umap;
3149
81
}
3150
3151
/* Update the dependence relation of the given edge based
3152
 * on the current schedule.
3153
 * If the dependence is carried completely by the current schedule, then
3154
 * it is removed from the edge_tables.  It is kept in the list of edges
3155
 * as otherwise all edge_tables would have to be recomputed.
3156
 *
3157
 * If the edge is of a type that can appear multiple times
3158
 * between the same pair of nodes, then it is added to
3159
 * the edge table (again).  This prevents the situation
3160
 * where none of these edges is referenced from the edge table
3161
 * because the one that was referenced turned out to be empty and
3162
 * was therefore removed from the table.
3163
 */
3164
static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph,
3165
  struct isl_sched_edge *edge)
3166
283
{
3167
283
  int empty;
3168
283
  isl_map *id;
3169
283
3170
283
  id = specializer(edge->src, edge->dst);
3171
283
  edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
3172
283
  if (!edge->map)
3173
0
    goto error;
3174
283
3175
283
  if (edge->tagged_condition) {
3176
38
    edge->tagged_condition =
3177
38
      intersect_domains(edge->tagged_condition, id);
3178
38
    if (!edge->tagged_condition)
3179
0
      goto error;
3180
283
  }
3181
283
  if (edge->tagged_validity) {
3182
43
    edge->tagged_validity =
3183
43
      intersect_domains(edge->tagged_validity, id);
3184
43
    if (!edge->tagged_validity)
3185
0
      goto error;
3186
283
  }
3187
283
3188
283
  empty = isl_map_plain_is_empty(edge->map);
3189
283
  if (empty < 0)
3190
0
    goto error;
3191
283
  if (empty) {
3192
171
    graph_remove_edge(graph, edge);
3193
171
  } else 
if (112
is_multi_edge_type(edge)112
) {
3194
33
    if (graph_edge_tables_add(ctx, graph, edge) < 0)
3195
0
      goto error;
3196
283
  }
3197
283
3198
283
  isl_map_free(id);
3199
283
  return isl_stat_ok;
3200
0
error:
3201
0
  isl_map_free(id);
3202
0
  return isl_stat_error;
3203
283
}
3204
3205
/* Does the domain of "umap" intersect "uset"?
3206
 */
3207
static int domain_intersects(__isl_keep isl_union_map *umap,
3208
  __isl_keep isl_union_set *uset)
3209
39
{
3210
39
  int empty;
3211
39
3212
39
  umap = isl_union_map_copy(umap);
3213
39
  umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
3214
39
  empty = isl_union_map_is_empty(umap);
3215
39
  isl_union_map_free(umap);
3216
39
3217
39
  return empty < 0 ? 
-10
: !empty;
3218
39
}
3219
3220
/* Does the range of "umap" intersect "uset"?
3221
 */
3222
static int range_intersects(__isl_keep isl_union_map *umap,
3223
  __isl_keep isl_union_set *uset)
3224
26
{
3225
26
  int empty;
3226
26
3227
26
  umap = isl_union_map_copy(umap);
3228
26
  umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
3229
26
  empty = isl_union_map_is_empty(umap);
3230
26
  isl_union_map_free(umap);
3231
26
3232
26
  return empty < 0 ? 
-10
: !empty;
3233
26
}
3234
3235
/* Are the condition dependences of "edge" local with respect to
3236
 * the current schedule?
3237
 *
3238
 * That is, are domain and range of the condition dependences mapped
3239
 * to the same point?
3240
 *
3241
 * In other words, is the condition false?
3242
 */
3243
static int is_condition_false(struct isl_sched_edge *edge)
3244
36
{
3245
36
  isl_union_map *umap;
3246
36
  isl_map *map, *sched, *test;
3247
36
  int empty, local;
3248
36
3249
36
  empty = isl_union_map_is_empty(edge->tagged_condition);
3250
36
  if (empty < 0 || empty)
3251
2
    return empty;
3252
34
3253
34
  umap = isl_union_map_copy(edge->tagged_condition);
3254
34
  umap = isl_union_map_zip(umap);
3255
34
  umap = isl_union_set_unwrap(isl_union_map_domain(umap));
3256
34
  map = isl_map_from_union_map(umap);
3257
34
3258
34
  sched = node_extract_schedule(edge->src);
3259
34
  map = isl_map_apply_domain(map, sched);
3260
34
  sched = node_extract_schedule(edge->dst);
3261
34
  map = isl_map_apply_range(map, sched);
3262
34
3263
34
  test = isl_map_identity(isl_map_get_space(map));
3264
34
  local = isl_map_is_subset(map, test);
3265
34
  isl_map_free(map);
3266
34
  isl_map_free(test);
3267
34
3268
34
  return local;
3269
34
}
3270
3271
/* For each conditional validity constraint that is adjacent
3272
 * to a condition with domain in condition_source or range in condition_sink,
3273
 * turn it into an unconditional validity constraint.
3274
 */
3275
static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
3276
  __isl_take isl_union_set *condition_source,
3277
  __isl_take isl_union_set *condition_sink)
3278
10
{
3279
10
  int i;
3280
10
3281
10
  condition_source = isl_union_set_coalesce(condition_source);
3282
10
  condition_sink = isl_union_set_coalesce(condition_sink);
3283
10
3284
51
  for (i = 0; i < graph->n_edge; 
++i41
) {
3285
41
    int adjacent;
3286
41
    isl_union_map *validity;
3287
41
3288
41
    if (!is_conditional_validity(&graph->edge[i]))
3289
18
      continue;
3290
23
    if (is_validity(&graph->edge[i]))
3291
4
      continue;
3292
19
3293
19
    validity = graph->edge[i].tagged_validity;
3294
19
    adjacent = domain_intersects(validity, condition_sink);
3295
19
    if (adjacent >= 0 && !adjacent)
3296
18
      adjacent = range_intersects(validity, condition_source);
3297
19
    if (adjacent < 0)
3298
0
      goto error;
3299
19
    if (!adjacent)
3300
17
      continue;
3301
2
3302
2
    set_validity(&graph->edge[i]);
3303
2
  }
3304
10
3305
10
  isl_union_set_free(condition_source);
3306
10
  isl_union_set_free(condition_sink);
3307
10
  return 0;
3308
0
error:
3309
0
  isl_union_set_free(condition_source);
3310
0
  isl_union_set_free(condition_sink);
3311
0
  return -1;
3312
10
}
3313
3314
/* Update the dependence relations of all edges based on the current schedule
3315
 * and enforce conditional validity constraints that are adjacent
3316
 * to satisfied condition constraints.
3317
 *
3318
 * First check if any of the condition constraints are satisfied
3319
 * (i.e., not local to the outer schedule) and keep track of
3320
 * their domain and range.
3321
 * Then update all dependence relations (which removes the non-local
3322
 * constraints).
3323
 * Finally, if any condition constraints turned out to be satisfied,
3324
 * then turn all adjacent conditional validity constraints into
3325
 * unconditional validity constraints.
3326
 */
3327
static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
3328
190
{
3329
190
  int i;
3330
190
  int any = 0;
3331
190
  isl_union_set *source, *sink;
3332
190
3333
190
  source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3334
190
  sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3335
473
  for (i = 0; i < graph->n_edge; 
++i283
) {
3336
283
    int local;
3337
283
    isl_union_set *uset;
3338
283
    isl_union_map *umap;
3339
283
3340
283
    if (!is_condition(&graph->edge[i]))
3341
245
      continue;
3342
38
    if (is_local(&graph->edge[i]))
3343
17
      continue;
3344
21
    local = is_condition_false(&graph->edge[i]);
3345
21
    if (local < 0)
3346
0
      goto error;
3347
21
    if (local)
3348
10
      continue;
3349
11
3350
11
    any = 1;
3351
11
3352
11
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3353
11
    uset = isl_union_map_domain(umap);
3354
11
    source = isl_union_set_union(source, uset);
3355
11
3356
11
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3357
11
    uset = isl_union_map_range(umap);
3358
11
    sink = isl_union_set_union(sink, uset);
3359
11
  }
3360
190
3361
473
  
for (i = 0; 190
i < graph->n_edge;
++i283
) {
3362
283
    if (update_edge(ctx, graph, &graph->edge[i]) < 0)
3363
0
      goto error;
3364
283
  }
3365
190
3366
190
  if (any)
3367
10
    return unconditionalize_adjacent_validity(graph, source, sink);
3368
180
3369
180
  isl_union_set_free(source);
3370
180
  isl_union_set_free(sink);
3371
180
  return 0;
3372
0
error:
3373
0
  isl_union_set_free(source);
3374
0
  isl_union_set_free(sink);
3375
0
  return -1;
3376
180
}
3377
3378
static void next_band(struct isl_sched_graph *graph)
3379
192
{
3380
192
  graph->band_start = graph->n_total_row;
3381
192
}
3382
3383
/* Return the union of the universe domains of the nodes in "graph"
3384
 * that satisfy "pred".
3385
 */
3386
static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
3387
  struct isl_sched_graph *graph,
3388
  int (*pred)(struct isl_sched_node *node, int data), int data)
3389
150
{
3390
150
  int i;
3391
150
  isl_set *set;
3392
150
  isl_union_set *dom;
3393
150
3394
252
  for (i = 0; i < graph->n; 
++i102
)
3395
252
    if (pred(&graph->node[i], data))
3396
150
      break;
3397
150
3398
150
  if (i >= graph->n)
3399
150
    
isl_die0
(ctx, isl_error_internal,
3400
150
      "empty component", return NULL);
3401
150
3402
150
  set = isl_set_universe(isl_space_copy(graph->node[i].space));
3403
150
  dom = isl_union_set_from_set(set);
3404
150
3405
275
  for (i = i + 1; i < graph->n; 
++i125
) {
3406
125
    if (!pred(&graph->node[i], data))
3407
112
      continue;
3408
13
    set = isl_set_universe(isl_space_copy(graph->node[i].space));
3409
13
    dom = isl_union_set_union(dom, isl_union_set_from_set(set));
3410
13
  }
3411
150
3412
150
  return dom;
3413
150
}
3414
3415
/* Return a list of unions of universe domains, where each element
3416
 * in the list corresponds to an SCC (or WCC) indexed by node->scc.
3417
 */
3418
static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx,
3419
  struct isl_sched_graph *graph)
3420
67
{
3421
67
  int i;
3422
67
  isl_union_set_list *filters;
3423
67
3424
67
  filters = isl_union_set_list_alloc(ctx, graph->scc);
3425
213
  for (i = 0; i < graph->scc; 
++i146
) {
3426
146
    isl_union_set *dom;
3427
146
3428
146
    dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i);
3429
146
    filters = isl_union_set_list_add(filters, dom);
3430
146
  }
3431
67
3432
67
  return filters;
3433
67
}
3434
3435
/* Return a list of two unions of universe domains, one for the SCCs up
3436
 * to and including graph->src_scc and another for the other SCCs.
3437
 */
3438
static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
3439
  struct isl_sched_graph *graph)
3440
2
{
3441
2
  isl_union_set *dom;
3442
2
  isl_union_set_list *filters;
3443
2
3444
2
  filters = isl_union_set_list_alloc(ctx, 2);
3445
2
  dom = isl_sched_graph_domain(ctx, graph,
3446
2
          &node_scc_at_most, graph->src_scc);
3447
2
  filters = isl_union_set_list_add(filters, dom);
3448
2
  dom = isl_sched_graph_domain(ctx, graph,
3449
2
          &node_scc_at_least, graph->src_scc + 1);
3450
2
  filters = isl_union_set_list_add(filters, dom);
3451
2
3452
2
  return filters;
3453
2
}
3454
3455
/* Copy nodes that satisfy node_pred from the src dependence graph
3456
 * to the dst dependence graph.
3457
 */
3458
static isl_stat copy_nodes(struct isl_sched_graph *dst,
3459
  struct isl_sched_graph *src,
3460
  int (*node_pred)(struct isl_sched_node *node, int data), int data)
3461
194
{
3462
194
  int i;
3463
194
3464
194
  dst->n = 0;
3465
694
  for (i = 0; i < src->n; 
++i500
) {
3466
500
    int j;
3467
500
3468
500
    if (!node_pred(&src->node[i], data))
3469
276
      continue;
3470
224
3471
224
    j = dst->n;
3472
224
    dst->node[j].space = isl_space_copy(src->node[i].space);
3473
224
    dst->node[j].compressed = src->node[i].compressed;
3474
224
    dst->node[j].hull = isl_set_copy(src->node[i].hull);
3475
224
    dst->node[j].compress =
3476
224
      isl_multi_aff_copy(src->node[i].compress);
3477
224
    dst->node[j].decompress =
3478
224
      isl_multi_aff_copy(src->node[i].decompress);
3479
224
    dst->node[j].nvar = src->node[i].nvar;
3480
224
    dst->node[j].nparam = src->node[i].nparam;
3481
224
    dst->node[j].sched = isl_mat_copy(src->node[i].sched);
3482
224
    dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
3483
224
    dst->node[j].coincident = src->node[i].coincident;
3484
224
    dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
3485
224
    dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds);
3486
224
    dst->node[j].max = isl_vec_copy(src->node[i].max);
3487
224
    dst->n++;
3488
224
3489
224
    if (!dst->node[j].space || !dst->node[j].sched)
3490
0
      return isl_stat_error;
3491
224
    if (dst->node[j].compressed &&
3492
224
        
(14
!dst->node[j].hull14
||
!dst->node[j].compress14
||
3493
14
         !dst->node[j].decompress))
3494
0
      return isl_stat_error;
3495
224
  }
3496
194
3497
194
  return isl_stat_ok;
3498
194
}
3499
3500
/* Copy non-empty edges that satisfy edge_pred from the src dependence graph
3501
 * to the dst dependence graph.
3502
 * If the source or destination node of the edge is not in the destination
3503
 * graph, then it must be a backward proximity edge and it should simply
3504
 * be ignored.
3505
 */
3506
static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
3507
  struct isl_sched_graph *src,
3508
  int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
3509
194
{
3510
194
  int i;
3511
194
3512
194
  dst->n_edge = 0;
3513
724
  for (i = 0; i < src->n_edge; 
++i530
) {
3514
530
    struct isl_sched_edge *edge = &src->edge[i];
3515
530
    isl_map *map;
3516
530
    isl_union_map *tagged_condition;
3517
530
    isl_union_map *tagged_validity;
3518
530
    struct isl_sched_node *dst_src, *dst_dst;
3519
530
3520
530
    if (!edge_pred(edge, data))
3521
418
      continue;
3522
112
3523
112
    if (isl_map_plain_is_empty(edge->map))
3524
31
      continue;
3525
81
3526
81
    dst_src = graph_find_node(ctx, dst, edge->src->space);
3527
81
    dst_dst = graph_find_node(ctx, dst, edge->dst->space);
3528
81
    if (!dst_src || !dst_dst)
3529
0
      return isl_stat_error;
3530
81
    if (!is_node(dst, dst_src) || !is_node(dst, dst_dst)) {
3531
0
      if (is_validity(edge) || is_conditional_validity(edge))
3532
0
        isl_die(ctx, isl_error_internal,
3533
0
          "backward (conditional) validity edge",
3534
0
          return isl_stat_error);
3535
0
      continue;
3536
81
    }
3537
81
3538
81
    map = isl_map_copy(edge->map);
3539
81
    tagged_condition = isl_union_map_copy(edge->tagged_condition);
3540
81
    tagged_validity = isl_union_map_copy(edge->tagged_validity);
3541
81
3542
81
    dst->edge[dst->n_edge].src = dst_src;
3543
81
    dst->edge[dst->n_edge].dst = dst_dst;
3544
81
    dst->edge[dst->n_edge].map = map;
3545
81
    dst->edge[dst->n_edge].tagged_condition = tagged_condition;
3546
81
    dst->edge[dst->n_edge].tagged_validity = tagged_validity;
3547
81
    dst->edge[dst->n_edge].types = edge->types;
3548
81
    dst->n_edge++;
3549
81
3550
81
    if (edge->tagged_condition && 
!tagged_condition11
)
3551
0
      return isl_stat_error;
3552
81
    if (edge->tagged_validity && 
!tagged_validity6
)
3553
0
      return isl_stat_error;
3554
81
3555
81
    if (graph_edge_tables_add(ctx, dst,
3556
81
              &dst->edge[dst->n_edge - 1]) < 0)
3557
0
      return isl_stat_error;
3558
81
  }
3559
194
3560
194
  return isl_stat_ok;
3561
194
}
3562
3563
/* Compute the maximal number of variables over all nodes.
3564
 * This is the maximal number of linearly independent schedule
3565
 * rows that we need to compute.
3566
 * Just in case we end up in a part of the dependence graph
3567
 * with only lower-dimensional domains, we make sure we will
3568
 * compute the required amount of extra linearly independent rows.
3569
 */
3570
static int compute_maxvar(struct isl_sched_graph *graph)
3571
446
{
3572
446
  int i;
3573
446
3574
446
  graph->maxvar = 0;
3575
1.07k
  for (i = 0; i < graph->n; 
++i628
) {
3576
628
    struct isl_sched_node *node = &graph->node[i];
3577
628
    int nvar;
3578
628
3579
628
    if (node_update_vmap(node) < 0)
3580
0
      return -1;
3581
628
    nvar = node->nvar + graph->n_row - node->rank;
3582
628
    if (nvar > graph->maxvar)
3583
439
      graph->maxvar = nvar;
3584
628
  }
3585
446
3586
446
  return 0;
3587
446
}
3588
3589
/* Extract the subgraph of "graph" that consists of the nodes satisfying
3590
 * "node_pred" and the edges satisfying "edge_pred" and store
3591
 * the result in "sub".
3592
 */
3593
static isl_stat extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
3594
  int (*node_pred)(struct isl_sched_node *node, int data),
3595
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3596
  int data, struct isl_sched_graph *sub)
3597
194
{
3598
194
  int i, n = 0, n_edge = 0;
3599
194
  int t;
3600
194
3601
694
  for (i = 0; i < graph->n; 
++i500
)
3602
500
    if (node_pred(&graph->node[i], data))
3603
224
      ++n;
3604
724
  for (i = 0; i < graph->n_edge; 
++i530
)
3605
530
    if (edge_pred(&graph->edge[i], data))
3606
112
      ++n_edge;
3607
194
  if (graph_alloc(ctx, sub, n, n_edge) < 0)
3608
0
    return isl_stat_error;
3609
194
  sub->root = graph->root;
3610
194
  if (copy_nodes(sub, graph, node_pred, data) < 0)
3611
0
    return isl_stat_error;
3612
194
  if (graph_init_table(ctx, sub) < 0)
3613
0
    return isl_stat_error;
3614
1.16k
  
for (t = 0; 194
t <= isl_edge_last;
++t970
)
3615
970
    sub->max_edge[t] = graph->max_edge[t];
3616
194
  if (graph_init_edge_tables(ctx, sub) < 0)
3617
0
    return isl_stat_error;
3618
194
  if (copy_edges(ctx, sub, graph, edge_pred, data) < 0)
3619
0
    return isl_stat_error;
3620
194
  sub->n_row = graph->n_row;
3621
194
  sub->max_row = graph->max_row;
3622
194
  sub->n_total_row = graph->n_total_row;
3623
194
  sub->band_start = graph->band_start;
3624
194
3625
194
  return isl_stat_ok;
3626
194
}
3627
3628
static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
3629
  struct isl_sched_graph *graph);
3630
static __isl_give isl_schedule_node *compute_schedule_wcc(
3631
  isl_schedule_node *node, struct isl_sched_graph *graph);
3632
3633
/* Compute a schedule for a subgraph of "graph".  In particular, for
3634
 * the graph composed of nodes that satisfy node_pred and edges that
3635
 * that satisfy edge_pred.
3636
 * If the subgraph is known to consist of a single component, then wcc should
3637
 * be set and then we call compute_schedule_wcc on the constructed subgraph.
3638
 * Otherwise, we call compute_schedule, which will check whether the subgraph
3639
 * is connected.
3640
 *
3641
 * The schedule is inserted at "node" and the updated schedule node
3642
 * is returned.
3643
 */
3644
static __isl_give isl_schedule_node *compute_sub_schedule(
3645
  __isl_take isl_schedule_node *node, isl_ctx *ctx,
3646
  struct isl_sched_graph *graph,
3647
  int (*node_pred)(struct isl_sched_node *node, int data),
3648
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3649
  int data, int wcc)
3650
76
{
3651
76
  struct isl_sched_graph split = { 0 };
3652
76
3653
76
  if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data,
3654
76
        &split) < 0)
3655
0
    goto error;
3656
76
3657
76
  if (wcc)
3658
72
    node = compute_schedule_wcc(node, &split);
3659
4
  else
3660
4
    node = compute_schedule(node, &split);
3661
76
3662
76
  graph_free(ctx, &split);
3663
76
  return node;
3664
0
error:
3665
0
  graph_free(ctx, &split);
3666
0
  return isl_schedule_node_free(node);
3667
76
}
3668
3669
static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
3670
766
{
3671
766
  return edge->src->scc == scc && 
edge->dst->scc == scc306
;
3672
766
}
3673
3674
static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
3675
14
{
3676
14
  return edge->dst->scc <= scc;
3677
14
}
3678
3679
static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
3680
14
{
3681
14
  return edge->src->scc >= scc;
3682
14
}
3683
3684
/* Reset the current band by dropping all its schedule rows.
3685
 */
3686
static isl_stat reset_band(struct isl_sched_graph *graph)
3687
8
{
3688
8
  int i;
3689
8
  int drop;
3690
8
3691
8
  drop = graph->n_total_row - graph->band_start;
3692
8
  graph->n_total_row -= drop;
3693
8
  graph->n_row -= drop;
3694
8
3695
28
  for (i = 0; i < graph->n; 
++i20
) {
3696
20
    struct isl_sched_node *node = &graph->node[i];
3697
20
3698
20
    isl_map_free(node->sched_map);
3699
20
    node->sched_map = NULL;
3700
20
3701
20
    node->sched = isl_mat_drop_rows(node->sched,
3702
20
            graph->band_start, drop);
3703
20
3704
20
    if (!node->sched)
3705
0
      return isl_stat_error;
3706
20
  }
3707
8
3708
8
  return isl_stat_ok;
3709
8
}
3710
3711
/* Split the current graph into two parts and compute a schedule for each
3712
 * part individually.  In particular, one part consists of all SCCs up
3713
 * to and including graph->src_scc, while the other part contains the other
3714
 * SCCs.  The split is enforced by a sequence node inserted at position "node"
3715
 * in the schedule tree.  Return the updated schedule node.
3716
 * If either of these two parts consists of a sequence, then it is spliced
3717
 * into the sequence containing the two parts.
3718
 *
3719
 * The current band is reset. It would be possible to reuse
3720
 * the previously computed rows as the first rows in the next
3721
 * band, but recomputing them may result in better rows as we are looking
3722
 * at a smaller part of the dependence graph.
3723
 */
3724
static __isl_give isl_schedule_node *compute_split_schedule(
3725
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3726
2
{
3727
2
  int is_seq;
3728
2
  isl_ctx *ctx;
3729
2
  isl_union_set_list *filters;
3730
2
3731
2
  if (!node)
3732
0
    return NULL;
3733
2
3734
2
  if (reset_band(graph) < 0)
3735
0
    return isl_schedule_node_free(node);
3736
2
3737
2
  next_band(graph);
3738
2
3739
2
  ctx = isl_schedule_node_get_ctx(node);
3740
2
  filters = extract_split(ctx, graph);
3741
2
  node = isl_schedule_node_insert_sequence(node, filters);
3742
2
  node = isl_schedule_node_child(node, 1);
3743
2
  node = isl_schedule_node_child(node, 0);
3744
2
3745
2
  node = compute_sub_schedule(node, ctx, graph,
3746
2
        &node_scc_at_least, &edge_src_scc_at_least,
3747
2
        graph->src_scc + 1, 0);
3748
2
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3749
2
  node = isl_schedule_node_parent(node);
3750
2
  node = isl_schedule_node_parent(node);
3751
2
  if (is_seq)
3752
0
    node = isl_schedule_node_sequence_splice_child(node, 1);
3753
2
  node = isl_schedule_node_child(node, 0);
3754
2
  node = isl_schedule_node_child(node, 0);
3755
2
  node = compute_sub_schedule(node, ctx, graph,
3756
2
        &node_scc_at_most, &edge_dst_scc_at_most,
3757
2
        graph->src_scc, 0);
3758
2
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3759
2
  node = isl_schedule_node_parent(node);
3760
2
  node = isl_schedule_node_parent(node);
3761
2
  if (is_seq)
3762
0
    node = isl_schedule_node_sequence_splice_child(node, 0);
3763
2
3764
2
  return node;
3765
2
}
3766
3767
/* Insert a band node at position "node" in the schedule tree corresponding
3768
 * to the current band in "graph".  Mark the band node permutable
3769
 * if "permutable" is set.
3770
 * The partial schedules and the coincidence property are extracted
3771
 * from the graph nodes.
3772
 * Return the updated schedule node.
3773
 */
3774
static __isl_give isl_schedule_node *insert_current_band(
3775
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3776
  int permutable)
3777
172
{
3778
172
  int i;
3779
172
  int start, end, n;
3780
172
  isl_multi_aff *ma;
3781
172
  isl_multi_pw_aff *mpa;
3782
172
  isl_multi_union_pw_aff *mupa;
3783
172
3784
172
  if (!node)
3785
0
    return NULL;
3786
172
3787
172
  if (graph->n < 1)
3788
172
    
isl_die0
(isl_schedule_node_get_ctx(node), isl_error_internal,
3789
172
      "graph should have at least one node",
3790
172
      return isl_schedule_node_free(node));
3791
172
3792
172
  start = graph->band_start;
3793
172
  end = graph->n_total_row;
3794
172
  n = end - start;
3795
172
3796
172
  ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n);
3797
172
  mpa = isl_multi_pw_aff_from_multi_aff(ma);
3798
172
  mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3799
172
3800
250
  for (i = 1; i < graph->n; 
++i78
) {
3801
78
    isl_multi_union_pw_aff *mupa_i;
3802
78
3803
78
    ma = node_extract_partial_schedule_multi_aff(&graph->node[i],
3804
78
                start, n);
3805
78
    mpa = isl_multi_pw_aff_from_multi_aff(ma);
3806
78
    mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3807
78
    mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
3808
78
  }
3809
172
  node = isl_schedule_node_insert_partial_schedule(node, mupa);
3810
172
3811
451
  for (i = 0; i < n; 
++i279
)
3812
279
    node = isl_schedule_node_band_member_set_coincident(node, i,
3813
279
          graph->node[0].coincident[start + i]);
3814
172
  node = isl_schedule_node_band_set_permutable(node, permutable);
3815
172
3816
172
  return node;
3817
172
}
3818
3819
/* Update the dependence relations based on the current schedule,
3820
 * add the current band to "node" and then continue with the computation
3821
 * of the next band.
3822
 * Return the updated schedule node.
3823
 */
3824
static __isl_give isl_schedule_node *compute_next_band(
3825
  __isl_take isl_schedule_node *node,
3826
  struct isl_sched_graph *graph, int permutable)
3827
172
{
3828
172
  isl_ctx *ctx;
3829
172
3830
172
  if (!node)
3831
0
    return NULL;
3832
172
3833
172
  ctx = isl_schedule_node_get_ctx(node);
3834
172
  if (update_edges(ctx, graph) < 0)
3835
0
    return isl_schedule_node_free(node);
3836
172
  node = insert_current_band(node, graph, permutable);
3837
172
  next_band(graph);
3838
172
3839
172
  node = isl_schedule_node_child(node, 0);
3840
172
  node = compute_schedule(node, graph);
3841
172
  node = isl_schedule_node_parent(node);
3842
172
3843
172
  return node;
3844
172
}
3845
3846
/* Add the constraints "coef" derived from an edge from "node" to itself
3847
 * to graph->lp in order to respect the dependences and to try and carry them.
3848
 * "pos" is the sequence number of the edge that needs to be carried.
3849
 * "coef" represents general constraints on coefficients (c_0, c_x)
3850
 * of valid constraints for (y - x) with x and y instances of the node.
3851
 *
3852
 * The constraints added to graph->lp need to enforce
3853
 *
3854
 *  (c_j_0 + c_j_x y) - (c_j_0 + c_j_x x)
3855
 *  = c_j_x (y - x) >= e_i
3856
 *
3857
 * for each (x,y) in the dependence relation of the edge.
3858
 * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x),
3859
 * taking into account that each coefficient in c_j_x is represented
3860
 * as a pair of non-negative coefficients.
3861
 */
3862
static isl_stat add_intra_constraints(struct isl_sched_graph *graph,
3863
  struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos)
3864
46
{
3865
46
  int offset;
3866
46
  isl_ctx *ctx;
3867
46
  isl_dim_map *dim_map;
3868
46
3869
46
  if (!coef)
3870
0
    return isl_stat_error;
3871
46
3872
46
  ctx = isl_basic_set_get_ctx(coef);
3873
46
  offset = coef_var_offset(coef);
3874
46
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
3875
46
  isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3876
46
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3877
46
3878
46
  return isl_stat_ok;
3879
46
}
3880
3881
/* Add the constraints "coef" derived from an edge from "src" to "dst"
3882
 * to graph->lp in order to respect the dependences and to try and carry them.
3883
 * "pos" is the sequence number of the edge that needs to be carried or
3884
 * -1 if no attempt should be made to carry the dependences.
3885
 * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
3886
 * of valid constraints for (x, y) with x and y instances of "src" and "dst".
3887
 *
3888
 * The constraints added to graph->lp need to enforce
3889
 *
3890
 *  (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
3891
 *
3892
 * for each (x,y) in the dependence relation of the edge or
3893
 *
3894
 *  (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0
3895
 *
3896
 * if pos is -1.
3897
 * That is,
3898
 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3899
 * or
3900
 * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3901
 * needs to be plugged in for (c_0, c_n, c_x, c_y),
3902
 * taking into account that each coefficient in c_j_x and c_k_x is represented
3903
 * as a pair of non-negative coefficients.
3904
 */
3905
static isl_stat add_inter_constraints(struct isl_sched_graph *graph,
3906
  struct isl_sched_node *src, struct isl_sched_node *dst,
3907
  __isl_take isl_basic_set *coef, int pos)
3908
18
{
3909
18
  int offset;
3910
18
  isl_ctx *ctx;
3911
18
  isl_dim_map *dim_map;
3912
18
3913
18
  if (!coef)
3914
0
    return isl_stat_error;
3915
18
3916
18
  ctx = isl_basic_set_get_ctx(coef);
3917
18
  offset = coef_var_offset(coef);
3918
18
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
3919
18
  if (pos >= 0)
3920
18
    isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3921
18
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3922
18
3923
18
  return isl_stat_ok;
3924
18
}
3925
3926
/* Data structure for keeping track of the data needed
3927
 * to exploit non-trivial lineality spaces.
3928
 *
3929
 * "any_non_trivial" is true if there are any non-trivial lineality spaces.
3930
 * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL.
3931
 * "equivalent" connects instances to other instances on the same line(s).
3932
 * "mask" contains the domain spaces of "equivalent".
3933
 * Any instance set not in "mask" does not have a non-trivial lineality space.
3934
 */
3935
struct isl_exploit_lineality_data {
3936
  isl_bool any_non_trivial;
3937
  isl_union_map *equivalent;
3938
  isl_union_set *mask;
3939
};
3940
3941
/* Data structure collecting information used during the construction
3942
 * of an LP for carrying dependences.
3943
 *
3944
 * "intra" is a sequence of coefficient constraints for intra-node edges.
3945
 * "inter" is a sequence of coefficient constraints for inter-node edges.
3946
 * "lineality" contains data used to exploit non-trivial lineality spaces.
3947
 */
3948
struct isl_carry {
3949
  isl_basic_set_list *intra;
3950
  isl_basic_set_list *inter;
3951
  struct isl_exploit_lineality_data lineality;
3952
};
3953
3954
/* Free all the data stored in "carry".
3955
 */
3956
static void isl_carry_clear(struct isl_carry *carry)
3957
32
{
3958
32
  isl_basic_set_list_free(carry->intra);
3959
32
  isl_basic_set_list_free(carry->inter);
3960
32
  isl_union_map_free(carry->lineality.equivalent);
3961
32
  isl_union_set_free(carry->lineality.mask);
3962
32
}
3963
3964
/* Return a pointer to the node in "graph" that lives in "space".
3965
 * If the requested node has been compressed, then "space"
3966
 * corresponds to the compressed space.
3967
 * The graph is assumed to have such a node.
3968
 * Return NULL in case of error.
3969
 *
3970
 * First try and see if "space" is the space of an uncompressed node.
3971
 * If so, return that node.
3972
 * Otherwise, "space" was constructed by construct_compressed_id and
3973
 * contains a user pointer pointing to the node in the tuple id.
3974
 * However, this node belongs to the original dependence graph.
3975
 * If "graph" is a subgraph of this original dependence graph,
3976
 * then the node with the same space still needs to be looked up
3977
 * in the current graph.
3978
 */
3979
static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx,
3980
  struct isl_sched_graph *graph, __isl_keep isl_space *space)
3981
106
{
3982
106
  isl_id *id;
3983
106
  struct isl_sched_node *node;
3984
106
3985
106
  if (!space)
3986
0
    return NULL;
3987
106
3988
106
  node = graph_find_node(ctx, graph, space);
3989
106
  if (!node)
3990
0
    return NULL;
3991
106
  if (is_node(graph, node))
3992
106
    return node;
3993
0
3994
0
  id = isl_space_get_tuple_id(space, isl_dim_set);
3995
0
  node = isl_id_get_user(id);
3996
0
  isl_id_free(id);
3997
0
3998
0
  if (!node)
3999
0
    return NULL;
4000
0
4001
0
  if (!is_node(graph->root, node))
4002
0
    isl_die(ctx, isl_error_internal,
4003
0
      "space points to invalid node", return NULL);
4004
0
  if (graph != graph->root)
4005
0
    node = graph_find_node(ctx, graph, node->space);
4006
0
  if (!is_node(graph, node))
4007
0
    isl_die(ctx, isl_error_internal,
4008
0
      "unable to find node", return NULL);
4009
0
4010
0
  return node;
4011
0
}
4012
4013
/* Internal data structure for add_all_constraints.
4014
 *
4015
 * "graph" is the schedule constraint graph for which an LP problem
4016
 * is being constructed.
4017
 * "carry_inter" indicates whether inter-node edges should be carried.
4018
 * "pos" is the position of the next edge that needs to be carried.
4019
 */
4020
struct isl_add_all_constraints_data {
4021
  isl_ctx *ctx;
4022
  struct isl_sched_graph *graph;
4023
  int carry_inter;
4024
  int pos;
4025
};
4026
4027
/* Add the constraints "coef" derived from an edge from a node to itself
4028
 * to data->graph->lp in order to respect the dependences and
4029
 * to try and carry them.
4030
 *
4031
 * The space of "coef" is of the form
4032
 *
4033
 *  coefficients[[c_cst] -> S[c_x]]
4034
 *
4035
 * with S[c_x] the (compressed) space of the node.
4036
 * Extract the node from the space and call add_intra_constraints.
4037
 */
4038
static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user)
4039
46
{
4040
46
  struct isl_add_all_constraints_data *data = user;
4041
46
  isl_space *space;
4042
46
  struct isl_sched_node *node;
4043
46
4044
46
  space = isl_basic_set_get_space(coef);
4045
46
  space = isl_space_range(isl_space_unwrap(space));
4046
46
  node = graph_find_compressed_node(data->ctx, data->graph, space);
4047
46
  isl_space_free(space);
4048
46
  return add_intra_constraints(data->graph, node, coef, data->pos++);
4049
46
}
4050
4051
/* Add the constraints "coef" derived from an edge from a node j
4052
 * to a node k to data->graph->lp in order to respect the dependences and
4053
 * to try and carry them (provided data->carry_inter is set).
4054
 *
4055
 * The space of "coef" is of the form
4056
 *
4057
 *  coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
4058
 *
4059
 * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
4060
 * Extract the nodes from the space and call add_inter_constraints.
4061
 */
4062
static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user)
4063
18
{
4064
18
  struct isl_add_all_constraints_data *data = user;
4065
18
  isl_space *space, *dom;
4066
18
  struct isl_sched_node *src, *dst;
4067
18
  int pos;
4068
18
4069
18
  space = isl_basic_set_get_space(coef);
4070
18
  space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space)));
4071
18
  dom = isl_space_domain(isl_space_copy(space));
4072
18
  src = graph_find_compressed_node(data->ctx, data->graph, dom);
4073
18
  isl_space_free(dom);
4074
18
  space = isl_space_range(space);
4075
18
  dst = graph_find_compressed_node(data->ctx, data->graph, space);
4076
18
  isl_space_free(space);
4077
18
4078
18
  pos = data->carry_inter ? data->pos++ : 
-10
;
4079
18
  return add_inter_constraints(data->graph, src, dst, coef, pos);
4080
18
}
4081
4082
/* Add constraints to graph->lp that force all (conditional) validity
4083
 * dependences to be respected and attempt to carry them.
4084
 * "intra" is the sequence of coefficient constraints for intra-node edges.
4085
 * "inter" is the sequence of coefficient constraints for inter-node edges.
4086
 * "carry_inter" indicates whether inter-node edges should be carried or
4087
 * only respected.
4088
 */
4089
static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph,
4090
  __isl_keep isl_basic_set_list *intra,
4091
  __isl_keep isl_basic_set_list *inter, int carry_inter)
4092
32
{
4093
32
  struct isl_add_all_constraints_data data = { ctx, graph, carry_inter };
4094
32
4095
32
  data.pos = 0;
4096
32
  if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0)
4097
0
    return isl_stat_error;
4098
32
  if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0)
4099
0
    return isl_stat_error;
4100
32
  return isl_stat_ok;
4101
32
}
4102
4103
/* Internal data structure for count_all_constraints
4104
 * for keeping track of the number of equality and inequality constraints.
4105
 */
4106
struct isl_sched_count {
4107
  int n_eq;
4108
  int n_ineq;
4109
};
4110
4111
/* Add the number of equality and inequality constraints of "bset"
4112
 * to data->n_eq and data->n_ineq.
4113
 */
4114
static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user)
4115
64
{
4116
64
  struct isl_sched_count *data = user;
4117
64
4118
64
  return update_count(bset, 1, &data->n_eq, &data->n_ineq);
4119
64
}
4120
4121
/* Count the number of equality and inequality constraints
4122
 * that will be added to the carry_lp problem.
4123
 * We count each edge exactly once.
4124
 * "intra" is the sequence of coefficient constraints for intra-node edges.
4125
 * "inter" is the sequence of coefficient constraints for inter-node edges.
4126
 */
4127
static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra,
4128
  __isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq)
4129
32
{
4130
32
  struct isl_sched_count data;
4131
32
4132
32
  data.n_eq = data.n_ineq = 0;
4133
32
  if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0)
4134
0
    return isl_stat_error;
4135
32
  if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0)
4136
0
    return isl_stat_error;
4137
32
4138
32
  *n_eq = data.n_eq;
4139
32
  *n_ineq = data.n_ineq;
4140
32
4141
32
  return isl_stat_ok;
4142
32
}
4143
4144
/* Construct an LP problem for finding schedule coefficients
4145
 * such that the schedule carries as many validity dependences as possible.
4146
 * In particular, for each dependence i, we bound the dependence distance
4147
 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
4148
 * of all e_i's.  Dependences with e_i = 0 in the solution are simply
4149
 * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
4150
 * "intra" is the sequence of coefficient constraints for intra-node edges.
4151
 * "inter" is the sequence of coefficient constraints for inter-node edges.
4152
 * "n_edge" is the total number of edges.
4153
 * "carry_inter" indicates whether inter-node edges should be carried or
4154
 * only respected.  That is, if "carry_inter" is not set, then
4155
 * no e_i variables are introduced for the inter-node edges.
4156
 *
4157
 * All variables of the LP are non-negative.  The actual coefficients
4158
 * may be negative, so each coefficient is represented as the difference
4159
 * of two non-negative variables.  The negative part always appears
4160
 * immediately before the positive part.
4161
 * Other than that, the variables have the following order
4162
 *
4163
 *  - sum of (1 - e_i) over all edges
4164
 *  - sum of all c_n coefficients
4165
 *    (unconstrained when computing non-parametric schedules)
4166
 *  - sum of positive and negative parts of all c_x coefficients
4167
 *  - for each edge
4168
 *    - e_i
4169
 *  - for each node
4170
 *    - positive and negative parts of c_i_x, in opposite order
4171
 *    - c_i_n (if parametric)
4172
 *    - c_i_0
4173
 *
4174
 * The constraints are those from the (validity) edges plus three equalities
4175
 * to express the sums and n_edge inequalities to express e_i <= 1.
4176
 */
4177
static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
4178
  int n_edge, __isl_keep isl_basic_set_list *intra,
4179
  __isl_keep isl_basic_set_list *inter, int carry_inter)
4180
32
{
4181
32
  int i;
4182
32
  int k;
4183
32
  isl_space *dim;
4184
32
  unsigned total;
4185
32
  int n_eq, n_ineq;
4186
32
4187
32
  total = 3 + n_edge;
4188
76
  for (i = 0; i < graph->n; 
++i44
) {
4189
44
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
4190
44
    node->start = total;
4191
44
    total += 1 + node->nparam + 2 * node->nvar;
4192
44
  }
4193
32
4194
32
  if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0)
4195
0
    return isl_stat_error;
4196
32
4197
32
  dim = isl_space_set_alloc(ctx, 0, total);
4198
32
  isl_basic_set_free(graph->lp);
4199
32
  n_eq += 3;
4200
32
  n_ineq += n_edge;
4201
32
  graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
4202
32
  graph->lp = isl_basic_set_set_rational(graph->lp);
4203
32
4204
32
  k = isl_basic_set_alloc_equality(graph->lp);
4205
32
  if (k < 0)
4206
0
    return isl_stat_error;
4207
32
  isl_seq_clr(graph->lp->eq[k], 1 + total);
4208
32
  isl_int_set_si(graph->lp->eq[k][0], -n_edge);
4209
32
  isl_int_set_si(graph->lp->eq[k][1], 1);
4210
96
  for (i = 0; i < n_edge; 
++i64
)
4211
64
    isl_int_set_si(graph->lp->eq[k][4 + i], 1);
4212
32
4213
32
  if (add_param_sum_constraint(graph, 1) < 0)
4214
0
    return isl_stat_error;
4215
32
  if (add_var_sum_constraint(graph, 2) < 0)
4216
0
    return isl_stat_error;
4217
32
4218
96
  
for (i = 0; 32
i < n_edge;
++i64
) {
4219
64
    k = isl_basic_set_alloc_inequality(graph->lp);
4220
64
    if (k < 0)
4221
0
      return isl_stat_error;
4222
64
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
4223
64
    isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
4224
64
    isl_int_set_si(graph->lp->ineq[k][0], 1);
4225
64
  }
4226
32
4227
32
  if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0)
4228
0
    return isl_stat_error;
4229
32
4230
32
  return isl_stat_ok;
4231
32
}
4232
4233
static __isl_give isl_schedule_node *compute_component_schedule(
4234
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4235
  int wcc);
4236
4237
/* If the schedule_split_scaled option is set and if the linear
4238
 * parts of the scheduling rows for all nodes in the graphs have
4239
 * a non-trivial common divisor, then remove this
4240
 * common divisor from the linear part.
4241
 * Otherwise, insert a band node directly and continue with
4242
 * the construction of the schedule.
4243
 *
4244
 * If a non-trivial common divisor is found, then
4245
 * the linear part is reduced and the remainder is ignored.
4246
 * The pieces of the graph that are assigned different remainders
4247
 * form (groups of) strongly connected components within
4248
 * the scaled down band.  If needed, they can therefore
4249
 * be ordered along this remainder in a sequence node.
4250
 * However, this ordering is not enforced here in order to allow
4251
 * the scheduler to combine some of the strongly connected components.
4252
 */
4253
static __isl_give isl_schedule_node *split_scaled(
4254
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4255
30
{
4256
30
  int i;
4257
30
  int row;
4258
30
  isl_ctx *ctx;
4259
30
  isl_int gcd, gcd_i;
4260
30
4261
30
  if (!node)
4262
0
    return NULL;
4263
30
4264
30
  ctx = isl_schedule_node_get_ctx(node);
4265
30
  if (!ctx->opt->schedule_split_scaled)
4266
0
    return compute_next_band(node, graph, 0);
4267
30
  if (graph->n <= 1)
4268
24
    return compute_next_band(node, graph, 0);
4269
6
4270
6
  isl_int_init(gcd);
4271
6
  isl_int_init(gcd_i);
4272
6
4273
6
  isl_int_set_si(gcd, 0);
4274
6
4275
6
  row = isl_mat_rows(graph->node[0].sched) - 1;
4276
6
4277
20
  for (i = 0; i < graph->n; 
++i14
) {
4278
14
    struct isl_sched_node *node = &graph->node[i];
4279
14
    int cols = isl_mat_cols(node->sched);
4280
14
4281
14
    isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
4282
14
    isl_int_gcd(gcd, gcd, gcd_i);
4283
14
  }
4284
6
4285
6
  isl_int_clear(gcd_i);
4286
6
4287
6
  if (isl_int_cmp_si(gcd, 1) <= 0) {
4288
2
    isl_int_clear(gcd);
4289
2
    return compute_next_band(node, graph, 0);
4290
2
  }
4291
4
4292
14
  
for (i = 0; 4
i < graph->n;
++i10
) {
4293
10
    struct isl_sched_node *node = &graph->node[i];
4294
10
4295
10
    isl_int_fdiv_q(node->sched->row[row][0],
4296
10
             node->sched->row[row][0], gcd);
4297
10
    isl_int_mul(node->sched->row[row][0],
4298
10
          node->sched->row[row][0], gcd);
4299
10
    node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
4300
10
    if (!node->sched)
4301
0
      goto error;
4302
10
  }
4303
4
4304
4
  isl_int_clear(gcd);
4305
4
4306
4
  return compute_next_band(node, graph, 0);
4307
0
error:
4308
0
  isl_int_clear(gcd);
4309
0
  return isl_schedule_node_free(node);
4310
4
}
4311
4312
/* Is the schedule row "sol" trivial on node "node"?
4313
 * That is, is the solution zero on the dimensions linearly independent of
4314
 * the previously found solutions?
4315
 * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
4316
 *
4317
 * Each coefficient is represented as the difference between
4318
 * two non-negative values in "sol".
4319
 * We construct the schedule row s and check if it is linearly
4320
 * independent of previously computed schedule rows
4321
 * by computing T s, with T the linear combinations that are zero
4322
 * on linearly dependent schedule rows.
4323
 * If the result consists of all zeros, then the solution is trivial.
4324
 */
4325
static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
4326
36
{
4327
36
  int trivial;
4328
36
  isl_vec *node_sol;
4329
36
4330
36
  if (!sol)
4331
0
    return -1;
4332
36
  if (node->nvar == node->rank)
4333
4
    return 0;
4334
32
4335
32
  node_sol = extract_var_coef(node, sol);
4336
32
  node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol);
4337
32
  if (!node_sol)
4338
0
    return -1;
4339
32
4340
32
  trivial = isl_seq_first_non_zero(node_sol->el,
4341
32
          node->nvar - node->rank) == -1;
4342
32
4343
32
  isl_vec_free(node_sol);
4344
32
4345
32
  return trivial;
4346
32
}
4347
4348
/* Is the schedule row "sol" trivial on any node where it should
4349
 * not be trivial?
4350
 * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
4351
 */
4352
static int is_any_trivial(struct isl_sched_graph *graph,
4353
  __isl_keep isl_vec *sol)
4354
32
{
4355
32
  int i;
4356
32
4357
64
  for (i = 0; i < graph->n; 
++i32
) {
4358
36
    struct isl_sched_node *node = &graph->node[i];
4359
36
    int trivial;
4360
36
4361
36
    if (!needs_row(graph, node))
4362
0
      continue;
4363
36
    trivial = is_trivial(node, sol);
4364
36
    if (trivial < 0 || trivial)
4365
4
      return trivial;
4366
36
  }
4367
32
4368
32
  
return 028
;
4369
32
}
4370
4371
/* Does the schedule represented by "sol" perform loop coalescing on "node"?
4372
 * If so, return the position of the coalesced dimension.
4373
 * Otherwise, return node->nvar or -1 on error.
4374
 *
4375
 * In particular, look for pairs of coefficients c_i and c_j such that
4376
 * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|.
4377
 * If any such pair is found, then return i.
4378
 * If size_i is infinity, then no check on c_i needs to be performed.
4379
 */
4380
static int find_node_coalescing(struct isl_sched_node *node,
4381
  __isl_keep isl_vec *sol)
4382
50
{
4383
50
  int i, j;
4384
50
  isl_int max;
4385
50
  isl_vec *csol;
4386
50
4387
50
  if (node->nvar <= 1)
4388
23
    return node->nvar;
4389
27
4390
27
  csol = extract_var_coef(node, sol);
4391
27
  if (!csol)
4392
0
    return -1;
4393
27
  isl_int_init(max);
4394
77
  for (i = 0; i < node->nvar; 
++i50
) {
4395
54
    isl_val *v;
4396
54
4397
54
    if (isl_int_is_zero(csol->el[i]))
4398
54
      
continue20
;
4399
34
    v = isl_multi_val_get_val(node->sizes, i);
4400
34
    if (!v)
4401
0
      goto error;
4402
34
    if (!isl_val_is_int(v)) {
4403
14
      isl_val_free(v);
4404
14
      continue;
4405
14
    }
4406
20
    v = isl_val_div_ui(v, 2);
4407
20
    v = isl_val_ceil(v);
4408
20
    if (!v)
4409
0
      goto error;
4410
20
    isl_int_mul(max, v->n, csol->el[i]);
4411
20
    isl_val_free(v);
4412
20
4413
52
    for (j = 0; j < node->nvar; 
++j32
) {
4414
36
      if (j == i)
4415
16
        continue;
4416
20
      if (isl_int_abs_gt(csol->el[j], max))
4417
20
        
break4
;
4418
20
    }
4419
20
    if (j < node->nvar)
4420
4
      break;
4421
20
  }
4422
27
4423
27
  isl_int_clear(max);
4424
27
  isl_vec_free(csol);
4425
27
  return i;
4426
0
error:
4427
0
  isl_int_clear(max);
4428
0
  isl_vec_free(csol);
4429
0
  return -1;
4430
27
}
4431
4432
/* Force the schedule coefficient at position "pos" of "node" to be zero
4433
 * in "tl".
4434
 * The coefficient is encoded as the difference between two non-negative
4435
 * variables.  Force these two variables to have the same value.
4436
 */
4437
static __isl_give isl_tab_lexmin *zero_out_node_coef(
4438
  __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
4439
4
{
4440
4
  int dim;
4441
4
  isl_ctx *ctx;
4442
4
  isl_vec *eq;
4443
4
4444
4
  ctx = isl_space_get_ctx(node->space);
4445
4
  dim = isl_tab_lexmin_dim(tl);
4446
4
  if (dim < 0)
4447
0
    return isl_tab_lexmin_free(tl);
4448
4
  eq = isl_vec_alloc(ctx, 1 + dim);
4449
4
  eq = isl_vec_clr(eq);
4450
4
  if (!eq)
4451
0
    return isl_tab_lexmin_free(tl);
4452
4
4453
4
  pos = 1 + node_var_coef_pos(node, pos);
4454
4
  isl_int_set_si(eq->el[pos], 1);
4455
4
  isl_int_set_si(eq->el[pos + 1], -1);
4456
4
  tl = isl_tab_lexmin_add_eq(tl, eq->el);
4457
4
  isl_vec_free(eq);
4458
4
4459
4
  return tl;
4460
4
}
4461
4462
/* Return the lexicographically smallest rational point in the basic set
4463
 * from which "tl" was constructed, double checking that this input set
4464
 * was not empty.
4465
 */
4466
static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
4467
38
{
4468
38
  isl_vec *sol;
4469
38
4470
38
  sol = isl_tab_lexmin_get_solution(tl);
4471
38
  if (!sol)
4472
0
    return NULL;
4473
38
  if (sol->size == 0)
4474
38
    
isl_die0
(isl_vec_get_ctx(sol), isl_error_internal,
4475
38
      "error in schedule construction",
4476
38
      return isl_vec_free(sol));
4477
38
  return sol;
4478
38
}
4479
4480
/* Does the solution "sol" of the LP problem constructed by setup_carry_lp
4481
 * carry any of the "n_edge" groups of dependences?
4482
 * The value in the first position is the sum of (1 - e_i) over all "n_edge"
4483
 * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
4484
 * by the edge are carried by the solution.
4485
 * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
4486
 * one of those is carried.
4487
 *
4488
 * Note that despite the fact that the problem is solved using a rational
4489
 * solver, the solution is guaranteed to be integral.
4490
 * Specifically, the dependence distance lower bounds e_i (and therefore
4491
 * also their sum) are integers.  See Lemma 5 of [1].
4492
 *
4493
 * Any potential denominator of the sum is cleared by this function.
4494
 * The denominator is not relevant for any of the other elements
4495
 * in the solution.
4496
 *
4497
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4498
 *     Problem, Part II: Multi-Dimensional Time.
4499
 *     In Intl. Journal of Parallel Programming, 1992.
4500
 */
4501
static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
4502
38
{
4503
38
  isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
4504
38
  isl_int_set_si(sol->el[0], 1);
4505
38
  return isl_int_cmp_si(sol->el[1], n_edge) < 0;
4506
38
}
4507
4508
/* Return the lexicographically smallest rational point in "lp",
4509
 * assuming that all variables are non-negative and performing some
4510
 * additional sanity checks.
4511
 * If "want_integral" is set, then compute the lexicographically smallest
4512
 * integer point instead.
4513
 * In particular, "lp" should not be empty by construction.
4514
 * Double check that this is the case.
4515
 * If dependences are not carried for any of the "n_edge" edges,
4516
 * then return an empty vector.
4517
 *
4518
 * If the schedule_treat_coalescing option is set and
4519
 * if the computed schedule performs loop coalescing on a given node,
4520
 * i.e., if it is of the form
4521
 *
4522
 *  c_i i + c_j j + ...
4523
 *
4524
 * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
4525
 * to cut out this solution.  Repeat this process until no more loop
4526
 * coalescing occurs or until no more dependences can be carried.
4527
 * In the latter case, revert to the previously computed solution.
4528
 *
4529
 * If the caller requests an integral solution and if coalescing should
4530
 * be treated, then perform the coalescing treatment first as
4531
 * an integral solution computed before coalescing treatment
4532
 * would carry the same number of edges and would therefore probably
4533
 * also be coalescing.
4534
 *
4535
 * To allow the coalescing treatment to be performed first,
4536
 * the initial solution is allowed to be rational and it is only
4537
 * cut out (if needed) in the next iteration, if no coalescing measures
4538
 * were taken.
4539
 */
4540
static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
4541
  __isl_take isl_basic_set *lp, int n_edge, int want_integral)
4542
32
{
4543
32
  int i, pos, cut;
4544
32
  isl_ctx *ctx;
4545
32
  isl_tab_lexmin *tl;
4546
32
  isl_vec *sol = NULL, *prev;
4547
32
  int treat_coalescing;
4548
32
  int try_again;
4549
32
4550
32
  if (!lp)
4551
0
    return NULL;
4552
32
  ctx = isl_basic_set_get_ctx(lp);
4553
32
  treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
4554
32
  tl = isl_tab_lexmin_from_basic_set(lp);
4555
32
4556
32
  cut = 0;
4557
38
  do {
4558
38
    int integral;
4559
38
4560
38
    try_again = 0;
4561
38
    if (cut)
4562
2
      tl = isl_tab_lexmin_cut_to_integer(tl);
4563
38
    prev = sol;
4564
38
    sol = non_empty_solution(tl);
4565
38
    if (!sol)
4566
0
      goto error;
4567
38
4568
38
    integral = isl_int_is_one(sol->el[0]);
4569
38
    if (!carries_dependences(sol, n_edge)) {
4570
2
      if (!prev)
4571
0
        prev = isl_vec_alloc(ctx, 0);
4572
2
      isl_vec_free(sol);
4573
2
      sol = prev;
4574
2
      break;
4575
2
    }
4576
36
    prev = isl_vec_free(prev);
4577
36
    cut = want_integral && 
!integral22
;
4578
36
    if (cut)
4579
2
      try_again = 1;
4580
36
    if (!treat_coalescing)
4581
0
      continue;
4582
82
    
for (i = 0; 36
i < graph->n;
++i46
) {
4583
50
      struct isl_sched_node *node = &graph->node[i];
4584
50
4585
50
      pos = find_node_coalescing(node, sol);
4586
50
      if (pos < 0)
4587
0
        goto error;
4588
50
      if (pos < node->nvar)
4589
4
        break;
4590
50
    }
4591
36
    if (i < graph->n) {
4592
4
      try_again = 1;
4593
4
      tl = zero_out_node_coef(tl, &graph->node[i], pos);
4594
4
      cut = 0;
4595
4
    }
4596
36
  } while (try_again);
4597
32
4598
32
  isl_tab_lexmin_free(tl);
4599
32
4600
32
  return sol;
4601
0
error:
4602
0
  isl_tab_lexmin_free(tl);
4603
0
  isl_vec_free(prev);
4604
0
  isl_vec_free(sol);
4605
0
  return NULL;
4606
32
}
4607
4608
/* If "edge" is an edge from a node to itself, then add the corresponding
4609
 * dependence relation to "umap".
4610
 * If "node" has been compressed, then the dependence relation
4611
 * is also compressed first.
4612
 */
4613
static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap,
4614
  struct isl_sched_edge *edge)
4615
46
{
4616
46
  isl_map *map;
4617
46
  struct isl_sched_node *node = edge->src;
4618
46
4619
46
  if (edge->src != edge->dst)
4620
20
    return umap;
4621
26
4622
26
  map = isl_map_copy(edge->map);
4623
26
  if (node->compressed) {
4624
0
    map = isl_map_preimage_domain_multi_aff(map,
4625
0
            isl_multi_aff_copy(node->decompress));
4626
0
    map = isl_map_preimage_range_multi_aff(map,
4627
0
            isl_multi_aff_copy(node->decompress));
4628
0
  }
4629
26
  umap = isl_union_map_add_map(umap, map);
4630
26
  return umap;
4631
26
}
4632
4633
/* If "edge" is an edge from a node to another node, then add the corresponding
4634
 * dependence relation to "umap".
4635
 * If the source or destination nodes of "edge" have been compressed,
4636
 * then the dependence relation is also compressed first.
4637
 */
4638
static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap,
4639
  struct isl_sched_edge *edge)
4640
46
{
4641
46
  isl_map *map;
4642
46
4643
46
  if (edge->src == edge->dst)
4644
26
    return umap;
4645
20
4646
20
  map = isl_map_copy(edge->map);
4647
20
  if (edge->src->compressed)
4648
0
    map = isl_map_preimage_domain_multi_aff(map,
4649
0
            isl_multi_aff_copy(edge->src->decompress));
4650
20
  if (edge->dst->compressed)
4651
0
    map = isl_map_preimage_range_multi_aff(map,
4652
0
            isl_multi_aff_copy(edge->dst->decompress));
4653
20
  umap = isl_union_map_add_map(umap, map);
4654
20
  return umap;
4655
20
}
4656
4657
/* Internal data structure used by union_drop_coalescing_constraints
4658
 * to collect bounds on all relevant statements.
4659
 *
4660
 * "graph" is the schedule constraint graph for which an LP problem
4661
 * is being constructed.
4662
 * "bounds" collects the bounds.
4663
 */
4664
struct isl_collect_bounds_data {
4665
  isl_ctx *ctx;
4666
  struct isl_sched_graph *graph;
4667
  isl_union_set *bounds;
4668
};
4669
4670
/* Add the size bounds for the node with instance deltas in "set"
4671
 * to data->bounds.
4672
 */
4673
static isl_stat collect_bounds(__isl_take isl_set *set, void *user)
4674
24
{
4675
24
  struct isl_collect_bounds_data *data = user;
4676
24
  struct isl_sched_node *node;
4677
24
  isl_space *space;
4678
24
  isl_set *bounds;
4679
24
4680
24
  space = isl_set_get_space(set);
4681
24
  isl_set_free(set);
4682
24
4683
24
  node = graph_find_compressed_node(data->ctx, data->graph, space);
4684
24
  isl_space_free(space);
4685
24
4686
24
  bounds = isl_set_from_basic_set(get_size_bounds(node));
4687
24
  data->bounds = isl_union_set_add_set(data->bounds, bounds);
4688
24
4689
24
  return isl_stat_ok;
4690
24
}
4691
4692
/* Drop some constraints from "delta" that could be exploited
4693
 * to construct loop coalescing schedules.
4694
 * In particular, drop those constraint that bound the difference
4695
 * to the size of the domain.
4696
 * Do this for each set/node in "delta" separately.
4697
 * The parameters are assumed to have been projected out by the caller.
4698
 */
4699
static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx,
4700
  struct isl_sched_graph *graph, __isl_take isl_union_set *delta)
4701
32
{
4702
32
  struct isl_collect_bounds_data data = { ctx, graph };
4703
32
4704
32
  data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4705
32
  if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0)
4706
0
    data.bounds = isl_union_set_free(data.bounds);
4707
32
  delta = isl_union_set_plain_gist(delta, data.bounds);
4708
32
4709
32
  return delta;
4710
32
}
4711
4712
/* Given a non-trivial lineality space "lineality", add the corresponding
4713
 * universe set to data->mask and add a map from elements to
4714
 * other elements along the lines in "lineality" to data->equivalent.
4715
 * If this is the first time this function gets called
4716
 * (data->any_non_trivial is still false), then set data->any_non_trivial and
4717
 * initialize data->mask and data->equivalent.
4718
 *
4719
 * In particular, if the lineality space is defined by equality constraints
4720
 *
4721
 *  E x = 0
4722
 *
4723
 * then construct an affine mapping
4724
 *
4725
 *  f : x -> E x
4726
 *
4727
 * and compute the equivalence relation of having the same image under f:
4728
 *
4729
 *  { x -> x' : E x = E x' }
4730
 */
4731
static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality,
4732
  struct isl_exploit_lineality_data *data)
4733
2
{
4734
2
  isl_mat *eq;
4735
2
  isl_space *space;
4736
2
  isl_set *univ;
4737
2
  isl_multi_aff *ma;
4738
2
  isl_multi_pw_aff *mpa;
4739
2
  isl_map *map;
4740
2
  int n;
4741
2
4742
2
  if (!lineality)
4743
0
    return isl_stat_error;
4744
2
  if (isl_basic_set_dim(lineality, isl_dim_div) != 0)
4745
2
    
isl_die0
(isl_basic_set_get_ctx(lineality), isl_error_internal,
4746
2
      "local variables not allowed", goto error);
4747
2
4748
2
  space = isl_basic_set_get_space(lineality);
4749
2
  if (!data->any_non_trivial) {
4750
2
    data->equivalent = isl_union_map_empty(isl_space_copy(space));
4751
2
    data->mask = isl_union_set_empty(isl_space_copy(space));
4752
2
  }
4753
2
  data->any_non_trivial = isl_bool_true;
4754
2
4755
2
  univ = isl_set_universe(isl_space_copy(space));
4756
2
  data->mask = isl_union_set_add_set(data->mask, univ);
4757
2
4758
2
  eq = isl_basic_set_extract_equalities(lineality);
4759
2
  n = isl_mat_rows(eq);
4760
2
  eq = isl_mat_insert_zero_rows(eq, 0, 1);
4761
2
  eq = isl_mat_set_element_si(eq, 0, 0, 1);
4762
2
  space = isl_space_from_domain(space);
4763
2
  space = isl_space_add_dims(space, isl_dim_out, n);
4764
2
  ma = isl_multi_aff_from_aff_mat(space, eq);
4765
2
  mpa = isl_multi_pw_aff_from_multi_aff(ma);
4766
2
  map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa));
4767
2
  data->equivalent = isl_union_map_add_map(data->equivalent, map);
4768
2
4769
2
  isl_basic_set_free(lineality);
4770
2
  return isl_stat_ok;
4771
0
error:
4772
0
  isl_basic_set_free(lineality);
4773
0
  return isl_stat_error;
4774
2
}
4775
4776
/* Check if the lineality space "set" is non-trivial (i.e., is not just
4777
 * the origin or, in other words, satisfies a number of equality constraints
4778
 * that is smaller than the dimension of the set).
4779
 * If so, extend data->mask and data->equivalent accordingly.
4780
 *
4781
 * The input should not have any local variables already, but
4782
 * isl_set_remove_divs is called to make sure it does not.
4783
 */
4784
static isl_stat add_lineality(__isl_take isl_set *set, void *user)
4785
24
{
4786
24
  struct isl_exploit_lineality_data *data = user;
4787
24
  isl_basic_set *hull;
4788
24
  int dim, n_eq;
4789
24
4790
24
  set = isl_set_remove_divs(set);
4791
24
  hull = isl_set_unshifted_simple_hull(set);
4792
24
  dim = isl_basic_set_dim(hull, isl_dim_set);
4793
24
  n_eq = isl_basic_set_n_equality(hull);
4794
24
  if (!hull)
4795
0
    return isl_stat_error;
4796
24
  if (dim != n_eq)
4797
2
    return add_non_trivial_lineality(hull, data);
4798
22
  isl_basic_set_free(hull);
4799
22
  return isl_stat_ok;
4800
22
}
4801
4802
/* Check if the difference set on intra-node schedule constraints "intra"
4803
 * has any non-trivial lineality space.
4804
 * If so, then extend the difference set to a difference set
4805
 * on equivalent elements.  That is, if "intra" is
4806
 *
4807
 *  { y - x : (x,y) \in V }
4808
 *
4809
 * and elements are equivalent if they have the same image under f,
4810
 * then return
4811
 *
4812
 *  { y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4813
 *
4814
 * or, since f is linear,
4815
 *
4816
 *  { y' - x' : (x,y) \in V and f(y - x) = f(y' - x') }
4817
 *
4818
 * The results of the search for non-trivial lineality spaces is stored
4819
 * in "data".
4820
 */
4821
static __isl_give isl_union_set *exploit_intra_lineality(
4822
  __isl_take isl_union_set *intra,
4823
  struct isl_exploit_lineality_data *data)
4824
32
{
4825
32
  isl_union_set *lineality;
4826
32
  isl_union_set *uset;
4827
32
4828
32
  data->any_non_trivial = isl_bool_false;
4829
32
  lineality = isl_union_set_copy(intra);
4830
32
  lineality = isl_union_set_combined_lineality_space(lineality);
4831
32
  if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0)
4832
0
    data->any_non_trivial = isl_bool_error;
4833
32
  isl_union_set_free(lineality);
4834
32
4835
32
  if (data->any_non_trivial < 0)
4836
0
    return isl_union_set_free(intra);
4837
32
  if (!data->any_non_trivial)
4838
30
    return intra;
4839
2
4840
2
  uset = isl_union_set_copy(intra);
4841
2
  intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask));
4842
2
  uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent));
4843
2
  intra = isl_union_set_union(intra, uset);
4844
2
4845
2
  intra = isl_union_set_remove_divs(intra);
4846
2
4847
2
  return intra;
4848
2
}
4849
4850
/* If the difference set on intra-node schedule constraints was found to have
4851
 * any non-trivial lineality space by exploit_intra_lineality,
4852
 * as recorded in "data", then extend the inter-node
4853
 * schedule constraints "inter" to schedule constraints on equivalent elements.
4854
 * That is, if "inter" is V and
4855
 * elements are equivalent if they have the same image under f, then return
4856
 *
4857
 *  { (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4858
 */
4859
static __isl_give isl_union_map *exploit_inter_lineality(
4860
  __isl_take isl_union_map *inter,
4861
  struct isl_exploit_lineality_data *data)
4862
32
{
4863
32
  isl_union_map *umap;
4864
32
4865
32
  if (data->any_non_trivial < 0)
4866
0
    return isl_union_map_free(inter);
4867
32
  if (!data->any_non_trivial)
4868
30
    return inter;
4869
2
4870
2
  umap = isl_union_map_copy(inter);
4871
2
  inter = isl_union_map_subtract_range(inter,
4872
2
        isl_union_set_copy(data->mask));
4873
2
  umap = isl_union_map_apply_range(umap,
4874
2
        isl_union_map_copy(data->equivalent));
4875
2
  inter = isl_union_map_union(inter, umap);
4876
2
  umap = isl_union_map_copy(inter);
4877
2
  inter = isl_union_map_subtract_domain(inter,
4878
2
        isl_union_set_copy(data->mask));
4879
2
  umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent),
4880
2
        umap);
4881
2
  inter = isl_union_map_union(inter, umap);
4882
2
4883
2
  inter = isl_union_map_remove_divs(inter);
4884
2
4885
2
  return inter;
4886
2
}
4887
4888
/* For each (conditional) validity edge in "graph",
4889
 * add the corresponding dependence relation using "add"
4890
 * to a collection of dependence relations and return the result.
4891
 * If "coincidence" is set, then coincidence edges are considered as well.
4892
 */
4893
static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph,
4894
  __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap,
4895
    struct isl_sched_edge *edge), int coincidence)
4896
64
{
4897
64
  int i;
4898
64
  isl_space *space;
4899
64
  isl_union_map *umap;
4900
64
4901
64
  space = isl_space_copy(graph->node[0].space);
4902
64
  umap = isl_union_map_empty(space);
4903
64
4904
156
  for (i = 0; i < graph->n_edge; 
++i92
) {
4905
92
    struct isl_sched_edge *edge = &graph->edge[i];
4906
92
4907
92
    if (!is_any_validity(edge) &&
4908
92
        
(0
!coincidence0
||
!is_coincidence(edge)0
))
4909
0
      continue;
4910
92
4911
92
    umap = add(umap, edge);
4912
92
  }
4913
64
4914
64
  return umap;
4915
64
}
4916
4917
/* Project out all parameters from "uset" and return the result.
4918
 */
4919
static __isl_give isl_union_set *union_set_drop_parameters(
4920
  __isl_take isl_union_set *uset)
4921
32
{
4922
32
  unsigned nparam;
4923
32
4924
32
  nparam = isl_union_set_dim(uset, isl_dim_param);
4925
32
  return isl_union_set_project_out(uset, isl_dim_param, 0, nparam);
4926
32
}
4927
4928
/* For each dependence relation on a (conditional) validity edge
4929
 * from a node to itself,
4930
 * construct the set of coefficients of valid constraints for elements
4931
 * in that dependence relation and collect the results.
4932
 * If "coincidence" is set, then coincidence edges are considered as well.
4933
 *
4934
 * In particular, for each dependence relation R, constraints
4935
 * on coefficients (c_0, c_x) are constructed such that
4936
 *
4937
 *  c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
4938
 *
4939
 * If the schedule_treat_coalescing option is set, then some constraints
4940
 * that could be exploited to construct coalescing schedules
4941
 * are removed before the dual is computed, but after the parameters
4942
 * have been projected out.
4943
 * The entire computation is essentially the same as that performed
4944
 * by intra_coefficients, except that it operates on multiple
4945
 * edges together and that the parameters are always projected out.
4946
 *
4947
 * Additionally, exploit any non-trivial lineality space
4948
 * in the difference set after removing coalescing constraints and
4949
 * store the results of the non-trivial lineality space detection in "data".
4950
 * The procedure is currently run unconditionally, but it is unlikely
4951
 * to find any non-trivial lineality spaces if no coalescing constraints
4952
 * have been removed.
4953
 *
4954
 * Note that if a dependence relation is a union of basic maps,
4955
 * then each basic map needs to be treated individually as it may only
4956
 * be possible to carry the dependences expressed by some of those
4957
 * basic maps and not all of them.
4958
 * The collected validity constraints are therefore not coalesced and
4959
 * it is assumed that they are not coalesced automatically.
4960
 * Duplicate basic maps can be removed, however.
4961
 * In particular, if the same basic map appears as a disjunct
4962
 * in multiple edges, then it only needs to be carried once.
4963
 */
4964
static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx,
4965
  struct isl_sched_graph *graph, int coincidence,
4966
  struct isl_exploit_lineality_data *data)
4967
32
{
4968
32
  isl_union_map *intra;
4969
32
  isl_union_set *delta;
4970
32
  isl_basic_set_list *list;
4971
32
4972
32
  intra = collect_validity(graph, &add_intra, coincidence);
4973
32
  delta = isl_union_map_deltas(intra);
4974
32
  delta = union_set_drop_parameters(delta);
4975
32
  delta = isl_union_set_remove_divs(delta);
4976
32
  if (isl_options_get_schedule_treat_coalescing(ctx))
4977
32
    delta = union_drop_coalescing_constraints(ctx, graph, delta);
4978
32
  delta = exploit_intra_lineality(delta, data);
4979
32
  list = isl_union_set_get_basic_set_list(delta);
4980
32
  isl_union_set_free(delta);
4981
32
4982
32
  return isl_basic_set_list_coefficients(list);
4983
32
}
4984
4985
/* For each dependence relation on a (conditional) validity edge
4986
 * from a node to some other node,
4987
 * construct the set of coefficients of valid constraints for elements
4988
 * in that dependence relation and collect the results.
4989
 * If "coincidence" is set, then coincidence edges are considered as well.
4990
 *
4991
 * In particular, for each dependence relation R, constraints
4992
 * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
4993
 *
4994
 *  c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
4995
 *
4996
 * This computation is essentially the same as that performed
4997
 * by inter_coefficients, except that it operates on multiple
4998
 * edges together.
4999
 *
5000
 * Additionally, exploit any non-trivial lineality space
5001
 * that may have been discovered by collect_intra_validity
5002
 * (as stored in "data").
5003
 *
5004
 * Note that if a dependence relation is a union of basic maps,
5005
 * then each basic map needs to be treated individually as it may only
5006
 * be possible to carry the dependences expressed by some of those
5007
 * basic maps and not all of them.
5008
 * The collected validity constraints are therefore not coalesced and
5009
 * it is assumed that they are not coalesced automatically.
5010
 * Duplicate basic maps can be removed, however.
5011
 * In particular, if the same basic map appears as a disjunct
5012
 * in multiple edges, then it only needs to be carried once.
5013
 */
5014
static __isl_give isl_basic_set_list *collect_inter_validity(
5015
  struct isl_sched_graph *graph, int coincidence,
5016
  struct isl_exploit_lineality_data *data)
5017
32
{
5018
32
  isl_union_map *inter;
5019
32
  isl_union_set *wrap;
5020
32
  isl_basic_set_list *list;
5021
32
5022
32
  inter = collect_validity(graph, &add_inter, coincidence);
5023
32
  inter = exploit_inter_lineality(inter, data);
5024
32
  inter = isl_union_map_remove_divs(inter);
5025
32
  wrap = isl_union_map_wrap(inter);
5026
32
  list = isl_union_set_get_basic_set_list(wrap);
5027
32
  isl_union_set_free(wrap);
5028
32
  return isl_basic_set_list_coefficients(list);
5029
32
}
5030
5031
/* Construct an LP problem for finding schedule coefficients
5032
 * such that the schedule carries as many of the "n_edge" groups of
5033
 * dependences as possible based on the corresponding coefficient
5034
 * constraints and return the lexicographically smallest non-trivial solution.
5035
 * "intra" is the sequence of coefficient constraints for intra-node edges.
5036
 * "inter" is the sequence of coefficient constraints for inter-node edges.
5037
 * If "want_integral" is set, then compute an integral solution
5038
 * for the coefficients rather than using the numerators
5039
 * of a rational solution.
5040
 * "carry_inter" indicates whether inter-node edges should be carried or
5041
 * only respected.
5042
 *
5043
 * If none of the "n_edge" groups can be carried
5044
 * then return an empty vector.
5045
 */
5046
static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx,
5047
  struct isl_sched_graph *graph, int n_edge,
5048
  __isl_keep isl_basic_set_list *intra,
5049
  __isl_keep isl_basic_set_list *inter, int want_integral,
5050
  int carry_inter)
5051
32
{
5052
32
  isl_basic_set *lp;
5053
32
5054
32
  if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0)
5055
0
    return NULL;
5056
32
5057
32
  lp = isl_basic_set_copy(graph->lp);
5058
32
  return non_neg_lexmin(graph, lp, n_edge, want_integral);
5059
32
}
5060
5061
/* Construct an LP problem for finding schedule coefficients
5062
 * such that the schedule carries as many of the validity dependences
5063
 * as possible and
5064
 * return the lexicographically smallest non-trivial solution.
5065
 * If "fallback" is set, then the carrying is performed as a fallback
5066
 * for the Pluto-like scheduler.
5067
 * If "coincidence" is set, then try and carry coincidence edges as well.
5068
 *
5069
 * The variable "n_edge" stores the number of groups that should be carried.
5070
 * If none of the "n_edge" groups can be carried
5071
 * then return an empty vector.
5072
 * If, moreover, "n_edge" is zero, then the LP problem does not even
5073
 * need to be constructed.
5074
 *
5075
 * If a fallback solution is being computed, then compute an integral solution
5076
 * for the coefficients rather than using the numerators
5077
 * of a rational solution.
5078
 *
5079
 * If a fallback solution is being computed, if there are any intra-node
5080
 * dependences, and if requested by the user, then first try
5081
 * to only carry those intra-node dependences.
5082
 * If this fails to carry any dependences, then try again
5083
 * with the inter-node dependences included.
5084
 */
5085
static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx,
5086
  struct isl_sched_graph *graph, int fallback, int coincidence)
5087
32
{
5088
32
  int n_intra, n_inter;
5089
32
  int n_edge;
5090
32
  struct isl_carry carry = { 0 };
5091
32
  isl_vec *sol;
5092
32
5093
32
  carry.intra = collect_intra_validity(ctx, graph, coincidence,
5094
32
            &carry.lineality);
5095
32
  carry.inter = collect_inter_validity(graph, coincidence,
5096
32
            &carry.lineality);
5097
32
  if (!carry.intra || !carry.inter)
5098
0
    goto error;
5099
32
  n_intra = isl_basic_set_list_n_basic_set(carry.intra);
5100
32
  n_inter = isl_basic_set_list_n_basic_set(carry.inter);
5101
32
5102
32
  if (fallback && 
n_intra > 018
&&
5103
32
      
isl_options_get_schedule_carry_self_first(ctx)14
) {
5104
14
    sol = compute_carrying_sol_coef(ctx, graph, n_intra,
5105
14
        carry.intra, carry.inter, fallback, 0);
5106
14
    if (!sol || sol->size != 0 || 
n_inter == 00
) {
5107
14
      isl_carry_clear(&carry);
5108
14
      return sol;
5109
14
    }
5110
0
    isl_vec_free(sol);
5111
0
  }
5112
32
5113
32
  n_edge = n_intra + n_inter;
5114
18
  if (n_edge == 0) {
5115
0
    isl_carry_clear(&carry);
5116
0
    return isl_vec_alloc(ctx, 0);
5117
0
  }
5118
18
5119
18
  sol = compute_carrying_sol_coef(ctx, graph, n_edge,
5120
18
        carry.intra, carry.inter, fallback, 1);
5121
18
  isl_carry_clear(&carry);
5122
18
  return sol;
5123
0
error:
5124
0
  isl_carry_clear(&carry);
5125
0
  return NULL;
5126
18
}
5127
5128
/* Construct a schedule row for each node such that as many validity dependences
5129
 * as possible are carried and then continue with the next band.
5130
 * If "fallback" is set, then the carrying is performed as a fallback
5131
 * for the Pluto-like scheduler.
5132
 * If "coincidence" is set, then try and carry coincidence edges as well.
5133
 *
5134
 * If there are no validity dependences, then no dependence can be carried and
5135
 * the procedure is guaranteed to fail.  If there is more than one component,
5136
 * then try computing a schedule on each component separately
5137
 * to prevent or at least postpone this failure.
5138
 *
5139
 * If a schedule row is computed, then check that dependences are carried
5140
 * for at least one of the edges.
5141
 *
5142
 * If the computed schedule row turns out to be trivial on one or
5143
 * more nodes where it should not be trivial, then we throw it away
5144
 * and try again on each component separately.
5145
 *
5146
 * If there is only one component, then we accept the schedule row anyway,
5147
 * but we do not consider it as a complete row and therefore do not
5148
 * increment graph->n_row.  Note that the ranks of the nodes that
5149
 * do get a non-trivial schedule part will get updated regardless and
5150
 * graph->maxvar is computed based on these ranks.  The test for
5151
 * whether more schedule rows are required in compute_schedule_wcc
5152
 * is therefore not affected.
5153
 *
5154
 * Insert a band corresponding to the schedule row at position "node"
5155
 * of the schedule tree and continue with the construction of the schedule.
5156
 * This insertion and the continued construction is performed by split_scaled
5157
 * after optionally checking for non-trivial common divisors.
5158
 */
5159
static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node,
5160
  struct isl_sched_graph *graph, int fallback, int coincidence)
5161
32
{
5162
32
  int trivial;
5163
32
  isl_ctx *ctx;
5164
32
  isl_vec *sol;
5165
32
5166
32
  if (!node)
5167
0
    return NULL;
5168
32
5169
32
  ctx = isl_schedule_node_get_ctx(node);
5170
32
  sol = compute_carrying_sol(ctx, graph, fallback, coincidence);
5171
32
  if (!sol)
5172
0
    return isl_schedule_node_free(node);
5173
32
  if (sol->size == 0) {
5174
0
    isl_vec_free(sol);
5175
0
    if (graph->scc > 1)
5176
0
      return compute_component_schedule(node, graph, 1);
5177
0
    isl_die(ctx, isl_error_unknown, "unable to carry dependences",
5178
0
      return isl_schedule_node_free(node));
5179
0
  }
5180
32
5181
32
  trivial = is_any_trivial(graph, sol);
5182
32
  if (trivial < 0) {
5183
0
    sol = isl_vec_free(sol);
5184
32
  } else if (trivial && 
graph->scc > 14
) {
5185
2
    isl_vec_free(sol);
5186
2
    return compute_component_schedule(node, graph, 1);
5187
2
  }
5188
30
5189
30
  if (update_schedule(graph, sol, 0) < 0)
5190
0
    return isl_schedule_node_free(node);
5191
30
  if (trivial)
5192
2
    graph->n_row--;
5193
30
5194
30
  return split_scaled(node, graph);
5195
30
}
5196
5197
/* Construct a schedule row for each node such that as many validity dependences
5198
 * as possible are carried and then continue with the next band.
5199
 * Do so as a fallback for the Pluto-like scheduler.
5200
 * If "coincidence" is set, then try and carry coincidence edges as well.
5201
 */
5202
static __isl_give isl_schedule_node *carry_fallback(
5203
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5204
  int coincidence)
5205
18
{
5206
18
  return carry(node, graph, 1, coincidence);
5207
18
}
5208
5209
/* Construct a schedule row for each node such that as many validity dependences
5210
 * as possible are carried and then continue with the next band.
5211
 * Do so for the case where the Feautrier scheduler was selected
5212
 * by the user.
5213
 */
5214
static __isl_give isl_schedule_node *carry_feautrier(
5215
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5216
14
{
5217
14
  return carry(node, graph, 0, 0);
5218
14
}
5219
5220
/* Construct a schedule row for each node such that as many validity dependences
5221
 * as possible are carried and then continue with the next band.
5222
 * Do so as a fallback for the Pluto-like scheduler.
5223
 */
5224
static __isl_give isl_schedule_node *carry_dependences(
5225
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5226
9
{
5227
9
  return carry_fallback(node, graph, 0);
5228
9
}
5229
5230
/* Construct a schedule row for each node such that as many validity or
5231
 * coincidence dependences as possible are carried and
5232
 * then continue with the next band.
5233
 * Do so as a fallback for the Pluto-like scheduler.
5234
 */
5235
static __isl_give isl_schedule_node *carry_coincidence(
5236
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5237
9
{
5238
9
  return carry_fallback(node, graph, 1);
5239
9
}
5240
5241
/* Topologically sort statements mapped to the same schedule iteration
5242
 * and add insert a sequence node in front of "node"
5243
 * corresponding to this order.
5244
 * If "initialized" is set, then it may be assumed that compute_maxvar
5245
 * has been called on the current band.  Otherwise, call
5246
 * compute_maxvar if and before carry_dependences gets called.
5247
 *
5248
 * If it turns out to be impossible to sort the statements apart,
5249
 * because different dependences impose different orderings
5250
 * on the statements, then we extend the schedule such that
5251
 * it carries at least one more dependence.
5252
 */
5253
static __isl_give isl_schedule_node *sort_statements(
5254
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5255
  int initialized)
5256
171
{
5257
171
  isl_ctx *ctx;
5258
171
  isl_union_set_list *filters;
5259
171
5260
171
  if (!node)
5261
0
    return NULL;
5262
171
5263
171
  ctx = isl_schedule_node_get_ctx(node);
5264
171
  if (graph->n < 1)
5265
171
    
isl_die0
(ctx, isl_error_internal,
5266
171
      "graph should have at least one node",
5267
171
      return isl_schedule_node_free(node));
5268
171
5269
171
  if (graph->n == 1)
5270
153
    return node;
5271
18
5272
18
  if (update_edges(ctx, graph) < 0)
5273
0
    return isl_schedule_node_free(node);
5274
18
5275
18
  if (graph->n_edge == 0)
5276
0
    return node;
5277
18
5278
18
  if (detect_sccs(ctx, graph) < 0)
5279
0
    return isl_schedule_node_free(node);
5280
18
5281
18
  next_band(graph);
5282
18
  if (graph->scc < graph->n) {
5283
2
    if (!initialized && 
compute_maxvar(graph) < 00
)
5284
0
      return isl_schedule_node_free(node);
5285
2
    return carry_dependences(node, graph);
5286
2
  }
5287
16
5288
16
  filters = extract_sccs(ctx, graph);
5289
16
  node = isl_schedule_node_insert_sequence(node, filters);
5290
16
5291
16
  return node;
5292
16
}
5293
5294
/* Are there any (non-empty) (conditional) validity edges in the graph?
5295
 */
5296
static int has_validity_edges(struct isl_sched_graph *graph)
5297
34
{
5298
34
  int i;
5299
34
5300
46
  for (i = 0; i < graph->n_edge; 
++i12
) {
5301
26
    int empty;
5302
26
5303
26
    empty = isl_map_plain_is_empty(graph->edge[i].map);
5304
26
    if (empty < 0)
5305
0
      return -1;
5306
26
    if (empty)
5307
10
      continue;
5308
16
    if (is_any_validity(&graph->edge[i]))
5309
14
      return 1;
5310
16
  }
5311
34
5312
34
  
return 020
;
5313
34
}
5314
5315
/* Should we apply a Feautrier step?
5316
 * That is, did the user request the Feautrier algorithm and are
5317
 * there any validity dependences (left)?
5318
 */
5319
static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
5320
325
{
5321
325
  if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
5322
325
    
return 0291
;
5323
34
5324
34
  return has_validity_edges(graph);
5325
34
}
5326
5327
/* Compute a schedule for a connected dependence graph using Feautrier's
5328
 * multi-dimensional scheduling algorithm and return the updated schedule node.
5329
 *
5330
 * The original algorithm is described in [1].
5331
 * The main idea is to minimize the number of scheduling dimensions, by
5332
 * trying to satisfy as many dependences as possible per scheduling dimension.
5333
 *
5334
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
5335
 *     Problem, Part II: Multi-Dimensional Time.
5336
 *     In Intl. Journal of Parallel Programming, 1992.
5337
 */
5338
static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier(
5339
  isl_schedule_node *node, struct isl_sched_graph *graph)
5340
14
{
5341
14
  return carry_feautrier(node, graph);
5342
14
}
5343
5344
/* Turn off the "local" bit on all (condition) edges.
5345
 */
5346
static void clear_local_edges(struct isl_sched_graph *graph)
5347
385
{
5348
385
  int i;
5349
385
5350
743
  for (i = 0; i < graph->n_edge; 
++i358
)
5351
358
    if (is_condition(&graph->edge[i]))
5352
44
      clear_local(&graph->edge[i]);
5353
385
}
5354
5355
/* Does "graph" have both condition and conditional validity edges?
5356
 */
5357
static int need_condition_check(struct isl_sched_graph *graph)
5358
385
{
5359
385
  int i;
5360
385
  int any_condition = 0;
5361
385
  int any_conditional_validity = 0;
5362
385
5363
743
  for (i = 0; i < graph->n_edge; 
++i358
) {
5364
358
    if (is_condition(&graph->edge[i]))
5365
44
      any_condition = 1;
5366
358
    if (is_conditional_validity(&graph->edge[i]))
5367
45
      any_conditional_validity = 1;
5368
358
  }
5369
385
5370
385
  return any_condition && 
any_conditional_validity27
;
5371
385
}
5372
5373
/* Does "graph" contain any coincidence edge?
5374
 */
5375
static int has_any_coincidence(struct isl_sched_graph *graph)
5376
385
{
5377
385
  int i;
5378
385
5379
532
  for (i = 0; i < graph->n_edge; 
++i147
)
5380
304
    if (is_coincidence(&graph->edge[i]))
5381
157
      return 1;
5382
385
5383
385
  
return 0228
;
5384
385
}
5385
5386
/* Extract the final schedule row as a map with the iteration domain
5387
 * of "node" as domain.
5388
 */
5389
static __isl_give isl_map *final_row(struct isl_sched_node *node)
5390
142
{
5391
142
  isl_multi_aff *ma;
5392
142
  int row;
5393
142
5394
142
  row = isl_mat_rows(node->sched) - 1;
5395
142
  ma = node_extract_partial_schedule_multi_aff(node, row, 1);
5396
142
  return isl_map_from_multi_aff(ma);
5397
142
}
5398
5399
/* Is the conditional validity dependence in the edge with index "edge_index"
5400
 * violated by the latest (i.e., final) row of the schedule?
5401
 * That is, is i scheduled after j
5402
 * for any conditional validity dependence i -> j?
5403
 */
5404
static int is_violated(struct isl_sched_graph *graph, int edge_index)
5405
71
{
5406
71
  isl_map *src_sched, *dst_sched, *map;
5407
71
  struct isl_sched_edge *edge = &graph->edge[edge_index];
5408
71
  int empty;
5409
71
5410
71
  src_sched = final_row(edge->src);
5411
71
  dst_sched = final_row(edge->dst);
5412
71
  map = isl_map_copy(edge->map);
5413
71
  map = isl_map_apply_domain(map, src_sched);
5414
71
  map = isl_map_apply_range(map, dst_sched);
5415
71
  map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0);
5416
71
  empty = isl_map_is_empty(map);
5417
71
  isl_map_free(map);
5418
71
5419
71
  if (empty < 0)
5420
0
    return -1;
5421
71
5422
71
  return !empty;
5423
71
}
5424
5425
/* Does "graph" have any satisfied condition edges that
5426
 * are adjacent to the conditional validity constraint with
5427
 * domain "conditional_source" and range "conditional_sink"?
5428
 *