Coverage Report

Created: 2018-10-23 09:19

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/External/isl/isl_scheduler.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2011      INRIA Saclay
3
 * Copyright 2012-2014 Ecole Normale Superieure
4
 * Copyright 2015-2016 Sven Verdoolaege
5
 * Copyright 2016      INRIA Paris
6
 * Copyright 2017      Sven Verdoolaege
7
 *
8
 * Use of this software is governed by the MIT license
9
 *
10
 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
11
 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
12
 * 91893 Orsay, France
13
 * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
14
 * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
15
 * CS 42112, 75589 Paris Cedex 12, France
16
 */
17
18
#include <isl_ctx_private.h>
19
#include <isl_map_private.h>
20
#include <isl_space_private.h>
21
#include <isl_aff_private.h>
22
#include <isl/hash.h>
23
#include <isl/id.h>
24
#include <isl/constraint.h>
25
#include <isl/schedule.h>
26
#include <isl_schedule_constraints.h>
27
#include <isl/schedule_node.h>
28
#include <isl_mat_private.h>
29
#include <isl_vec_private.h>
30
#include <isl/set.h>
31
#include <isl_union_set_private.h>
32
#include <isl_seq.h>
33
#include <isl_tab.h>
34
#include <isl_dim_map.h>
35
#include <isl/map_to_basic_set.h>
36
#include <isl_sort.h>
37
#include <isl_options_private.h>
38
#include <isl_tarjan.h>
39
#include <isl_morph.h>
40
#include <isl/ilp.h>
41
#include <isl_val_private.h>
42
43
/*
44
 * The scheduling algorithm implemented in this file was inspired by
45
 * Bondhugula et al., "Automatic Transformations for Communication-Minimized
46
 * Parallelization and Locality Optimization in the Polyhedral Model".
47
 *
48
 * For a detailed description of the variant implemented in isl,
49
 * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017).
50
 */
51
52
53
/* Internal information about a node that is used during the construction
54
 * of a schedule.
55
 * space represents the original space in which the domain lives;
56
 *  that is, the space is not affected by compression
57
 * sched is a matrix representation of the schedule being constructed
58
 *  for this node; if compressed is set, then this schedule is
59
 *  defined over the compressed domain space
60
 * sched_map is an isl_map representation of the same (partial) schedule
61
 *  sched_map may be NULL; if compressed is set, then this map
62
 *  is defined over the uncompressed domain space
63
 * rank is the number of linearly independent rows in the linear part
64
 *  of sched
65
 * the rows of "vmap" represent a change of basis for the node
66
 *  variables; the first rank rows span the linear part of
67
 *  the schedule rows; the remaining rows are linearly independent
68
 * the rows of "indep" represent linear combinations of the schedule
69
 * coefficients that are non-zero when the schedule coefficients are
70
 * linearly independent of previously computed schedule rows.
71
 * start is the first variable in the LP problem in the sequences that
72
 *  represents the schedule coefficients of this node
73
 * nvar is the dimension of the (compressed) domain
74
 * nparam is the number of parameters or 0 if we are not constructing
75
 *  a parametric schedule
76
 *
77
 * If compressed is set, then hull represents the constraints
78
 * that were used to derive the compression, while compress and
79
 * decompress map the original space to the compressed space and
80
 * vice versa.
81
 *
82
 * scc is the index of SCC (or WCC) this node belongs to
83
 *
84
 * "cluster" is only used inside extract_clusters and identifies
85
 * the cluster of SCCs that the node belongs to.
86
 *
87
 * coincident contains a boolean for each of the rows of the schedule,
88
 * indicating whether the corresponding scheduling dimension satisfies
89
 * the coincidence constraints in the sense that the corresponding
90
 * dependence distances are zero.
91
 *
92
 * If the schedule_treat_coalescing option is set, then
93
 * "sizes" contains the sizes of the (compressed) instance set
94
 * in each direction.  If there is no fixed size in a given direction,
95
 * then the corresponding size value is set to infinity.
96
 * If the schedule_treat_coalescing option or the schedule_max_coefficient
97
 * option is set, then "max" contains the maximal values for
98
 * schedule coefficients of the (compressed) variables.  If no bound
99
 * needs to be imposed on a particular variable, then the corresponding
100
 * value is negative.
101
 * If not NULL, then "bounds" contains a non-parametric set
102
 * in the compressed space that is bounded by the size in each direction.
103
 */
104
struct isl_sched_node {
105
  isl_space *space;
106
  int compressed;
107
  isl_set *hull;
108
  isl_multi_aff *compress;
109
  isl_multi_aff *decompress;
110
  isl_mat *sched;
111
  isl_map *sched_map;
112
  int  rank;
113
  isl_mat *indep;
114
  isl_mat *vmap;
115
  int  start;
116
  int  nvar;
117
  int  nparam;
118
119
  int  scc;
120
  int  cluster;
121
122
  int *coincident;
123
124
  isl_multi_val *sizes;
125
  isl_basic_set *bounds;
126
  isl_vec *max;
127
};
128
129
static int node_has_tuples(const void *entry, const void *val)
130
1.71k
{
131
1.71k
  struct isl_sched_node *node = (struct isl_sched_node *)entry;
132
1.71k
  isl_space *space = (isl_space *) val;
133
1.71k
134
1.71k
  return isl_space_has_equal_tuples(node->space, space);
135
1.71k
}
136
137
static int node_scc_exactly(struct isl_sched_node *node, int scc)
138
1.11k
{
139
1.11k
  return node->scc == scc;
140
1.11k
}
141
142
static int node_scc_at_most(struct isl_sched_node *node, int scc)
143
18
{
144
18
  return node->scc <= scc;
145
18
}
146
147
static int node_scc_at_least(struct isl_sched_node *node, int scc)
148
18
{
149
18
  return node->scc >= scc;
150
18
}
151
152
/* An edge in the dependence graph.  An edge may be used to
153
 * ensure validity of the generated schedule, to minimize the dependence
154
 * distance or both
155
 *
156
 * map is the dependence relation, with i -> j in the map if j depends on i
157
 * tagged_condition and tagged_validity contain the union of all tagged
158
 *  condition or conditional validity dependence relations that
159
 *  specialize the dependence relation "map"; that is,
160
 *  if (i -> a) -> (j -> b) is an element of "tagged_condition"
161
 *  or "tagged_validity", then i -> j is an element of "map".
162
 *  If these fields are NULL, then they represent the empty relation.
163
 * src is the source node
164
 * dst is the sink node
165
 *
166
 * types is a bit vector containing the types of this edge.
167
 * validity is set if the edge is used to ensure correctness
168
 * coincidence is used to enforce zero dependence distances
169
 * proximity is set if the edge is used to minimize dependence distances
170
 * condition is set if the edge represents a condition
171
 *  for a conditional validity schedule constraint
172
 * local can only be set for condition edges and indicates that
173
 *  the dependence distance over the edge should be zero
174
 * conditional_validity is set if the edge is used to conditionally
175
 *  ensure correctness
176
 *
177
 * For validity edges, start and end mark the sequence of inequality
178
 * constraints in the LP problem that encode the validity constraint
179
 * corresponding to this edge.
180
 *
181
 * During clustering, an edge may be marked "no_merge" if it should
182
 * not be used to merge clusters.
183
 * The weight is also only used during clustering and it is
184
 * an indication of how many schedule dimensions on either side
185
 * of the schedule constraints can be aligned.
186
 * If the weight is negative, then this means that this edge was postponed
187
 * by has_bounded_distances or any_no_merge.  The original weight can
188
 * be retrieved by adding 1 + graph->max_weight, with "graph"
189
 * the graph containing this edge.
190
 */
191
struct isl_sched_edge {
192
  isl_map *map;
193
  isl_union_map *tagged_condition;
194
  isl_union_map *tagged_validity;
195
196
  struct isl_sched_node *src;
197
  struct isl_sched_node *dst;
198
199
  unsigned types;
200
201
  int start;
202
  int end;
203
204
  int no_merge;
205
  int weight;
206
};
207
208
/* Is "edge" marked as being of type "type"?
209
 */
210
static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type)
211
12.7k
{
212
12.7k
  return ISL_FL_ISSET(edge->types, 1 << type);
213
12.7k
}
214
215
/* Mark "edge" as being of type "type".
216
 */
217
static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
218
677
{
219
677
  ISL_FL_SET(edge->types, 1 << type);
220
677
}
221
222
/* No longer mark "edge" as being of type "type"?
223
 */
224
static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
225
44
{
226
44
  ISL_FL_CLR(edge->types, 1 << type);
227
44
}
228
229
/* Is "edge" marked as a validity edge?
230
 */
231
static int is_validity(struct isl_sched_edge *edge)
232
3.39k
{
233
3.39k
  return is_type(edge, isl_edge_validity);
234
3.39k
}
235
236
/* Mark "edge" as a validity edge.
237
 */
238
static void set_validity(struct isl_sched_edge *edge)
239
2
{
240
2
  set_type(edge, isl_edge_validity);
241
2
}
242
243
/* Is "edge" marked as a proximity edge?
244
 */
245
static int is_proximity(struct isl_sched_edge *edge)
246
1.78k
{
247
1.78k
  return is_type(edge, isl_edge_proximity);
248
1.78k
}
249
250
/* Is "edge" marked as a local edge?
251
 */
252
static int is_local(struct isl_sched_edge *edge)
253
2.21k
{
254
2.21k
  return is_type(edge, isl_edge_local);
255
2.21k
}
256
257
/* Mark "edge" as a local edge.
258
 */
259
static void set_local(struct isl_sched_edge *edge)
260
15
{
261
15
  set_type(edge, isl_edge_local);
262
15
}
263
264
/* No longer mark "edge" as a local edge.
265
 */
266
static void clear_local(struct isl_sched_edge *edge)
267
44
{
268
44
  clear_type(edge, isl_edge_local);
269
44
}
270
271
/* Is "edge" marked as a coincidence edge?
272
 */
273
static int is_coincidence(struct isl_sched_edge *edge)
274
1.02k
{
275
1.02k
  return is_type(edge, isl_edge_coincidence);
276
1.02k
}
277
278
/* Is "edge" marked as a condition edge?
279
 */
280
static int is_condition(struct isl_sched_edge *edge)
281
2.00k
{
282
2.00k
  return is_type(edge, isl_edge_condition);
283
2.00k
}
284
285
/* Is "edge" marked as a conditional validity edge?
286
 */
287
static int is_conditional_validity(struct isl_sched_edge *edge)
288
1.47k
{
289
1.47k
  return is_type(edge, isl_edge_conditional_validity);
290
1.47k
}
291
292
/* Is "edge" of a type that can appear multiple times between
293
 * the same pair of nodes?
294
 *
295
 * Condition edges and conditional validity edges may have tagged
296
 * dependence relations, in which case an edge is added for each
297
 * pair of tags.
298
 */
299
static int is_multi_edge_type(struct isl_sched_edge *edge)
300
110
{
301
110
  return is_condition(edge) || 
is_conditional_validity(edge)87
;
302
110
}
303
304
/* Internal information about the dependence graph used during
305
 * the construction of the schedule.
306
 *
307
 * intra_hmap is a cache, mapping dependence relations to their dual,
308
 *  for dependences from a node to itself, possibly without
309
 *  coefficients for the parameters
310
 * intra_hmap_param is a cache, mapping dependence relations to their dual,
311
 *  for dependences from a node to itself, including coefficients
312
 *  for the parameters
313
 * inter_hmap is a cache, mapping dependence relations to their dual,
314
 *  for dependences between distinct nodes
315
 * if compression is involved then the key for these maps
316
 * is the original, uncompressed dependence relation, while
317
 * the value is the dual of the compressed dependence relation.
318
 *
319
 * n is the number of nodes
320
 * node is the list of nodes
321
 * maxvar is the maximal number of variables over all nodes
322
 * max_row is the allocated number of rows in the schedule
323
 * n_row is the current (maximal) number of linearly independent
324
 *  rows in the node schedules
325
 * n_total_row is the current number of rows in the node schedules
326
 * band_start is the starting row in the node schedules of the current band
327
 * root is set to the original dependence graph from which this graph
328
 *  is derived through splitting.  If this graph is not the result of
329
 *  splitting, then the root field points to the graph itself.
330
 *
331
 * sorted contains a list of node indices sorted according to the
332
 *  SCC to which a node belongs
333
 *
334
 * n_edge is the number of edges
335
 * edge is the list of edges
336
 * max_edge contains the maximal number of edges of each type;
337
 *  in particular, it contains the number of edges in the inital graph.
338
 * edge_table contains pointers into the edge array, hashed on the source
339
 *  and sink spaces; there is one such table for each type;
340
 *  a given edge may be referenced from more than one table
341
 *  if the corresponding relation appears in more than one of the
342
 *  sets of dependences; however, for each type there is only
343
 *  a single edge between a given pair of source and sink space
344
 *  in the entire graph
345
 *
346
 * node_table contains pointers into the node array, hashed on the space tuples
347
 *
348
 * region contains a list of variable sequences that should be non-trivial
349
 *
350
 * lp contains the (I)LP problem used to obtain new schedule rows
351
 *
352
 * src_scc and dst_scc are the source and sink SCCs of an edge with
353
 *  conflicting constraints
354
 *
355
 * scc represents the number of components
356
 * weak is set if the components are weakly connected
357
 *
358
 * max_weight is used during clustering and represents the maximal
359
 * weight of the relevant proximity edges.
360
 */
361
struct isl_sched_graph {
362
  isl_map_to_basic_set *intra_hmap;
363
  isl_map_to_basic_set *intra_hmap_param;
364
  isl_map_to_basic_set *inter_hmap;
365
366
  struct isl_sched_node *node;
367
  int n;
368
  int maxvar;
369
  int max_row;
370
  int n_row;
371
372
  int *sorted;
373
374
  int n_total_row;
375
  int band_start;
376
377
  struct isl_sched_graph *root;
378
379
  struct isl_sched_edge *edge;
380
  int n_edge;
381
  int max_edge[isl_edge_last + 1];
382
  struct isl_hash_table *edge_table[isl_edge_last + 1];
383
384
  struct isl_hash_table *node_table;
385
  struct isl_trivial_region *region;
386
387
  isl_basic_set *lp;
388
389
  int src_scc;
390
  int dst_scc;
391
392
  int scc;
393
  int weak;
394
395
  int max_weight;
396
};
397
398
/* Initialize node_table based on the list of nodes.
399
 */
400
static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
401
349
{
402
349
  int i;
403
349
404
349
  graph->node_table = isl_hash_table_alloc(ctx, graph->n);
405
349
  if (!graph->node_table)
406
0
    return -1;
407
349
408
855
  
for (i = 0; 349
i < graph->n;
++i506
) {
409
506
    struct isl_hash_table_entry *entry;
410
506
    uint32_t hash;
411
506
412
506
    hash = isl_space_get_tuple_hash(graph->node[i].space);
413
506
    entry = isl_hash_table_find(ctx, graph->node_table, hash,
414
506
              &node_has_tuples,
415
506
              graph->node[i].space, 1);
416
506
    if (!entry)
417
0
      return -1;
418
506
    entry->data = &graph->node[i];
419
506
  }
420
349
421
349
  return 0;
422
349
}
423
424
/* Return a pointer to the node that lives within the given space,
425
 * an invalid node if there is no such node, or NULL in case of error.
426
 */
427
static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
428
  struct isl_sched_graph *graph, __isl_keep isl_space *space)
429
1.74k
{
430
1.74k
  struct isl_hash_table_entry *entry;
431
1.74k
  uint32_t hash;
432
1.74k
433
1.74k
  if (!space)
434
0
    return NULL;
435
1.74k
436
1.74k
  hash = isl_space_get_tuple_hash(space);
437
1.74k
  entry = isl_hash_table_find(ctx, graph->node_table, hash,
438
1.74k
            &node_has_tuples, space, 0);
439
1.74k
440
1.74k
  return entry ? 
entry->data1.71k
:
graph->node + graph->n24
;
441
1.74k
}
442
443
/* Is "node" a node in "graph"?
444
 */
445
static int is_node(struct isl_sched_graph *graph,
446
  struct isl_sched_node *node)
447
1.74k
{
448
1.74k
  return node && node >= &graph->node[0] && node < &graph->node[graph->n];
449
1.74k
}
450
451
static int edge_has_src_and_dst(const void *entry, const void *val)
452
1.21k
{
453
1.21k
  const struct isl_sched_edge *edge = entry;
454
1.21k
  const struct isl_sched_edge *temp = val;
455
1.21k
456
1.21k
  return edge->src == temp->src && edge->dst == temp->dst;
457
1.21k
}
458
459
/* Add the given edge to graph->edge_table[type].
460
 */
461
static isl_stat graph_edge_table_add(isl_ctx *ctx,
462
  struct isl_sched_graph *graph, enum isl_edge_type type,
463
  struct isl_sched_edge *edge)
464
947
{
465
947
  struct isl_hash_table_entry *entry;
466
947
  uint32_t hash;
467
947
468
947
  hash = isl_hash_init();
469
947
  hash = isl_hash_builtin(hash, edge->src);
470
947
  hash = isl_hash_builtin(hash, edge->dst);
471
947
  entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
472
947
            &edge_has_src_and_dst, edge, 1);
473
947
  if (!entry)
474
0
    return isl_stat_error;
475
947
  entry->data = edge;
476
947
477
947
  return isl_stat_ok;
478
947
}
479
480
/* Add "edge" to all relevant edge tables.
481
 * That is, for every type of the edge, add it to the corresponding table.
482
 */
483
static isl_stat graph_edge_tables_add(isl_ctx *ctx,
484
  struct isl_sched_graph *graph, struct isl_sched_edge *edge)
485
115
{
486
115
  enum isl_edge_type t;
487
115
488
690
  for (t = isl_edge_first; t <= isl_edge_last; 
++t575
) {
489
575
    if (!is_type(edge, t))
490
288
      continue;
491
287
    if (graph_edge_table_add(ctx, graph, t, edge) < 0)
492
0
      return isl_stat_error;
493
287
  }
494
115
495
115
  return isl_stat_ok;
496
115
}
497
498
/* Allocate the edge_tables based on the maximal number of edges of
499
 * each type.
500
 */
501
static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
502
349
{
503
349
  int i;
504
349
505
2.09k
  for (i = 0; i <= isl_edge_last; 
++i1.74k
) {
506
1.74k
    graph->edge_table[i] = isl_hash_table_alloc(ctx,
507
1.74k
                  graph->max_edge[i]);
508
1.74k
    if (!graph->edge_table[i])
509
0
      return -1;
510
1.74k
  }
511
349
512
349
  return 0;
513
349
}
514
515
/* If graph->edge_table[type] contains an edge from the given source
516
 * to the given destination, then return the hash table entry of this edge.
517
 * Otherwise, return NULL.
518
 */
519
static struct isl_hash_table_entry *graph_find_edge_entry(
520
  struct isl_sched_graph *graph,
521
  enum isl_edge_type type,
522
  struct isl_sched_node *src, struct isl_sched_node *dst)
523
4.79k
{
524
4.79k
  isl_ctx *ctx = isl_space_get_ctx(src->space);
525
4.79k
  uint32_t hash;
526
4.79k
  struct isl_sched_edge temp = { .src = src, .dst = dst };
527
4.79k
528
4.79k
  hash = isl_hash_init();
529
4.79k
  hash = isl_hash_builtin(hash, temp.src);
530
4.79k
  hash = isl_hash_builtin(hash, temp.dst);
531
4.79k
  return isl_hash_table_find(ctx, graph->edge_table[type], hash,
532
4.79k
            &edge_has_src_and_dst, &temp, 0);
533
4.79k
}
534
535
536
/* If graph->edge_table[type] contains an edge from the given source
537
 * to the given destination, then return this edge.
538
 * Otherwise, return NULL.
539
 */
540
static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
541
  enum isl_edge_type type,
542
  struct isl_sched_node *src, struct isl_sched_node *dst)
543
3.95k
{
544
3.95k
  struct isl_hash_table_entry *entry;
545
3.95k
546
3.95k
  entry = graph_find_edge_entry(graph, type, src, dst);
547
3.95k
  if (!entry)
548
3.14k
    return NULL;
549
802
550
802
  return entry->data;
551
802
}
552
553
/* Check whether the dependence graph has an edge of the given type
554
 * between the given two nodes.
555
 */
556
static isl_bool graph_has_edge(struct isl_sched_graph *graph,
557
  enum isl_edge_type type,
558
  struct isl_sched_node *src, struct isl_sched_node *dst)
559
2.23k
{
560
2.23k
  struct isl_sched_edge *edge;
561
2.23k
  isl_bool empty;
562
2.23k
563
2.23k
  edge = graph_find_edge(graph, type, src, dst);
564
2.23k
  if (!edge)
565
1.84k
    return isl_bool_false;
566
389
567
389
  empty = isl_map_plain_is_empty(edge->map);
568
389
  if (empty < 0)
569
0
    return isl_bool_error;
570
389
571
389
  return !empty;
572
389
}
573
574
/* Look for any edge with the same src, dst and map fields as "model".
575
 *
576
 * Return the matching edge if one can be found.
577
 * Return "model" if no matching edge is found.
578
 * Return NULL on error.
579
 */
580
static struct isl_sched_edge *graph_find_matching_edge(
581
  struct isl_sched_graph *graph, struct isl_sched_edge *model)
582
660
{
583
660
  enum isl_edge_type i;
584
660
  struct isl_sched_edge *edge;
585
660
586
1.98k
  for (i = isl_edge_first; i <= isl_edge_last; 
++i1.32k
) {
587
1.72k
    int is_equal;
588
1.72k
589
1.72k
    edge = graph_find_edge(graph, i, model->src, model->dst);
590
1.72k
    if (!edge)
591
1.30k
      continue;
592
413
    is_equal = isl_map_plain_is_equal(model->map, edge->map);
593
413
    if (is_equal < 0)
594
0
      return NULL;
595
413
    if (is_equal)
596
395
      return edge;
597
413
  }
598
660
599
660
  
return model265
;
600
660
}
601
602
/* Remove the given edge from all the edge_tables that refer to it.
603
 */
604
static void graph_remove_edge(struct isl_sched_graph *graph,
605
  struct isl_sched_edge *edge)
606
168
{
607
168
  isl_ctx *ctx = isl_map_get_ctx(edge->map);
608
168
  enum isl_edge_type i;
609
168
610
1.00k
  for (i = isl_edge_first; i <= isl_edge_last; 
++i840
) {
611
840
    struct isl_hash_table_entry *entry;
612
840
613
840
    entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
614
840
    if (!entry)
615
501
      continue;
616
339
    if (entry->data != edge)
617
9
      continue;
618
330
    isl_hash_table_remove(ctx, graph->edge_table[i], entry);
619
330
  }
620
168
}
621
622
/* Check whether the dependence graph has any edge
623
 * between the given two nodes.
624
 */
625
static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
626
  struct isl_sched_node *src, struct isl_sched_node *dst)
627
483
{
628
483
  enum isl_edge_type i;
629
483
  isl_bool r;
630
483
631
1.80k
  for (i = isl_edge_first; i <= isl_edge_last; 
++i1.32k
) {
632
1.56k
    r = graph_has_edge(graph, i, src, dst);
633
1.56k
    if (r < 0 || r)
634
244
      return r;
635
1.56k
  }
636
483
637
483
  
return r239
;
638
483
}
639
640
/* Check whether the dependence graph has a validity edge
641
 * between the given two nodes.
642
 *
643
 * Conditional validity edges are essentially validity edges that
644
 * can be ignored if the corresponding condition edges are iteration private.
645
 * Here, we are only checking for the presence of validity
646
 * edges, so we need to consider the conditional validity edges too.
647
 * In particular, this function is used during the detection
648
 * of strongly connected components and we cannot ignore
649
 * conditional validity edges during this detection.
650
 */
651
static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph,
652
  struct isl_sched_node *src, struct isl_sched_node *dst)
653
395
{
654
395
  isl_bool r;
655
395
656
395
  r = graph_has_edge(graph, isl_edge_validity, src, dst);
657
395
  if (r < 0 || r)
658
126
    return r;
659
269
660
269
  return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
661
269
}
662
663
/* Perform all the required memory allocations for a schedule graph "graph"
664
 * with "n_node" nodes and "n_edge" edge and initialize the corresponding
665
 * fields.
666
 */
667
static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
668
  int n_node, int n_edge)
669
349
{
670
349
  int i;
671
349
672
349
  graph->n = n_node;
673
349
  graph->n_edge = n_edge;
674
349
  graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
675
349
  graph->sorted = isl_calloc_array(ctx, int, graph->n);
676
349
  graph->region = isl_alloc_array(ctx,
677
349
          struct isl_trivial_region, graph->n);
678
349
  graph->edge = isl_calloc_array(ctx,
679
349
          struct isl_sched_edge, graph->n_edge);
680
349
681
349
  graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
682
349
  graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
683
349
  graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
684
349
685
349
  if (!graph->node || !graph->region || (graph->n_edge && 
!graph->edge199
) ||
686
349
      !graph->sorted)
687
0
    return isl_stat_error;
688
349
689
857
  
for(i = 0; 349
i < graph->n;
++i508
)
690
508
    graph->sorted[i] = i;
691
349
692
349
  return isl_stat_ok;
693
349
}
694
695
/* Free the memory associated to node "node" in "graph".
696
 * The "coincident" field is shared by nodes in a graph and its subgraph.
697
 * It therefore only needs to be freed for the original dependence graph,
698
 * i.e., one that is not the result of splitting.
699
 */
700
static void clear_node(struct isl_sched_graph *graph,
701
  struct isl_sched_node *node)
702
506
{
703
506
  isl_space_free(node->space);
704
506
  isl_set_free(node->hull);
705
506
  isl_multi_aff_free(node->compress);
706
506
  isl_multi_aff_free(node->decompress);
707
506
  isl_mat_free(node->sched);
708
506
  isl_map_free(node->sched_map);
709
506
  isl_mat_free(node->indep);
710
506
  isl_mat_free(node->vmap);
711
506
  if (graph->root == graph)
712
280
    free(node->coincident);
713
506
  isl_multi_val_free(node->sizes);
714
506
  isl_basic_set_free(node->bounds);
715
506
  isl_vec_free(node->max);
716
506
}
717
718
static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
719
367
{
720
367
  int i;
721
367
722
367
  isl_map_to_basic_set_free(graph->intra_hmap);
723
367
  isl_map_to_basic_set_free(graph->intra_hmap_param);
724
367
  isl_map_to_basic_set_free(graph->inter_hmap);
725
367
726
367
  if (graph->node)
727
855
    
for (i = 0; 349
i < graph->n;
++i506
)
728
506
      clear_node(graph, &graph->node[i]);
729
367
  free(graph->node);
730
367
  free(graph->sorted);
731
367
  if (graph->edge)
732
696
    
for (i = 0; 349
i < graph->n_edge;
++i347
) {
733
347
      isl_map_free(graph->edge[i].map);
734
347
      isl_union_map_free(graph->edge[i].tagged_condition);
735
347
      isl_union_map_free(graph->edge[i].tagged_validity);
736
347
    }
737
367
  free(graph->edge);
738
367
  free(graph->region);
739
2.20k
  for (i = 0; i <= isl_edge_last; 
++i1.83k
)
740
1.83k
    isl_hash_table_free(ctx, graph->edge_table[i]);
741
367
  isl_hash_table_free(ctx, graph->node_table);
742
367
  isl_basic_set_free(graph->lp);
743
367
}
744
745
/* For each "set" on which this function is called, increment
746
 * graph->n by one and update graph->maxvar.
747
 */
748
static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
749
282
{
750
282
  struct isl_sched_graph *graph = user;
751
282
  int nvar = isl_set_dim(set, isl_dim_set);
752
282
753
282
  graph->n++;
754
282
  if (nvar > graph->maxvar)
755
156
    graph->maxvar = nvar;
756
282
757
282
  isl_set_free(set);
758
282
759
282
  return isl_stat_ok;
760
282
}
761
762
/* Compute the number of rows that should be allocated for the schedule.
763
 * In particular, we need one row for each variable or one row
764
 * for each basic map in the dependences.
765
 * Note that it is practically impossible to exhaust both
766
 * the number of dependences and the number of variables.
767
 */
768
static isl_stat compute_max_row(struct isl_sched_graph *graph,
769
  __isl_keep isl_schedule_constraints *sc)
770
153
{
771
153
  int n_edge;
772
153
  isl_stat r;
773
153
  isl_union_set *domain;
774
153
775
153
  graph->n = 0;
776
153
  graph->maxvar = 0;
777
153
  domain = isl_schedule_constraints_get_domain(sc);
778
153
  r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
779
153
  isl_union_set_free(domain);
780
153
  if (r < 0)
781
0
    return isl_stat_error;
782
153
  n_edge = isl_schedule_constraints_n_basic_map(sc);
783
153
  if (n_edge < 0)
784
0
    return isl_stat_error;
785
153
  graph->max_row = n_edge + graph->maxvar;
786
153
787
153
  return isl_stat_ok;
788
153
}
789
790
/* Does "bset" have any defining equalities for its set variables?
791
 */
792
static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
793
280
{
794
280
  int i, n;
795
280
796
280
  if (!bset)
797
0
    return isl_bool_error;
798
280
799
280
  n = isl_basic_set_dim(bset, isl_dim_set);
800
653
  for (i = 0; i < n; 
++i373
) {
801
381
    isl_bool has;
802
381
803
381
    has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
804
381
              NULL);
805
381
    if (has < 0 || has)
806
8
      return has;
807
381
  }
808
280
809
280
  
return isl_bool_false272
;
810
280
}
811
812
/* Set the entries of node->max to the value of the schedule_max_coefficient
813
 * option, if set.
814
 */
815
static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
816
4
{
817
4
  int max;
818
4
819
4
  max = isl_options_get_schedule_max_coefficient(ctx);
820
4
  if (max == -1)
821
4
    return isl_stat_ok;
822
0
823
0
  node->max = isl_vec_alloc(ctx, node->nvar);
824
0
  node->max = isl_vec_set_si(node->max, max);
825
0
  if (!node->max)
826
0
    return isl_stat_error;
827
0
828
0
  return isl_stat_ok;
829
0
}
830
831
/* Set the entries of node->max to the minimum of the schedule_max_coefficient
832
 * option (if set) and half of the minimum of the sizes in the other
833
 * dimensions.  Round up when computing the half such that
834
 * if the minimum of the sizes is one, half of the size is taken to be one
835
 * rather than zero.
836
 * If the global minimum is unbounded (i.e., if both
837
 * the schedule_max_coefficient is not set and the sizes in the other
838
 * dimensions are unbounded), then store a negative value.
839
 * If the schedule coefficient is close to the size of the instance set
840
 * in another dimension, then the schedule may represent a loop
841
 * coalescing transformation (especially if the coefficient
842
 * in that other dimension is one).  Forcing the coefficient to be
843
 * smaller than or equal to half the minimal size should avoid this
844
 * situation.
845
 */
846
static isl_stat compute_max_coefficient(isl_ctx *ctx,
847
  struct isl_sched_node *node)
848
276
{
849
276
  int max;
850
276
  int i, j;
851
276
  isl_vec *v;
852
276
853
276
  max = isl_options_get_schedule_max_coefficient(ctx);
854
276
  v = isl_vec_alloc(ctx, node->nvar);
855
276
  if (!v)
856
0
    return isl_stat_error;
857
276
858
639
  
for (i = 0; 276
i < node->nvar;
++i363
) {
859
363
    isl_int_set_si(v->el[i], max);
860
363
    isl_int_mul_si(v->el[i], v->el[i], 2);
861
363
  }
862
276
863
639
  for (i = 0; i < node->nvar; 
++i363
) {
864
363
    isl_val *size;
865
363
866
363
    size = isl_multi_val_get_val(node->sizes, i);
867
363
    if (!size)
868
0
      goto error;
869
363
    if (!isl_val_is_int(size)) {
870
207
      isl_val_free(size);
871
207
      continue;
872
207
    }
873
497
    
for (j = 0; 156
j < node->nvar;
++j341
) {
874
341
      if (j == i)
875
156
        continue;
876
185
      if (isl_int_is_neg(v->el[j]) ||
877
185
          
isl_int_gt163
(v->el[j], size->n))
878
185
        
isl_int_set24
(v->el[j], size->n);
879
185
    }
880
156
    isl_val_free(size);
881
156
  }
882
276
883
639
  
for (i = 0; 276
i < node->nvar;
++i363
)
884
363
    isl_int_cdiv_q_ui(v->el[i], v->el[i], 2);
885
276
886
276
  node->max = v;
887
276
  return isl_stat_ok;
888
0
error:
889
0
  isl_vec_free(v);
890
0
  return isl_stat_error;
891
276
}
892
893
/* Compute and return the size of "set" in dimension "dim".
894
 * The size is taken to be the difference in values for that variable
895
 * for fixed values of the other variables.
896
 * This assumes that "set" is convex.
897
 * In particular, the variable is first isolated from the other variables
898
 * in the range of a map
899
 *
900
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
901
 *
902
 * and then duplicated
903
 *
904
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
905
 *
906
 * The shared variables are then projected out and the maximal value
907
 * of i_dim' - i_dim is computed.
908
 */
909
static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
910
363
{
911
363
  isl_map *map;
912
363
  isl_local_space *ls;
913
363
  isl_aff *obj;
914
363
  isl_val *v;
915
363
916
363
  map = isl_set_project_onto_map(set, isl_dim_set, dim, 1);
917
363
  map = isl_map_project_out(map, isl_dim_in, dim, 1);
918
363
  map = isl_map_range_product(map, isl_map_copy(map));
919
363
  map = isl_set_unwrap(isl_map_range(map));
920
363
  set = isl_map_deltas(map);
921
363
  ls = isl_local_space_from_space(isl_set_get_space(set));
922
363
  obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
923
363
  v = isl_set_max_val(set, obj);
924
363
  isl_aff_free(obj);
925
363
  isl_set_free(set);
926
363
927
363
  return v;
928
363
}
929
930
/* Compute the size of the instance set "set" of "node", after compression,
931
 * as well as bounds on the corresponding coefficients, if needed.
932
 *
933
 * The sizes are needed when the schedule_treat_coalescing option is set.
934
 * The bounds are needed when the schedule_treat_coalescing option or
935
 * the schedule_max_coefficient option is set.
936
 *
937
 * If the schedule_treat_coalescing option is not set, then at most
938
 * the bounds need to be set and this is done in set_max_coefficient.
939
 * Otherwise, compress the domain if needed, compute the size
940
 * in each direction and store the results in node->size.
941
 * If the domain is not convex, then the sizes are computed
942
 * on a convex superset in order to avoid picking up sizes
943
 * that are valid for the individual disjuncts, but not for
944
 * the domain as a whole.
945
 * Finally, set the bounds on the coefficients based on the sizes
946
 * and the schedule_max_coefficient option in compute_max_coefficient.
947
 */
948
static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
949
  __isl_take isl_set *set)
950
280
{
951
280
  int j, n;
952
280
  isl_multi_val *mv;
953
280
954
280
  if (!isl_options_get_schedule_treat_coalescing(ctx)) {
955
4
    isl_set_free(set);
956
4
    return set_max_coefficient(ctx, node);
957
4
  }
958
276
959
276
  if (node->compressed)
960
8
    set = isl_set_preimage_multi_aff(set,
961
8
          isl_multi_aff_copy(node->decompress));
962
276
  set = isl_set_from_basic_set(isl_set_simple_hull(set));
963
276
  mv = isl_multi_val_zero(isl_set_get_space(set));
964
276
  n = isl_set_dim(set, isl_dim_set);
965
639
  for (j = 0; j < n; 
++j363
) {
966
363
    isl_val *v;
967
363
968
363
    v = compute_size(isl_set_copy(set), j);
969
363
    mv = isl_multi_val_set_val(mv, j, v);
970
363
  }
971
276
  node->sizes = mv;
972
276
  isl_set_free(set);
973
276
  if (!node->sizes)
974
0
    return isl_stat_error;
975
276
  return compute_max_coefficient(ctx, node);
976
276
}
977
978
/* Add a new node to the graph representing the given instance set.
979
 * "nvar" is the (possibly compressed) number of variables and
980
 * may be smaller than then number of set variables in "set"
981
 * if "compressed" is set.
982
 * If "compressed" is set, then "hull" represents the constraints
983
 * that were used to derive the compression, while "compress" and
984
 * "decompress" map the original space to the compressed space and
985
 * vice versa.
986
 * If "compressed" is not set, then "hull", "compress" and "decompress"
987
 * should be NULL.
988
 *
989
 * Compute the size of the instance set and bounds on the coefficients,
990
 * if needed.
991
 */
992
static isl_stat add_node(struct isl_sched_graph *graph,
993
  __isl_take isl_set *set, int nvar, int compressed,
994
  __isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
995
  __isl_take isl_multi_aff *decompress)
996
280
{
997
280
  int nparam;
998
280
  isl_ctx *ctx;
999
280
  isl_mat *sched;
1000
280
  isl_space *space;
1001
280
  int *coincident;
1002
280
  struct isl_sched_node *node;
1003
280
1004
280
  if (!set)
1005
0
    return isl_stat_error;
1006
280
1007
280
  ctx = isl_set_get_ctx(set);
1008
280
  nparam = isl_set_dim(set, isl_dim_param);
1009
280
  if (!ctx->opt->schedule_parametric)
1010
28
    nparam = 0;
1011
280
  sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
1012
280
  node = &graph->node[graph->n];
1013
280
  graph->n++;
1014
280
  space = isl_set_get_space(set);
1015
280
  node->space = space;
1016
280
  node->nvar = nvar;
1017
280
  node->nparam = nparam;
1018
280
  node->sched = sched;
1019
280
  node->sched_map = NULL;
1020
280
  coincident = isl_calloc_array(ctx, int, graph->max_row);
1021
280
  node->coincident = coincident;
1022
280
  node->compressed = compressed;
1023
280
  node->hull = hull;
1024
280
  node->compress = compress;
1025
280
  node->decompress = decompress;
1026
280
  if (compute_sizes_and_max(ctx, node, set) < 0)
1027
0
    return isl_stat_error;
1028
280
1029
280
  if (!space || !sched || (graph->max_row && 
!coincident278
))
1030
0
    return isl_stat_error;
1031
280
  if (compressed && 
(8
!hull8
||
!compress8
||
!decompress8
))
1032
0
    return isl_stat_error;
1033
280
1034
280
  return isl_stat_ok;
1035
280
}
1036
1037
/* Construct an identifier for node "node", which will represent "set".
1038
 * The name of the identifier is either "compressed" or
1039
 * "compressed_<name>", with <name> the name of the space of "set".
1040
 * The user pointer of the identifier points to "node".
1041
 */
1042
static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set,
1043
  struct isl_sched_node *node)
1044
8
{
1045
8
  isl_bool has_name;
1046
8
  isl_ctx *ctx;
1047
8
  isl_id *id;
1048
8
  isl_printer *p;
1049
8
  const char *name;
1050
8
  char *id_name;
1051
8
1052
8
  has_name = isl_set_has_tuple_name(set);
1053
8
  if (has_name < 0)
1054
0
    return NULL;
1055
8
1056
8
  ctx = isl_set_get_ctx(set);
1057
8
  if (!has_name)
1058
0
    return isl_id_alloc(ctx, "compressed", node);
1059
8
1060
8
  p = isl_printer_to_str(ctx);
1061
8
  name = isl_set_get_tuple_name(set);
1062
8
  p = isl_printer_print_str(p, "compressed_");
1063
8
  p = isl_printer_print_str(p, name);
1064
8
  id_name = isl_printer_get_str(p);
1065
8
  isl_printer_free(p);
1066
8
1067
8
  id = isl_id_alloc(ctx, id_name, node);
1068
8
  free(id_name);
1069
8
1070
8
  return id;
1071
8
}
1072
1073
/* Add a new node to the graph representing the given set.
1074
 *
1075
 * If any of the set variables is defined by an equality, then
1076
 * we perform variable compression such that we can perform
1077
 * the scheduling on the compressed domain.
1078
 * In this case, an identifier is used that references the new node
1079
 * such that each compressed space is unique and
1080
 * such that the node can be recovered from the compressed space.
1081
 */
1082
static isl_stat extract_node(__isl_take isl_set *set, void *user)
1083
280
{
1084
280
  int nvar;
1085
280
  isl_bool has_equality;
1086
280
  isl_id *id;
1087
280
  isl_basic_set *hull;
1088
280
  isl_set *hull_set;
1089
280
  isl_morph *morph;
1090
280
  isl_multi_aff *compress, *decompress;
1091
280
  struct isl_sched_graph *graph = user;
1092
280
1093
280
  hull = isl_set_affine_hull(isl_set_copy(set));
1094
280
  hull = isl_basic_set_remove_divs(hull);
1095
280
  nvar = isl_set_dim(set, isl_dim_set);
1096
280
  has_equality = has_any_defining_equality(hull);
1097
280
1098
280
  if (has_equality < 0)
1099
0
    goto error;
1100
280
  if (!has_equality) {
1101
272
    isl_basic_set_free(hull);
1102
272
    return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
1103
272
  }
1104
8
1105
8
  id = construct_compressed_id(set, &graph->node[graph->n]);
1106
8
  morph = isl_basic_set_variable_compression_with_id(hull,
1107
8
                  isl_dim_set, id);
1108
8
  isl_id_free(id);
1109
8
  nvar = isl_morph_ran_dim(morph, isl_dim_set);
1110
8
  compress = isl_morph_get_var_multi_aff(morph);
1111
8
  morph = isl_morph_inverse(morph);
1112
8
  decompress = isl_morph_get_var_multi_aff(morph);
1113
8
  isl_morph_free(morph);
1114
8
1115
8
  hull_set = isl_set_from_basic_set(hull);
1116
8
  return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
1117
0
error:
1118
0
  isl_basic_set_free(hull);
1119
0
  isl_set_free(set);
1120
0
  return isl_stat_error;
1121
8
}
1122
1123
struct isl_extract_edge_data {
1124
  enum isl_edge_type type;
1125
  struct isl_sched_graph *graph;
1126
};
1127
1128
/* Merge edge2 into edge1, freeing the contents of edge2.
1129
 * Return 0 on success and -1 on failure.
1130
 *
1131
 * edge1 and edge2 are assumed to have the same value for the map field.
1132
 */
1133
static int merge_edge(struct isl_sched_edge *edge1,
1134
  struct isl_sched_edge *edge2)
1135
395
{
1136
395
  edge1->types |= edge2->types;
1137
395
  isl_map_free(edge2->map);
1138
395
1139
395
  if (is_condition(edge2)) {
1140
23
    if (!edge1->tagged_condition)
1141
23
      edge1->tagged_condition = edge2->tagged_condition;
1142
0
    else
1143
0
      edge1->tagged_condition =
1144
0
        isl_union_map_union(edge1->tagged_condition,
1145
0
                edge2->tagged_condition);
1146
23
  }
1147
395
1148
395
  if (is_conditional_validity(edge2)) {
1149
3
    if (!edge1->tagged_validity)
1150
3
      edge1->tagged_validity = edge2->tagged_validity;
1151
0
    else
1152
0
      edge1->tagged_validity =
1153
0
        isl_union_map_union(edge1->tagged_validity,
1154
0
                edge2->tagged_validity);
1155
3
  }
1156
395
1157
395
  if (is_condition(edge2) && 
!edge1->tagged_condition23
)
1158
0
    return -1;
1159
395
  if (is_conditional_validity(edge2) && 
!edge1->tagged_validity3
)
1160
0
    return -1;
1161
395
1162
395
  return 0;
1163
395
}
1164
1165
/* Insert dummy tags in domain and range of "map".
1166
 *
1167
 * In particular, if "map" is of the form
1168
 *
1169
 *  A -> B
1170
 *
1171
 * then return
1172
 *
1173
 *  [A -> dummy_tag] -> [B -> dummy_tag]
1174
 *
1175
 * where the dummy_tags are identical and equal to any dummy tags
1176
 * introduced by any other call to this function.
1177
 */
1178
static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
1179
12
{
1180
12
  static char dummy;
1181
12
  isl_ctx *ctx;
1182
12
  isl_id *id;
1183
12
  isl_space *space;
1184
12
  isl_set *domain, *range;
1185
12
1186
12
  ctx = isl_map_get_ctx(map);
1187
12
1188
12
  id = isl_id_alloc(ctx, NULL, &dummy);
1189
12
  space = isl_space_params(isl_map_get_space(map));
1190
12
  space = isl_space_set_from_params(space);
1191
12
  space = isl_space_set_tuple_id(space, isl_dim_set, id);
1192
12
  space = isl_space_map_from_set(space);
1193
12
1194
12
  domain = isl_map_wrap(map);
1195
12
  range = isl_map_wrap(isl_map_universe(space));
1196
12
  map = isl_map_from_domain_and_range(domain, range);
1197
12
  map = isl_map_zip(map);
1198
12
1199
12
  return map;
1200
12
}
1201
1202
/* Given that at least one of "src" or "dst" is compressed, return
1203
 * a map between the spaces of these nodes restricted to the affine
1204
 * hull that was used in the compression.
1205
 */
1206
static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
1207
  struct isl_sched_node *dst)
1208
22
{
1209
22
  isl_set *dom, *ran;
1210
22
1211
22
  if (src->compressed)
1212
14
    dom = isl_set_copy(src->hull);
1213
8
  else
1214
8
    dom = isl_set_universe(isl_space_copy(src->space));
1215
22
  if (dst->compressed)
1216
14
    ran = isl_set_copy(dst->hull);
1217
8
  else
1218
8
    ran = isl_set_universe(isl_space_copy(dst->space));
1219
22
1220
22
  return isl_map_from_domain_and_range(dom, ran);
1221
22
}
1222
1223
/* Intersect the domains of the nested relations in domain and range
1224
 * of "tagged" with "map".
1225
 */
1226
static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
1227
  __isl_keep isl_map *map)
1228
14
{
1229
14
  isl_set *set;
1230
14
1231
14
  tagged = isl_map_zip(tagged);
1232
14
  set = isl_map_wrap(isl_map_copy(map));
1233
14
  tagged = isl_map_intersect_domain(tagged, set);
1234
14
  tagged = isl_map_zip(tagged);
1235
14
  return tagged;
1236
14
}
1237
1238
/* Return a pointer to the node that lives in the domain space of "map",
1239
 * an invalid node if there is no such node, or NULL in case of error.
1240
 */
1241
static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
1242
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1243
684
{
1244
684
  struct isl_sched_node *node;
1245
684
  isl_space *space;
1246
684
1247
684
  space = isl_space_domain(isl_map_get_space(map));
1248
684
  node = graph_find_node(ctx, graph, space);
1249
684
  isl_space_free(space);
1250
684
1251
684
  return node;
1252
684
}
1253
1254
/* Return a pointer to the node that lives in the range space of "map",
1255
 * an invalid node if there is no such node, or NULL in case of error.
1256
 */
1257
static struct isl_sched_node *find_range_node(isl_ctx *ctx,
1258
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1259
684
{
1260
684
  struct isl_sched_node *node;
1261
684
  isl_space *space;
1262
684
1263
684
  space = isl_space_range(isl_map_get_space(map));
1264
684
  node = graph_find_node(ctx, graph, space);
1265
684
  isl_space_free(space);
1266
684
1267
684
  return node;
1268
684
}
1269
1270
/* Refrain from adding a new edge based on "map".
1271
 * Instead, just free the map.
1272
 * "tagged" is either a copy of "map" with additional tags or NULL.
1273
 */
1274
static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged)
1275
24
{
1276
24
  isl_map_free(map);
1277
24
  isl_map_free(tagged);
1278
24
1279
24
  return isl_stat_ok;
1280
24
}
1281
1282
/* Add a new edge to the graph based on the given map
1283
 * and add it to data->graph->edge_table[data->type].
1284
 * If a dependence relation of a given type happens to be identical
1285
 * to one of the dependence relations of a type that was added before,
1286
 * then we don't create a new edge, but instead mark the original edge
1287
 * as also representing a dependence of the current type.
1288
 *
1289
 * Edges of type isl_edge_condition or isl_edge_conditional_validity
1290
 * may be specified as "tagged" dependence relations.  That is, "map"
1291
 * may contain elements (i -> a) -> (j -> b), where i -> j denotes
1292
 * the dependence on iterations and a and b are tags.
1293
 * edge->map is set to the relation containing the elements i -> j,
1294
 * while edge->tagged_condition and edge->tagged_validity contain
1295
 * the union of all the "map" relations
1296
 * for which extract_edge is called that result in the same edge->map.
1297
 *
1298
 * If the source or the destination node is compressed, then
1299
 * intersect both "map" and "tagged" with the constraints that
1300
 * were used to construct the compression.
1301
 * This ensures that there are no schedule constraints defined
1302
 * outside of these domains, while the scheduler no longer has
1303
 * any control over those outside parts.
1304
 */
1305
static isl_stat extract_edge(__isl_take isl_map *map, void *user)
1306
684
{
1307
684
  isl_bool empty;
1308
684
  isl_ctx *ctx = isl_map_get_ctx(map);
1309
684
  struct isl_extract_edge_data *data = user;
1310
684
  struct isl_sched_graph *graph = data->graph;
1311
684
  struct isl_sched_node *src, *dst;
1312
684
  struct isl_sched_edge *edge;
1313
684
  isl_map *tagged = NULL;
1314
684
1315
684
  if (data->type == isl_edge_condition ||
1316
684
      
data->type == isl_edge_conditional_validity655
) {
1317
61
    if (isl_map_can_zip(map)) {
1318
49
      tagged = isl_map_copy(map);
1319
49
      map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
1320
49
    } else {
1321
12
      tagged = insert_dummy_tags(isl_map_copy(map));
1322
12
    }
1323
61
  }
1324
684
1325
684
  src = find_domain_node(ctx, graph, map);
1326
684
  dst = find_range_node(ctx, graph, map);
1327
684
1328
684
  if (!src || !dst)
1329
0
    goto error;
1330
684
  if (!is_node(graph, src) || !is_node(graph, dst))
1331
24
    return skip_edge(map, tagged);
1332
660
1333
660
  if (src->compressed || 
dst->compressed646
) {
1334
22
    isl_map *hull;
1335
22
    hull = extract_hull(src, dst);
1336
22
    if (tagged)
1337
14
      tagged = map_intersect_domains(tagged, hull);
1338
22
    map = isl_map_intersect(map, hull);
1339
22
  }
1340
660
1341
660
  empty = isl_map_plain_is_empty(map);
1342
660
  if (empty < 0)
1343
0
    goto error;
1344
660
  if (empty)
1345
0
    return skip_edge(map, tagged);
1346
660
1347
660
  graph->edge[graph->n_edge].src = src;
1348
660
  graph->edge[graph->n_edge].dst = dst;
1349
660
  graph->edge[graph->n_edge].map = map;
1350
660
  graph->edge[graph->n_edge].types = 0;
1351
660
  graph->edge[graph->n_edge].tagged_condition = NULL;
1352
660
  graph->edge[graph->n_edge].tagged_validity = NULL;
1353
660
  set_type(&graph->edge[graph->n_edge], data->type);
1354
660
  if (data->type == isl_edge_condition)
1355
29
    graph->edge[graph->n_edge].tagged_condition =
1356
29
          isl_union_map_from_map(tagged);
1357
660
  if (data->type == isl_edge_conditional_validity)
1358
32
    graph->edge[graph->n_edge].tagged_validity =
1359
32
          isl_union_map_from_map(tagged);
1360
660
1361
660
  edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
1362
660
  if (!edge) {
1363
0
    graph->n_edge++;
1364
0
    return isl_stat_error;
1365
0
  }
1366
660
  if (edge == &graph->edge[graph->n_edge])
1367
265
    return graph_edge_table_add(ctx, graph, data->type,
1368
265
            &graph->edge[graph->n_edge++]);
1369
395
1370
395
  if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0)
1371
0
    return isl_stat_error;
1372
395
1373
395
  return graph_edge_table_add(ctx, graph, data->type, edge);
1374
0
error:
1375
0
  isl_map_free(map);
1376
0
  isl_map_free(tagged);
1377
0
  return isl_stat_error;
1378
395
}
1379
1380
/* Initialize the schedule graph "graph" from the schedule constraints "sc".
1381
 *
1382
 * The context is included in the domain before the nodes of
1383
 * the graphs are extracted in order to be able to exploit
1384
 * any possible additional equalities.
1385
 * Note that this intersection is only performed locally here.
1386
 */
1387
static isl_stat graph_init(struct isl_sched_graph *graph,
1388
  __isl_keep isl_schedule_constraints *sc)
1389
153
{
1390
153
  isl_ctx *ctx;
1391
153
  isl_union_set *domain;
1392
153
  isl_union_map *c;
1393
153
  struct isl_extract_edge_data data;
1394
153
  enum isl_edge_type i;
1395
153
  isl_stat r;
1396
153
1397
153
  if (!sc)
1398
0
    return isl_stat_error;
1399
153
1400
153
  ctx = isl_schedule_constraints_get_ctx(sc);
1401
153
1402
153
  domain = isl_schedule_constraints_get_domain(sc);
1403
153
  graph->n = isl_union_set_n_set(domain);
1404
153
  isl_union_set_free(domain);
1405
153
1406
153
  if (graph_alloc(ctx, graph, graph->n,
1407
153
      isl_schedule_constraints_n_map(sc)) < 0)
1408
0
    return isl_stat_error;
1409
153
1410
153
  if (compute_max_row(graph, sc) < 0)
1411
0
    return isl_stat_error;
1412
153
  graph->root = graph;
1413
153
  graph->n = 0;
1414
153
  domain = isl_schedule_constraints_get_domain(sc);
1415
153
  domain = isl_union_set_intersect_params(domain,
1416
153
            isl_schedule_constraints_get_context(sc));
1417
153
  r = isl_union_set_foreach_set(domain, &extract_node, graph);
1418
153
  isl_union_set_free(domain);
1419
153
  if (r < 0)
1420
0
    return isl_stat_error;
1421
153
  if (graph_init_table(ctx, graph) < 0)
1422
0
    return isl_stat_error;
1423
918
  
for (i = isl_edge_first; 153
i <= isl_edge_last;
++i765
) {
1424
765
    c = isl_schedule_constraints_get(sc, i);
1425
765
    graph->max_edge[i] = isl_union_map_n_map(c);
1426
765
    isl_union_map_free(c);
1427
765
    if (!c)
1428
0
      return isl_stat_error;
1429
765
  }
1430
153
  if (graph_init_edge_tables(ctx, graph) < 0)
1431
0
    return isl_stat_error;
1432
153
  graph->n_edge = 0;
1433
153
  data.graph = graph;
1434
918
  for (i = isl_edge_first; i <= isl_edge_last; 
++i765
) {
1435
765
    isl_stat r;
1436
765
1437
765
    data.type = i;
1438
765
    c = isl_schedule_constraints_get(sc, i);
1439
765
    r = isl_union_map_foreach_map(c, &extract_edge, &data);
1440
765
    isl_union_map_free(c);
1441
765
    if (r < 0)
1442
0
      return isl_stat_error;
1443
765
  }
1444
153
1445
153
  return isl_stat_ok;
1446
153
}
1447
1448
/* Check whether there is any dependence from node[j] to node[i]
1449
 * or from node[i] to node[j].
1450
 */
1451
static isl_bool node_follows_weak(int i, int j, void *user)
1452
315
{
1453
315
  isl_bool f;
1454
315
  struct isl_sched_graph *graph = user;
1455
315
1456
315
  f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
1457
315
  if (f < 0 || f)
1458
147
    return f;
1459
168
  return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
1460
168
}
1461
1462
/* Check whether there is a (conditional) validity dependence from node[j]
1463
 * to node[i], forcing node[i] to follow node[j].
1464
 */
1465
static isl_bool node_follows_strong(int i, int j, void *user)
1466
311
{
1467
311
  struct isl_sched_graph *graph = user;
1468
311
1469
311
  return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
1470
311
}
1471
1472
/* Use Tarjan's algorithm for computing the strongly connected components
1473
 * in the dependence graph only considering those edges defined by "follows".
1474
 */
1475
static isl_stat detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph,
1476
  isl_bool (*follows)(int i, int j, void *user))
1477
656
{
1478
656
  int i, n;
1479
656
  struct isl_tarjan_graph *g = NULL;
1480
656
1481
656
  g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
1482
656
  if (!g)
1483
0
    return isl_stat_error;
1484
656
1485
656
  graph->scc = 0;
1486
656
  i = 0;
1487
656
  n = graph->n;
1488
1.49k
  while (n) {
1489
1.84k
    while (g->order[i] != -1) {
1490
1.01k
      graph->node[g->order[i]].scc = graph->scc;
1491
1.01k
      --n;
1492
1.01k
      ++i;
1493
1.01k
    }
1494
835
    ++i;
1495
835
    graph->scc++;
1496
835
  }
1497
656
1498
656
  isl_tarjan_graph_free(g);
1499
656
1500
656
  return isl_stat_ok;
1501
656
}
1502
1503
/* Apply Tarjan's algorithm to detect the strongly connected components
1504
 * in the dependence graph.
1505
 * Only consider the (conditional) validity dependences and clear "weak".
1506
 */
1507
static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1508
432
{
1509
432
  graph->weak = 0;
1510
432
  return detect_ccs(ctx, graph, &node_follows_strong);
1511
432
}
1512
1513
/* Apply Tarjan's algorithm to detect the (weakly) connected components
1514
 * in the dependence graph.
1515
 * Consider all dependences and set "weak".
1516
 */
1517
static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1518
195
{
1519
195
  graph->weak = 1;
1520
195
  return detect_ccs(ctx, graph, &node_follows_weak);
1521
195
}
1522
1523
static int cmp_scc(const void *a, const void *b, void *data)
1524
123
{
1525
123
  struct isl_sched_graph *graph = data;
1526
123
  const int *i1 = a;
1527
123
  const int *i2 = b;
1528
123
1529
123
  return graph->node[*i1].scc - graph->node[*i2].scc;
1530
123
}
1531
1532
/* Sort the elements of graph->sorted according to the corresponding SCCs.
1533
 */
1534
static int sort_sccs(struct isl_sched_graph *graph)
1535
381
{
1536
381
  return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
1537
381
}
1538
1539
/* Return a non-parametric set in the compressed space of "node" that is
1540
 * bounded by the size in each direction
1541
 *
1542
 *  { [x] : -S_i <= x_i <= S_i }
1543
 *
1544
 * If S_i is infinity in direction i, then there are no constraints
1545
 * in that direction.
1546
 *
1547
 * Cache the result in node->bounds.
1548
 */
1549
static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node)
1550
121
{
1551
121
  isl_space *space;
1552
121
  isl_basic_set *bounds;
1553
121
  int i;
1554
121
  unsigned nparam;
1555
121
1556
121
  if (node->bounds)
1557
26
    return isl_basic_set_copy(node->bounds);
1558
95
1559
95
  if (node->compressed)
1560
0
    space = isl_multi_aff_get_domain_space(node->decompress);
1561
95
  else
1562
95
    space = isl_space_copy(node->space);
1563
95
  nparam = isl_space_dim(space, isl_dim_param);
1564
95
  space = isl_space_drop_dims(space, isl_dim_param, 0, nparam);
1565
95
  bounds = isl_basic_set_universe(space);
1566
95
1567
283
  for (i = 0; i < node->nvar; 
++i188
) {
1568
188
    isl_val *size;
1569
188
1570
188
    size = isl_multi_val_get_val(node->sizes, i);
1571
188
    if (!size)
1572
0
      return isl_basic_set_free(bounds);
1573
188
    if (!isl_val_is_int(size)) {
1574
125
      isl_val_free(size);
1575
125
      continue;
1576
125
    }
1577
63
    bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i,
1578
63
              isl_val_copy(size));
1579
63
    bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i,
1580
63
              isl_val_neg(size));
1581
63
  }
1582
95
1583
95
  node->bounds = isl_basic_set_copy(bounds);
1584
95
  return bounds;
1585
95
}
1586
1587
/* Drop some constraints from "delta" that could be exploited
1588
 * to construct loop coalescing schedules.
1589
 * In particular, drop those constraint that bound the difference
1590
 * to the size of the domain.
1591
 * First project out the parameters to improve the effectiveness.
1592
 */
1593
static __isl_give isl_set *drop_coalescing_constraints(
1594
  __isl_take isl_set *delta, struct isl_sched_node *node)
1595
101
{
1596
101
  unsigned nparam;
1597
101
  isl_basic_set *bounds;
1598
101
1599
101
  bounds = get_size_bounds(node);
1600
101
1601
101
  nparam = isl_set_dim(delta, isl_dim_param);
1602
101
  delta = isl_set_project_out(delta, isl_dim_param, 0, nparam);
1603
101
  delta = isl_set_remove_divs(delta);
1604
101
  delta = isl_set_plain_gist_basic_set(delta, bounds);
1605
101
  return delta;
1606
101
}
1607
1608
/* Given a dependence relation R from "node" to itself,
1609
 * construct the set of coefficients of valid constraints for elements
1610
 * in that dependence relation.
1611
 * In particular, the result contains tuples of coefficients
1612
 * c_0, c_n, c_x such that
1613
 *
1614
 *  c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
1615
 *
1616
 * or, equivalently,
1617
 *
1618
 *  c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
1619
 *
1620
 * We choose here to compute the dual of delta R.
1621
 * Alternatively, we could have computed the dual of R, resulting
1622
 * in a set of tuples c_0, c_n, c_x, c_y, and then
1623
 * plugged in (c_0, c_n, c_x, -c_x).
1624
 *
1625
 * If "need_param" is set, then the resulting coefficients effectively
1626
 * include coefficients for the parameters c_n.  Otherwise, they may
1627
 * have been projected out already.
1628
 * Since the constraints may be different for these two cases,
1629
 * they are stored in separate caches.
1630
 * In particular, if no parameter coefficients are required and
1631
 * the schedule_treat_coalescing option is set, then the parameters
1632
 * are projected out and some constraints that could be exploited
1633
 * to construct coalescing schedules are removed before the dual
1634
 * is computed.
1635
 *
1636
 * If "node" has been compressed, then the dependence relation
1637
 * is also compressed before the set of coefficients is computed.
1638
 */
1639
static __isl_give isl_basic_set *intra_coefficients(
1640
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1641
  __isl_take isl_map *map, int need_param)
1642
993
{
1643
993
  isl_ctx *ctx;
1644
993
  isl_set *delta;
1645
993
  isl_map *key;
1646
993
  isl_basic_set *coef;
1647
993
  isl_maybe_isl_basic_set m;
1648
993
  isl_map_to_basic_set **hmap = &graph->intra_hmap;
1649
993
  int treat;
1650
993
1651
993
  if (!map)
1652
0
    return NULL;
1653
993
1654
993
  ctx = isl_map_get_ctx(map);
1655
993
  treat = !need_param && 
isl_options_get_schedule_treat_coalescing(ctx)707
;
1656
993
  if (!treat)
1657
318
    hmap = &graph->intra_hmap_param;
1658
993
  m = isl_map_to_basic_set_try_get(*hmap, map);
1659
993
  if (m.valid < 0 || m.valid) {
1660
799
    isl_map_free(map);
1661
799
    return m.value;
1662
799
  }
1663
194
1664
194
  key = isl_map_copy(map);
1665
194
  if (node->compressed) {
1666
0
    map = isl_map_preimage_domain_multi_aff(map,
1667
0
            isl_multi_aff_copy(node->decompress));
1668
0
    map = isl_map_preimage_range_multi_aff(map,
1669
0
            isl_multi_aff_copy(node->decompress));
1670
0
  }
1671
194
  delta = isl_map_deltas(map);
1672
194
  if (treat)
1673
101
    delta = drop_coalescing_constraints(delta, node);
1674
194
  delta = isl_set_remove_divs(delta);
1675
194
  coef = isl_set_coefficients(delta);
1676
194
  *hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef));
1677
194
1678
194
  return coef;
1679
194
}
1680
1681
/* Given a dependence relation R, construct the set of coefficients
1682
 * of valid constraints for elements in that dependence relation.
1683
 * In particular, the result contains tuples of coefficients
1684
 * c_0, c_n, c_x, c_y such that
1685
 *
1686
 *  c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
1687
 *
1688
 * If the source or destination nodes of "edge" have been compressed,
1689
 * then the dependence relation is also compressed before
1690
 * the set of coefficients is computed.
1691
 */
1692
static __isl_give isl_basic_set *inter_coefficients(
1693
  struct isl_sched_graph *graph, struct isl_sched_edge *edge,
1694
  __isl_take isl_map *map)
1695
604
{
1696
604
  isl_set *set;
1697
604
  isl_map *key;
1698
604
  isl_basic_set *coef;
1699
604
  isl_maybe_isl_basic_set m;
1700
604
1701
604
  m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
1702
604
  if (m.valid < 0 || m.valid) {
1703
519
    isl_map_free(map);
1704
519
    return m.value;
1705
519
  }
1706
85
1707
85
  key = isl_map_copy(map);
1708
85
  if (edge->src->compressed)
1709
3
    map = isl_map_preimage_domain_multi_aff(map,
1710
3
            isl_multi_aff_copy(edge->src->decompress));
1711
85
  if (edge->dst->compressed)
1712
3
    map = isl_map_preimage_range_multi_aff(map,
1713
3
            isl_multi_aff_copy(edge->dst->decompress));
1714
85
  set = isl_map_wrap(isl_map_remove_divs(map));
1715
85
  coef = isl_set_coefficients(set);
1716
85
  graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
1717
85
          isl_basic_set_copy(coef));
1718
85
1719
85
  return coef;
1720
85
}
1721
1722
/* Return the position of the coefficients of the variables in
1723
 * the coefficients constraints "coef".
1724
 *
1725
 * The space of "coef" is of the form
1726
 *
1727
 *  { coefficients[[cst, params] -> S] }
1728
 *
1729
 * Return the position of S.
1730
 */
1731
static int coef_var_offset(__isl_keep isl_basic_set *coef)
1732
1.02k
{
1733
1.02k
  int offset;
1734
1.02k
  isl_space *space;
1735
1.02k
1736
1.02k
  space = isl_space_unwrap(isl_basic_set_get_space(coef));
1737
1.02k
  offset = isl_space_dim(space, isl_dim_in);
1738
1.02k
  isl_space_free(space);
1739
1.02k
1740
1.02k
  return offset;
1741
1.02k
}
1742
1743
/* Return the offset of the coefficient of the constant term of "node"
1744
 * within the (I)LP.
1745
 *
1746
 * Within each node, the coefficients have the following order:
1747
 *  - positive and negative parts of c_i_x
1748
 *  - c_i_n (if parametric)
1749
 *  - c_i_0
1750
 */
1751
static int node_cst_coef_offset(struct isl_sched_node *node)
1752
1.43k
{
1753
1.43k
  return node->start + 2 * node->nvar + node->nparam;
1754
1.43k
}
1755
1756
/* Return the offset of the coefficients of the parameters of "node"
1757
 * within the (I)LP.
1758
 *
1759
 * Within each node, the coefficients have the following order:
1760
 *  - positive and negative parts of c_i_x
1761
 *  - c_i_n (if parametric)
1762
 *  - c_i_0
1763
 */
1764
static int node_par_coef_offset(struct isl_sched_node *node)
1765
1.91k
{
1766
1.91k
  return node->start + 2 * node->nvar;
1767
1.91k
}
1768
1769
/* Return the offset of the coefficients of the variables of "node"
1770
 * within the (I)LP.
1771
 *
1772
 * Within each node, the coefficients have the following order:
1773
 *  - positive and negative parts of c_i_x
1774
 *  - c_i_n (if parametric)
1775
 *  - c_i_0
1776
 */
1777
static int node_var_coef_offset(struct isl_sched_node *node)
1778
4.10k
{
1779
4.10k
  return node->start;
1780
4.10k
}
1781
1782
/* Return the position of the pair of variables encoding
1783
 * coefficient "i" of "node".
1784
 *
1785
 * The order of these variable pairs is the opposite of
1786
 * that of the coefficients, with 2 variables per coefficient.
1787
 */
1788
static int node_var_coef_pos(struct isl_sched_node *node, int i)
1789
2.42k
{
1790
2.42k
  return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i);
1791
2.42k
}
1792
1793
/* Construct an isl_dim_map for mapping constraints on coefficients
1794
 * for "node" to the corresponding positions in graph->lp.
1795
 * "offset" is the offset of the coefficients for the variables
1796
 * in the input constraints.
1797
 * "s" is the sign of the mapping.
1798
 *
1799
 * The input constraints are given in terms of the coefficients
1800
 * (c_0, c_x) or (c_0, c_n, c_x).
1801
 * The mapping produced by this function essentially plugs in
1802
 * (0, c_i_x^+ - c_i_x^-) if s = 1 and
1803
 * (0, -c_i_x^+ + c_i_x^-) if s = -1 or
1804
 * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
1805
 * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
1806
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1807
 * Furthermore, the order of these pairs is the opposite of that
1808
 * of the corresponding coefficients.
1809
 *
1810
 * The caller can extend the mapping to also map the other coefficients
1811
 * (and therefore not plug in 0).
1812
 */
1813
static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
1814
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1815
  int offset, int s)
1816
610
{
1817
610
  int pos;
1818
610
  unsigned total;
1819
610
  isl_dim_map *dim_map;
1820
610
1821
610
  if (!node || !graph->lp)
1822
0
    return NULL;
1823
610
1824
610
  total = isl_basic_set_total_dim(graph->lp);
1825
610
  pos = node_var_coef_pos(node, 0);
1826
610
  dim_map = isl_dim_map_alloc(ctx, total);
1827
610
  isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s);
1828
610
  isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s);
1829
610
1830
610
  return dim_map;
1831
610
}
1832
1833
/* Construct an isl_dim_map for mapping constraints on coefficients
1834
 * for "src" (node i) and "dst" (node j) to the corresponding positions
1835
 * in graph->lp.
1836
 * "offset" is the offset of the coefficients for the variables of "src"
1837
 * in the input constraints.
1838
 * "s" is the sign of the mapping.
1839
 *
1840
 * The input constraints are given in terms of the coefficients
1841
 * (c_0, c_n, c_x, c_y).
1842
 * The mapping produced by this function essentially plugs in
1843
 * (c_j_0 - c_i_0, c_j_n - c_i_n,
1844
 *  -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
1845
 * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
1846
 *  c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
1847
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1848
 * Furthermore, the order of these pairs is the opposite of that
1849
 * of the corresponding coefficients.
1850
 *
1851
 * The caller can further extend the mapping.
1852
 */
1853
static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
1854
  struct isl_sched_graph *graph, struct isl_sched_node *src,
1855
  struct isl_sched_node *dst, int offset, int s)
1856
418
{
1857
418
  int pos;
1858
418
  unsigned total;
1859
418
  isl_dim_map *dim_map;
1860
418
1861
418
  if (!src || !dst || !graph->lp)
1862
0
    return NULL;
1863
418
1864
418
  total = isl_basic_set_total_dim(graph->lp);
1865
418
  dim_map = isl_dim_map_alloc(ctx, total);
1866
418
1867
418
  pos = node_cst_coef_offset(dst);
1868
418
  isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s);
1869
418
  pos = node_par_coef_offset(dst);
1870
418
  isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s);
1871
418
  pos = node_var_coef_pos(dst, 0);
1872
418
  isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1,
1873
418
        dst->nvar, -s);
1874
418
  isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1,
1875
418
        dst->nvar, s);
1876
418
1877
418
  pos = node_cst_coef_offset(src);
1878
418
  isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s);
1879
418
  pos = node_par_coef_offset(src);
1880
418
  isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s);
1881
418
  pos = node_var_coef_pos(src, 0);
1882
418
  isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s);
1883
418
  isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s);
1884
418
1885
418
  return dim_map;
1886
418
}
1887
1888
/* Add the constraints from "src" to "dst" using "dim_map",
1889
 * after making sure there is enough room in "dst" for the extra constraints.
1890
 */
1891
static __isl_give isl_basic_set *add_constraints_dim_map(
1892
  __isl_take isl_basic_set *dst, __isl_take isl_basic_set *src,
1893
  __isl_take isl_dim_map *dim_map)
1894
1.02k
{
1895
1.02k
  int n_eq, n_ineq;
1896
1.02k
1897
1.02k
  n_eq = isl_basic_set_n_equality(src);
1898
1.02k
  n_ineq = isl_basic_set_n_inequality(src);
1899
1.02k
  dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq);
1900
1.02k
  dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map);
1901
1.02k
  return dst;
1902
1.02k
}
1903
1904
/* Add constraints to graph->lp that force validity for the given
1905
 * dependence from a node i to itself.
1906
 * That is, add constraints that enforce
1907
 *
1908
 *  (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
1909
 *  = c_i_x (y - x) >= 0
1910
 *
1911
 * for each (x,y) in R.
1912
 * We obtain general constraints on coefficients (c_0, c_x)
1913
 * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-),
1914
 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
1915
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1916
 * Note that the result of intra_coefficients may also contain
1917
 * parameter coefficients c_n, in which case 0 is plugged in for them as well.
1918
 */
1919
static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
1920
  struct isl_sched_edge *edge)
1921
284
{
1922
284
  int offset;
1923
284
  isl_map *map = isl_map_copy(edge->map);
1924
284
  isl_ctx *ctx = isl_map_get_ctx(map);
1925
284
  isl_dim_map *dim_map;
1926
284
  isl_basic_set *coef;
1927
284
  struct isl_sched_node *node = edge->src;
1928
284
1929
284
  coef = intra_coefficients(graph, node, map, 0);
1930
284
1931
284
  offset = coef_var_offset(coef);
1932
284
1933
284
  if (!coef)
1934
0
    return isl_stat_error;
1935
284
1936
284
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
1937
284
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1938
284
1939
284
  return isl_stat_ok;
1940
284
}
1941
1942
/* Add constraints to graph->lp that force validity for the given
1943
 * dependence from node i to node j.
1944
 * That is, add constraints that enforce
1945
 *
1946
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
1947
 *
1948
 * for each (x,y) in R.
1949
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1950
 * of valid constraints for R and then plug in
1951
 * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
1952
 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
1953
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1954
 */
1955
static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
1956
  struct isl_sched_edge *edge)
1957
195
{
1958
195
  int offset;
1959
195
  isl_map *map;
1960
195
  isl_ctx *ctx;
1961
195
  isl_dim_map *dim_map;
1962
195
  isl_basic_set *coef;
1963
195
  struct isl_sched_node *src = edge->src;
1964
195
  struct isl_sched_node *dst = edge->dst;
1965
195
1966
195
  if (!graph->lp)
1967
0
    return isl_stat_error;
1968
195
1969
195
  map = isl_map_copy(edge->map);
1970
195
  ctx = isl_map_get_ctx(map);
1971
195
  coef = inter_coefficients(graph, edge, map);
1972
195
1973
195
  offset = coef_var_offset(coef);
1974
195
1975
195
  if (!coef)
1976
0
    return isl_stat_error;
1977
195
1978
195
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
1979
195
1980
195
  edge->start = graph->lp->n_ineq;
1981
195
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1982
195
  if (!graph->lp)
1983
0
    return isl_stat_error;
1984
195
  edge->end = graph->lp->n_ineq;
1985
195
1986
195
  return isl_stat_ok;
1987
195
}
1988
1989
/* Add constraints to graph->lp that bound the dependence distance for the given
1990
 * dependence from a node i to itself.
1991
 * If s = 1, we add the constraint
1992
 *
1993
 *  c_i_x (y - x) <= m_0 + m_n n
1994
 *
1995
 * or
1996
 *
1997
 *  -c_i_x (y - x) + m_0 + m_n n >= 0
1998
 *
1999
 * for each (x,y) in R.
2000
 * If s = -1, we add the constraint
2001
 *
2002
 *  -c_i_x (y - x) <= m_0 + m_n n
2003
 *
2004
 * or
2005
 *
2006
 *  c_i_x (y - x) + m_0 + m_n n >= 0
2007
 *
2008
 * for each (x,y) in R.
2009
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
2010
 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
2011
 * with each coefficient (except m_0) represented as a pair of non-negative
2012
 * coefficients.
2013
 *
2014
 *
2015
 * If "local" is set, then we add constraints
2016
 *
2017
 *  c_i_x (y - x) <= 0
2018
 *
2019
 * or
2020
 *
2021
 *  -c_i_x (y - x) <= 0
2022
 *
2023
 * instead, forcing the dependence distance to be (less than or) equal to 0.
2024
 * That is, we plug in (0, 0, -s * c_i_x),
2025
 * intra_coefficients is not required to have c_n in its result when
2026
 * "local" is set.  If they are missing, then (0, -s * c_i_x) is plugged in.
2027
 * Note that dependences marked local are treated as validity constraints
2028
 * by add_all_validity_constraints and therefore also have
2029
 * their distances bounded by 0 from below.
2030
 */
2031
static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
2032
  struct isl_sched_edge *edge, int s, int local)
2033
286
{
2034
286
  int offset;
2035
286
  unsigned nparam;
2036
286
  isl_map *map = isl_map_copy(edge->map);
2037
286
  isl_ctx *ctx = isl_map_get_ctx(map);
2038
286
  isl_dim_map *dim_map;
2039
286
  isl_basic_set *coef;
2040
286
  struct isl_sched_node *node = edge->src;
2041
286
2042
286
  coef = intra_coefficients(graph, node, map, !local);
2043
286
2044
286
  offset = coef_var_offset(coef);
2045
286
2046
286
  if (!coef)
2047
0
    return isl_stat_error;
2048
286
2049
286
  nparam = isl_space_dim(node->space, isl_dim_param);
2050
286
  dim_map = intra_dim_map(ctx, graph, node, offset, -s);
2051
286
2052
286
  if (!local) {
2053
147
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2054
147
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2055
147
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2056
147
  }
2057
286
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2058
286
2059
286
  return isl_stat_ok;
2060
286
}
2061
2062
/* Add constraints to graph->lp that bound the dependence distance for the given
2063
 * dependence from node i to node j.
2064
 * If s = 1, we add the constraint
2065
 *
2066
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
2067
 *    <= m_0 + m_n n
2068
 *
2069
 * or
2070
 *
2071
 *  -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
2072
 *    m_0 + m_n n >= 0
2073
 *
2074
 * for each (x,y) in R.
2075
 * If s = -1, we add the constraint
2076
 *
2077
 *  -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
2078
 *    <= m_0 + m_n n
2079
 *
2080
 * or
2081
 *
2082
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
2083
 *    m_0 + m_n n >= 0
2084
 *
2085
 * for each (x,y) in R.
2086
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2087
 * of valid constraints for R and then plug in
2088
 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
2089
 *  s*c_i_x, -s*c_j_x)
2090
 * with each coefficient (except m_0, c_*_0 and c_*_n)
2091
 * represented as a pair of non-negative coefficients.
2092
 *
2093
 *
2094
 * If "local" is set (and s = 1), then we add constraints
2095
 *
2096
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
2097
 *
2098
 * or
2099
 *
2100
 *  -((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0
2101
 *
2102
 * instead, forcing the dependence distance to be (less than or) equal to 0.
2103
 * That is, we plug in
2104
 * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
2105
 * Note that dependences marked local are treated as validity constraints
2106
 * by add_all_validity_constraints and therefore also have
2107
 * their distances bounded by 0 from below.
2108
 */
2109
static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
2110
  struct isl_sched_edge *edge, int s, int local)
2111
205
{
2112
205
  int offset;
2113
205
  unsigned nparam;
2114
205
  isl_map *map = isl_map_copy(edge->map);
2115
205
  isl_ctx *ctx = isl_map_get_ctx(map);
2116
205
  isl_dim_map *dim_map;
2117
205
  isl_basic_set *coef;
2118
205
  struct isl_sched_node *src = edge->src;
2119
205
  struct isl_sched_node *dst = edge->dst;
2120
205
2121
205
  coef = inter_coefficients(graph, edge, map);
2122
205
2123
205
  offset = coef_var_offset(coef);
2124
205
2125
205
  if (!coef)
2126
0
    return isl_stat_error;
2127
205
2128
205
  nparam = isl_space_dim(src->space, isl_dim_param);
2129
205
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
2130
205
2131
205
  if (!local) {
2132
121
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2133
121
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2134
121
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2135
121
  }
2136
205
2137
205
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2138
205
2139
205
  return isl_stat_ok;
2140
205
}
2141
2142
/* Should the distance over "edge" be forced to zero?
2143
 * That is, is it marked as a local edge?
2144
 * If "use_coincidence" is set, then coincidence edges are treated
2145
 * as local edges.
2146
 */
2147
static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
2148
2.15k
{
2149
2.15k
  return is_local(edge) || 
(2.07k
use_coincidence2.07k
&&
is_coincidence(edge)727
);
2150
2.15k
}
2151
2152
/* Add all validity constraints to graph->lp.
2153
 *
2154
 * An edge that is forced to be local needs to have its dependence
2155
 * distances equal to zero.  We take care of bounding them by 0 from below
2156
 * here.  add_all_proximity_constraints takes care of bounding them by 0
2157
 * from above.
2158
 *
2159
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2160
 * Otherwise, we ignore them.
2161
 */
2162
static int add_all_validity_constraints(struct isl_sched_graph *graph,
2163
  int use_coincidence)
2164
405
{
2165
405
  int i;
2166
405
2167
992
  for (i = 0; i < graph->n_edge; 
++i587
) {
2168
587
    struct isl_sched_edge *edge = &graph->edge[i];
2169
587
    int zero;
2170
587
2171
587
    zero = force_zero(edge, use_coincidence);
2172
587
    if (!is_validity(edge) && 
!zero112
)
2173
108
      continue;
2174
479
    if (edge->src != edge->dst)
2175
195
      continue;
2176
284
    if (add_intra_validity_constraints(graph, edge) < 0)
2177
0
      return -1;
2178
284
  }
2179
405
2180
992
  
for (i = 0; 405
i < graph->n_edge;
++i587
) {
2181
587
    struct isl_sched_edge *edge = &graph->edge[i];
2182
587
    int zero;
2183
587
2184
587
    zero = force_zero(edge, use_coincidence);
2185
587
    if (!is_validity(edge) && 
!zero112
)
2186
108
      continue;
2187
479
    if (edge->src == edge->dst)
2188
284
      continue;
2189
195
    if (add_inter_validity_constraints(graph, edge) < 0)
2190
0
      return -1;
2191
195
  }
2192
405
2193
405
  return 0;
2194
405
}
2195
2196
/* Add constraints to graph->lp that bound the dependence distance
2197
 * for all dependence relations.
2198
 * If a given proximity dependence is identical to a validity
2199
 * dependence, then the dependence distance is already bounded
2200
 * from below (by zero), so we only need to bound the distance
2201
 * from above.  (This includes the case of "local" dependences
2202
 * which are treated as validity dependence by add_all_validity_constraints.)
2203
 * Otherwise, we need to bound the distance both from above and from below.
2204
 *
2205
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2206
 * Otherwise, we ignore them.
2207
 */
2208
static int add_all_proximity_constraints(struct isl_sched_graph *graph,
2209
  int use_coincidence)
2210
405
{
2211
405
  int i;
2212
405
2213
992
  for (i = 0; i < graph->n_edge; 
++i587
) {
2214
587
    struct isl_sched_edge *edge = &graph->edge[i];
2215
587
    int zero;
2216
587
2217
587
    zero = force_zero(edge, use_coincidence);
2218
587
    if (!is_proximity(edge) && 
!zero117
)
2219
113
      continue;
2220
474
    if (edge->src == edge->dst &&
2221
474
        
add_intra_proximity_constraints(graph, edge, 1, zero) < 0278
)
2222
0
      return -1;
2223
474
    if (edge->src != edge->dst &&
2224
474
        
add_inter_proximity_constraints(graph, edge, 1, zero) < 0196
)
2225
0
      return -1;
2226
474
    if (is_validity(edge) || 
zero21
)
2227
457
      continue;
2228
17
    if (edge->src == edge->dst &&
2229
17
        
add_intra_proximity_constraints(graph, edge, -1, 0) < 08
)
2230
0
      return -1;
2231
17
    if (edge->src != edge->dst &&
2232
17
        
add_inter_proximity_constraints(graph, edge, -1, 0) < 09
)
2233
0
      return -1;
2234
17
  }
2235
405
2236
405
  return 0;
2237
405
}
2238
2239
/* Normalize the rows of "indep" such that all rows are lexicographically
2240
 * positive and such that each row contains as many final zeros as possible,
2241
 * given the choice for the previous rows.
2242
 * Do this by performing elementary row operations.
2243
 */
2244
static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep)
2245
1.27k
{
2246
1.27k
  indep = isl_mat_reverse_gauss(indep);
2247
1.27k
  indep = isl_mat_lexnonneg_rows(indep);
2248
1.27k
  return indep;
2249
1.27k
}
2250
2251
/* Compute a basis for the rows in the linear part of the schedule
2252
 * and extend this basis to a full basis.  The remaining rows
2253
 * can then be used to force linear independence from the rows
2254
 * in the schedule.
2255
 *
2256
 * In particular, given the schedule rows S, we compute
2257
 *
2258
 *  S   = H Q
2259
 *  S U = H
2260
 *
2261
 * with H the Hermite normal form of S.  That is, all but the
2262
 * first rank columns of H are zero and so each row in S is
2263
 * a linear combination of the first rank rows of Q.
2264
 * The matrix Q can be used as a variable transformation
2265
 * that isolates the directions of S in the first rank rows.
2266
 * Transposing S U = H yields
2267
 *
2268
 *  U^T S^T = H^T
2269
 *
2270
 * with all but the first rank rows of H^T zero.
2271
 * The last rows of U^T are therefore linear combinations
2272
 * of schedule coefficients that are all zero on schedule
2273
 * coefficients that are linearly dependent on the rows of S.
2274
 * At least one of these combinations is non-zero on
2275
 * linearly independent schedule coefficients.
2276
 * The rows are normalized to involve as few of the last
2277
 * coefficients as possible and to have a positive initial value.
2278
 */
2279
static int node_update_vmap(struct isl_sched_node *node)
2280
1.27k
{
2281
1.27k
  isl_mat *H, *U, *Q;
2282
1.27k
  int n_row = isl_mat_rows(node->sched);
2283
1.27k
2284
1.27k
  H = isl_mat_sub_alloc(node->sched, 0, n_row,
2285
1.27k
            1 + node->nparam, node->nvar);
2286
1.27k
2287
1.27k
  H = isl_mat_left_hermite(H, 0, &U, &Q);
2288
1.27k
  isl_mat_free(node->indep);
2289
1.27k
  isl_mat_free(node->vmap);
2290
1.27k
  node->vmap = Q;
2291
1.27k
  node->indep = isl_mat_transpose(U);
2292
1.27k
  node->rank = isl_mat_initial_non_zero_cols(H);
2293
1.27k
  node->indep = isl_mat_drop_rows(node->indep, 0, node->rank);
2294
1.27k
  node->indep = normalize_independent(node->indep);
2295
1.27k
  isl_mat_free(H);
2296
1.27k
2297
1.27k
  if (!node->indep || !node->vmap || node->rank < 0)
2298
0
    return -1;
2299
1.27k
  return 0;
2300
1.27k
}
2301
2302
/* Is "edge" marked as a validity or a conditional validity edge?
2303
 */
2304
static int is_any_validity(struct isl_sched_edge *edge)
2305
100
{
2306
100
  return is_validity(edge) || 
is_conditional_validity(edge)2
;
2307
100
}
2308
2309
/* How many times should we count the constraints in "edge"?
2310
 *
2311
 * We count as follows
2312
 * validity   -> 1 (>= 0)
2313
 * validity+proximity -> 2 (>= 0 and upper bound)
2314
 * proximity    -> 2 (lower and upper bound)
2315
 * local(+any)    -> 2 (>= 0 and <= 0)
2316
 *
2317
 * If an edge is only marked conditional_validity then it counts
2318
 * as zero since it is only checked afterwards.
2319
 *
2320
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2321
 * Otherwise, we ignore them.
2322
 */
2323
static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
2324
587
{
2325
587
  if (is_proximity(edge) || 
force_zero(edge, use_coincidence)117
)
2326
474
    return 2;
2327
113
  if (is_validity(edge))
2328
22
    return 1;
2329
91
  return 0;
2330
91
}
2331
2332
/* How many times should the constraints in "edge" be counted
2333
 * as a parametric intra-node constraint?
2334
 *
2335
 * Only proximity edges that are not forced zero need
2336
 * coefficient constraints that include coefficients for parameters.
2337
 * If the edge is also a validity edge, then only
2338
 * an upper bound is introduced.  Otherwise, both lower and upper bounds
2339
 * are introduced.
2340
 */
2341
static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge,
2342
  int use_coincidence)
2343
587
{
2344
587
  if (edge->src != edge->dst)
2345
266
    return 0;
2346
321
  if (!is_proximity(edge))
2347
44
    return 0;
2348
277
  if (force_zero(edge, use_coincidence))
2349
138
    return 0;
2350
139
  if (is_validity(edge))
2351
131
    return 1;
2352
8
  else
2353
8
    return 2;
2354
139
}
2355
2356
/* Add "f" times the number of equality and inequality constraints of "bset"
2357
 * to "n_eq" and "n_ineq" and free "bset".
2358
 */
2359
static isl_stat update_count(__isl_take isl_basic_set *bset,
2360
  int f, int *n_eq, int *n_ineq)
2361
685
{
2362
685
  if (!bset)
2363
0
    return isl_stat_error;
2364
685
2365
685
  *n_eq += isl_basic_set_n_equality(bset);
2366
685
  *n_ineq += isl_basic_set_n_inequality(bset);
2367
685
  isl_basic_set_free(bset);
2368
685
2369
685
  return isl_stat_ok;
2370
685
}
2371
2372
/* Count the number of equality and inequality constraints
2373
 * that will be added for the given map.
2374
 *
2375
 * The edges that require parameter coefficients are counted separately.
2376
 *
2377
 * "use_coincidence" is set if we should take into account coincidence edges.
2378
 */
2379
static isl_stat count_map_constraints(struct isl_sched_graph *graph,
2380
  struct isl_sched_edge *edge, __isl_take isl_map *map,
2381
  int *n_eq, int *n_ineq, int use_coincidence)
2382
587
{
2383
587
  isl_map *copy;
2384
587
  isl_basic_set *coef;
2385
587
  int f = edge_multiplicity(edge, use_coincidence);
2386
587
  int fp = parametric_intra_edge_multiplicity(edge, use_coincidence);
2387
587
2388
587
  if (f == 0) {
2389
91
    isl_map_free(map);
2390
91
    return isl_stat_ok;
2391
91
  }
2392
496
2393
496
  if (edge->src != edge->dst) {
2394
204
    coef = inter_coefficients(graph, edge, map);
2395
204
    return update_count(coef, f, n_eq, n_ineq);
2396
204
  }
2397
292
2398
292
  if (fp > 0) {
2399
139
    copy = isl_map_copy(map);
2400
139
    coef = intra_coefficients(graph, edge->src, copy, 1);
2401
139
    if (update_count(coef, fp, n_eq, n_ineq) < 0)
2402
0
      goto error;
2403
292
  }
2404
292
2405
292
  if (f > fp) {
2406
284
    copy = isl_map_copy(map);
2407
284
    coef = intra_coefficients(graph, edge->src, copy, 0);
2408
284
    if (update_count(coef, f - fp, n_eq, n_ineq) < 0)
2409
0
      goto error;
2410
292
  }
2411
292
2412
292
  isl_map_free(map);
2413
292
  return isl_stat_ok;
2414
0
error:
2415
0
  isl_map_free(map);
2416
0
  return isl_stat_error;
2417
292
}
2418
2419
/* Count the number of equality and inequality constraints
2420
 * that will be added to the main lp problem.
2421
 * We count as follows
2422
 * validity   -> 1 (>= 0)
2423
 * validity+proximity -> 2 (>= 0 and upper bound)
2424
 * proximity    -> 2 (lower and upper bound)
2425
 * local(+any)    -> 2 (>= 0 and <= 0)
2426
 *
2427
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2428
 * Otherwise, we ignore them.
2429
 */
2430
static int count_constraints(struct isl_sched_graph *graph,
2431
  int *n_eq, int *n_ineq, int use_coincidence)
2432
405
{
2433
405
  int i;
2434
405
2435
405
  *n_eq = *n_ineq = 0;
2436
992
  for (i = 0; i < graph->n_edge; 
++i587
) {
2437
587
    struct isl_sched_edge *edge = &graph->edge[i];
2438
587
    isl_map *map = isl_map_copy(edge->map);
2439
587
2440
587
    if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
2441
587
              use_coincidence) < 0)
2442
0
      return -1;
2443
587
  }
2444
405
2445
405
  return 0;
2446
405
}
2447
2448
/* Count the number of constraints that will be added by
2449
 * add_bound_constant_constraints to bound the values of the constant terms
2450
 * and increment *n_eq and *n_ineq accordingly.
2451
 *
2452
 * In practice, add_bound_constant_constraints only adds inequalities.
2453
 */
2454
static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
2455
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2456
405
{
2457
405
  if (isl_options_get_schedule_max_constant_term(ctx) == -1)
2458
253
    return isl_stat_ok;
2459
152
2460
152
  *n_ineq += graph->n;
2461
152
2462
152
  return isl_stat_ok;
2463
152
}
2464
2465
/* Add constraints to bound the values of the constant terms in the schedule,
2466
 * if requested by the user.
2467
 *
2468
 * The maximal value of the constant terms is defined by the option
2469
 * "schedule_max_constant_term".
2470
 */
2471
static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
2472
  struct isl_sched_graph *graph)
2473
405
{
2474
405
  int i, k;
2475
405
  int max;
2476
405
  int total;
2477
405
2478
405
  max = isl_options_get_schedule_max_constant_term(ctx);
2479
405
  if (max == -1)
2480
253
    return isl_stat_ok;
2481
152
2482
152
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2483
152
2484
314
  for (i = 0; i < graph->n; 
++i162
) {
2485
162
    struct isl_sched_node *node = &graph->node[i];
2486
162
    int pos;
2487
162
2488
162
    k = isl_basic_set_alloc_inequality(graph->lp);
2489
162
    if (k < 0)
2490
0
      return isl_stat_error;
2491
162
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
2492
162
    pos = node_cst_coef_offset(node);
2493
162
    isl_int_set_si(graph->lp->ineq[k][1 + pos], -1);
2494
162
    isl_int_set_si(graph->lp->ineq[k][0], max);
2495
162
  }
2496
152
2497
152
  return isl_stat_ok;
2498
152
}
2499
2500
/* Count the number of constraints that will be added by
2501
 * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
2502
 * accordingly.
2503
 *
2504
 * In practice, add_bound_coefficient_constraints only adds inequalities.
2505
 */
2506
static int count_bound_coefficient_constraints(isl_ctx *ctx,
2507
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2508
405
{
2509
405
  int i;
2510
405
2511
405
  if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
2512
405
      
!isl_options_get_schedule_treat_coalescing(ctx)255
)
2513
8
    return 0;
2514
397
2515
956
  
for (i = 0; 397
i < graph->n;
++i559
)
2516
559
    *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
2517
397
2518
397
  return 0;
2519
397
}
2520
2521
/* Add constraints to graph->lp that bound the values of
2522
 * the parameter schedule coefficients of "node" to "max" and
2523
 * the variable schedule coefficients to the corresponding entry
2524
 * in node->max.
2525
 * In either case, a negative value means that no bound needs to be imposed.
2526
 *
2527
 * For parameter coefficients, this amounts to adding a constraint
2528
 *
2529
 *  c_n <= max
2530
 *
2531
 * i.e.,
2532
 *
2533
 *  -c_n + max >= 0
2534
 *
2535
 * The variables coefficients are, however, not represented directly.
2536
 * Instead, the variable coefficients c_x are written as differences
2537
 * c_x = c_x^+ - c_x^-.
2538
 * That is,
2539
 *
2540
 *  -max_i <= c_x_i <= max_i
2541
 *
2542
 * is encoded as
2543
 *
2544
 *  -max_i <= c_x_i^+ - c_x_i^- <= max_i
2545
 *
2546
 * or
2547
 *
2548
 *  -(c_x_i^+ - c_x_i^-) + max_i >= 0
2549
 *  c_x_i^+ - c_x_i^- + max_i >= 0
2550
 */
2551
static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
2552
  struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
2553
559
{
2554
559
  int i, j, k;
2555
559
  int total;
2556
559
  isl_vec *ineq;
2557
559
2558
559
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2559
559
2560
887
  for (j = 0; j < node->nparam; 
++j328
) {
2561
328
    int dim;
2562
328
2563
328
    if (max < 0)
2564
245
      continue;
2565
83
2566
83
    k = isl_basic_set_alloc_inequality(graph->lp);
2567
83
    if (k < 0)
2568
0
      return isl_stat_error;
2569
83
    dim = 1 + node_par_coef_offset(node) + j;
2570
83
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
2571
83
    isl_int_set_si(graph->lp->ineq[k][dim], -1);
2572
83
    isl_int_set_si(graph->lp->ineq[k][0], max);
2573
83
  }
2574
559
2575
559
  ineq = isl_vec_alloc(ctx, 1 + total);
2576
559
  ineq = isl_vec_clr(ineq);
2577
559
  if (!ineq)
2578
0
    return isl_stat_error;
2579
1.53k
  
for (i = 0; 559
i < node->nvar;
++i974
) {
2580
974
    int pos = 1 + node_var_coef_pos(node, i);
2581
974
2582
974
    if (isl_int_is_neg(node->max->el[i]))
2583
974
      
continue526
;
2584
448
2585
448
    isl_int_set_si(ineq->el[pos], 1);
2586
448
    isl_int_set_si(ineq->el[pos + 1], -1);
2587
448
    isl_int_set(ineq->el[0], node->max->el[i]);
2588
448
2589
448
    k = isl_basic_set_alloc_inequality(graph->lp);
2590
448
    if (k < 0)
2591
0
      goto error;
2592
448
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2593
448
2594
448
    isl_seq_neg(ineq->el + pos, ineq->el + pos, 2);
2595
448
    k = isl_basic_set_alloc_inequality(graph->lp);
2596
448
    if (k < 0)
2597
0
      goto error;
2598
448
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2599
448
2600
448
    isl_seq_clr(ineq->el + pos, 2);
2601
448
  }
2602
559
  isl_vec_free(ineq);
2603
559
2604
559
  return isl_stat_ok;
2605
0
error:
2606
0
  isl_vec_free(ineq);
2607
0
  return isl_stat_error;
2608
559
}
2609
2610
/* Add constraints that bound the values of the variable and parameter
2611
 * coefficients of the schedule.
2612
 *
2613
 * The maximal value of the coefficients is defined by the option
2614
 * 'schedule_max_coefficient' and the entries in node->max.
2615
 * These latter entries are only set if either the schedule_max_coefficient
2616
 * option or the schedule_treat_coalescing option is set.
2617
 */
2618
static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
2619
  struct isl_sched_graph *graph)
2620
405
{
2621
405
  int i;
2622
405
  int max;
2623
405
2624
405
  max = isl_options_get_schedule_max_coefficient(ctx);
2625
405
2626
405
  if (max == -1 && 
!isl_options_get_schedule_treat_coalescing(ctx)255
)
2627
8
    return isl_stat_ok;
2628
397
2629
956
  
for (i = 0; 397
i < graph->n;
++i559
) {
2630
559
    struct isl_sched_node *node = &graph->node[i];
2631
559
2632
559
    if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
2633
0
      return isl_stat_error;
2634
559
  }
2635
397
2636
397
  return isl_stat_ok;
2637
397
}
2638
2639
/* Add a constraint to graph->lp that equates the value at position
2640
 * "sum_pos" to the sum of the "n" values starting at "first".
2641
 */
2642
static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
2643
  int sum_pos, int first, int n)
2644
405
{
2645
405
  int i, k;
2646
405
  int total;
2647
405
2648
405
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2649
405
2650
405
  k = isl_basic_set_alloc_equality(graph->lp);
2651
405
  if (k < 0)
2652
0
    return isl_stat_error;
2653
405
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2654
405
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2655
957
  for (i = 0; i < n; 
++i552
)
2656
552
    isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
2657
405
2658
405
  return isl_stat_ok;
2659
405
}
2660
2661
/* Add a constraint to graph->lp that equates the value at position
2662
 * "sum_pos" to the sum of the parameter coefficients of all nodes.
2663
 */
2664
static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
2665
  int sum_pos)
2666
401
{
2667
401
  int i, j, k;
2668
401
  int total;
2669
401
2670
401
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2671
401
2672
401
  k = isl_basic_set_alloc_equality(graph->lp);
2673
401
  if (k < 0)
2674
0
    return isl_stat_error;
2675
401
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2676
401
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2677
954
  for (i = 0; i < graph->n; 
++i553
) {
2678
553
    int pos = 1 + node_par_coef_offset(&graph->node[i]);
2679
553
2680
896
    for (j = 0; j < graph->node[i].nparam; 
++j343
)
2681
553
      
isl_int_set_si343
(graph->lp->eq[k][pos + j], 1);
2682
553
  }
2683
401
2684
401
  return isl_stat_ok;
2685
401
}
2686
2687
/* Add a constraint to graph->lp that equates the value at position
2688
 * "sum_pos" to the sum of the variable coefficients of all nodes.
2689
 */
2690
static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
2691
  int sum_pos)
2692
433
{
2693
433
  int i, j, k;
2694
433
  int total;
2695
433
2696
433
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2697
433
2698
433
  k = isl_basic_set_alloc_equality(graph->lp);
2699
433
  if (k < 0)
2700
0
    return isl_stat_error;
2701
433
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2702
433
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2703
1.04k
  for (i = 0; i < graph->n; 
++i615
) {
2704
615
    struct isl_sched_node *node = &graph->node[i];
2705
615
    int pos = 1 + node_var_coef_offset(node);
2706
615
2707
2.78k
    for (j = 0; j < 2 * node->nvar; 
++j2.17k
)
2708
2.17k
      isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2709
615
  }
2710
433
2711
433
  return isl_stat_ok;
2712
433
}
2713
2714
/* Construct an ILP problem for finding schedule coefficients
2715
 * that result in non-negative, but small dependence distances
2716
 * over all dependences.
2717
 * In particular, the dependence distances over proximity edges
2718
 * are bounded by m_0 + m_n n and we compute schedule coefficients
2719
 * with small values (preferably zero) of m_n and m_0.
2720
 *
2721
 * All variables of the ILP are non-negative.  The actual coefficients
2722
 * may be negative, so each coefficient is represented as the difference
2723
 * of two non-negative variables.  The negative part always appears
2724
 * immediately before the positive part.
2725
 * Other than that, the variables have the following order
2726
 *
2727
 *  - sum of positive and negative parts of m_n coefficients
2728
 *  - m_0
2729
 *  - sum of all c_n coefficients
2730
 *    (unconstrained when computing non-parametric schedules)
2731
 *  - sum of positive and negative parts of all c_x coefficients
2732
 *  - positive and negative parts of m_n coefficients
2733
 *  - for each node
2734
 *    - positive and negative parts of c_i_x, in opposite order
2735
 *    - c_i_n (if parametric)
2736
 *    - c_i_0
2737
 *
2738
 * The constraints are those from the edges plus two or three equalities
2739
 * to express the sums.
2740
 *
2741
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2742
 * Otherwise, we ignore them.
2743
 */
2744
static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
2745
  int use_coincidence)
2746
405
{
2747
405
  int i;
2748
405
  unsigned nparam;
2749
405
  unsigned total;
2750
405
  isl_space *space;
2751
405
  int parametric;
2752
405
  int param_pos;
2753
405
  int n_eq, n_ineq;
2754
405
2755
405
  parametric = ctx->opt->schedule_parametric;
2756
405
  nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
2757
405
  param_pos = 4;
2758
405
  total = param_pos + 2 * nparam;
2759
980
  for (i = 0; i < graph->n; 
++i575
) {
2760
575
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2761
575
    if (node_update_vmap(node) < 0)
2762
0
      return isl_stat_error;
2763
575
    node->start = total;
2764
575
    total += 1 + node->nparam + 2 * node->nvar;
2765
575
  }
2766
405
2767
405
  if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0)
2768
0
    return isl_stat_error;
2769
405
  if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2770
0
    return isl_stat_error;
2771
405
  if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2772
0
    return isl_stat_error;
2773
405
2774
405
  space = isl_space_set_alloc(ctx, 0, total);
2775
405
  isl_basic_set_free(graph->lp);
2776
405
  n_eq += 2 + parametric;
2777
405
2778
405
  graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
2779
405
2780
405
  if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
2781
0
    return isl_stat_error;
2782
405
  if (parametric && 
add_param_sum_constraint(graph, 2) < 0373
)
2783
0
    return isl_stat_error;
2784
405
  if (add_var_sum_constraint(graph, 3) < 0)
2785
0
    return isl_stat_error;
2786
405
  if (add_bound_constant_constraints(ctx, graph) < 0)
2787
0
    return isl_stat_error;
2788
405
  if (add_bound_coefficient_constraints(ctx, graph) < 0)
2789
0
    return isl_stat_error;
2790
405
  if (add_all_validity_constraints(graph, use_coincidence) < 0)
2791
0
    return isl_stat_error;
2792
405
  if (add_all_proximity_constraints(graph, use_coincidence) < 0)
2793
0
    return isl_stat_error;
2794
405
2795
405
  return isl_stat_ok;
2796
405
}
2797
2798
/* Analyze the conflicting constraint found by
2799
 * isl_tab_basic_set_non_trivial_lexmin.  If it corresponds to the validity
2800
 * constraint of one of the edges between distinct nodes, living, moreover
2801
 * in distinct SCCs, then record the source and sink SCC as this may
2802
 * be a good place to cut between SCCs.
2803
 */
2804
static int check_conflict(int con, void *user)
2805
1.14k
{
2806
1.14k
  int i;
2807
1.14k
  struct isl_sched_graph *graph = user;
2808
1.14k
2809
1.14k
  if (graph->src_scc >= 0)
2810
35
    return 0;
2811
1.11k
2812
1.11k
  con -= graph->lp->n_eq;
2813
1.11k
2814
1.11k
  if (con >= graph->lp->n_ineq)
2815
519
    return 0;
2816
591
2817
1.96k
  
for (i = 0; 591
i < graph->n_edge;
++i1.37k
) {
2818
1.37k
    if (!is_validity(&graph->edge[i]))
2819
135
      continue;
2820
1.23k
    if (graph->edge[i].src == graph->edge[i].dst)
2821
671
      continue;
2822
568
    if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
2823
497
      continue;
2824
71
    if (graph->edge[i].start > con)
2825
42
      continue;
2826
29
    if (graph->edge[i].end <= con)
2827
21
      continue;
2828
8
    graph->src_scc = graph->edge[i].src->scc;
2829
8
    graph->dst_scc = graph->edge[i].dst->scc;
2830
8
  }
2831
591
2832
591
  return 0;
2833
591
}
2834
2835
/* Check whether the next schedule row of the given node needs to be
2836
 * non-trivial.  Lower-dimensional domains may have some trivial rows,
2837
 * but as soon as the number of remaining required non-trivial rows
2838
 * is as large as the number or remaining rows to be computed,
2839
 * all remaining rows need to be non-trivial.
2840
 */
2841
static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
2842
607
{
2843
607
  return node->nvar - node->rank >= graph->maxvar - graph->n_row;
2844
607
}
2845
2846
/* Construct a non-triviality region with triviality directions
2847
 * corresponding to the rows of "indep".
2848
 * The rows of "indep" are expressed in terms of the schedule coefficients c_i,
2849
 * while the triviality directions are expressed in terms of
2850
 * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing
2851
 * before c^+_i.  Furthermore,
2852
 * the pairs of non-negative variables representing the coefficients
2853
 * are stored in the opposite order.
2854
 */
2855
static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep)
2856
471
{
2857
471
  isl_ctx *ctx;
2858
471
  isl_mat *mat;
2859
471
  int i, j, n, n_var;
2860
471
2861
471
  if (!indep)
2862
0
    return NULL;
2863
471
2864
471
  ctx = isl_mat_get_ctx(indep);
2865
471
  n = isl_mat_rows(indep);
2866
471
  n_var = isl_mat_cols(indep);
2867
471
  mat = isl_mat_alloc(ctx, n, 2 * n_var);
2868
471
  if (!mat)
2869
0
    return NULL;
2870
1.15k
  
for (i = 0; 471
i < n;
++i679
) {
2871
2.16k
    for (j = 0; j < n_var; 
++j1.48k
) {
2872
1.48k
      int nj = n_var - 1 - j;
2873
1.48k
      isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]);
2874
1.48k
      isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]);
2875
1.48k
    }
2876
679
  }
2877
471
2878
471
  return mat;
2879
471
}
2880
2881
/* Solve the ILP problem constructed in setup_lp.
2882
 * For each node such that all the remaining rows of its schedule
2883
 * need to be non-trivial, we construct a non-triviality region.
2884
 * This region imposes that the next row is independent of previous rows.
2885
 * In particular, the non-triviality region enforces that at least
2886
 * one of the linear combinations in the rows of node->indep is non-zero.
2887
 */
2888
static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
2889
405
{
2890
405
  int i;
2891
405
  isl_vec *sol;
2892
405
  isl_basic_set *lp;
2893
405
2894
980
  for (i = 0; i < graph->n; 
++i575
) {
2895
575
    struct isl_sched_node *node = &graph->node[i];
2896
575
    isl_mat *trivial;
2897
575
2898
575
    graph->region[i].pos = node_var_coef_offset(node);
2899
575
    if (needs_row(graph, node))
2900
471
      trivial = construct_trivial(node->indep);
2901
104
    else
2902
104
      trivial = isl_mat_zero(ctx, 0, 0);
2903
575
    graph->region[i].trivial = trivial;
2904
575
  }
2905
405
  lp = isl_basic_set_copy(graph->lp);
2906
405
  sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
2907
405
               graph->region, &check_conflict, graph);
2908
980
  for (i = 0; i < graph->n; 
++i575
)
2909
575
    isl_mat_free(graph->region[i].trivial);
2910
405
  return sol;
2911
405
}
2912
2913
/* Extract the coefficients for the variables of "node" from "sol".
2914
 *
2915
 * Each schedule coefficient c_i_x is represented as the difference
2916
 * between two non-negative variables c_i_x^+ - c_i_x^-.
2917
 * The c_i_x^- appear before their c_i_x^+ counterpart.
2918
 * Furthermore, the order of these pairs is the opposite of that
2919
 * of the corresponding coefficients.
2920
 *
2921
 * Return c_i_x = c_i_x^+ - c_i_x^-
2922
 */
2923
static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
2924
  __isl_keep isl_vec *sol)
2925
488
{
2926
488
  int i;
2927
488
  int pos;
2928
488
  isl_vec *csol;
2929
488
2930
488
  if (!sol)
2931
0
    return NULL;
2932
488
  csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
2933
488
  if (!csol)
2934
0
    return NULL;
2935
488
2936
488
  pos = 1 + node_var_coef_offset(node);
2937
1.40k
  for (i = 0; i < node->nvar; 
++i917
)
2938
917
    isl_int_sub(csol->el[node->nvar - 1 - i],
2939
488
          sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
2940
488
2941
488
  return csol;
2942
488
}
2943
2944
/* Update the schedules of all nodes based on the given solution
2945
 * of the LP problem.
2946
 * The new row is added to the current band.
2947
 * All possibly negative coefficients are encoded as a difference
2948
 * of two non-negative variables, so we need to perform the subtraction
2949
 * here.
2950
 *
2951
 * If coincident is set, then the caller guarantees that the new
2952
 * row satisfies the coincidence constraints.
2953
 */
2954
static int update_schedule(struct isl_sched_graph *graph,
2955
  __isl_take isl_vec *sol, int coincident)
2956
324
{
2957
324
  int i, j;
2958
324
  isl_vec *csol = NULL;
2959
324
2960
324
  if (!sol)
2961
0
    goto error;
2962
324
  if (sol->size == 0)
2963
324
    
isl_die0
(sol->ctx, isl_error_internal,
2964
324
      "no solution found", goto error);
2965
324
  if (graph->n_total_row >= graph->max_row)
2966
324
    
isl_die0
(sol->ctx, isl_error_internal,
2967
324
      "too many schedule rows", goto error);
2968
324
2969
763
  
for (i = 0; 324
i < graph->n;
++i439
) {
2970
439
    struct isl_sched_node *node = &graph->node[i];
2971
439
    int pos;
2972
439
    int row = isl_mat_rows(node->sched);
2973
439
2974
439
    isl_vec_free(csol);
2975
439
    csol = extract_var_coef(node, sol);
2976
439
    if (!csol)
2977
0
      goto error;
2978
439
2979
439
    isl_map_free(node->sched_map);
2980
439
    node->sched_map = NULL;
2981
439
    node->sched = isl_mat_add_rows(node->sched, 1);
2982
439
    if (!node->sched)
2983
0
      goto error;
2984
439
    pos = node_cst_coef_offset(node);
2985
439
    node->sched = isl_mat_set_element(node->sched,
2986
439
          row, 0, sol->el[1 + pos]);
2987
439
    pos = node_par_coef_offset(node);
2988
681
    for (j = 0; j < node->nparam; 
++j242
)
2989
242
      node->sched = isl_mat_set_element(node->sched,
2990
242
          row, 1 + j, sol->el[1 + pos + j]);
2991
1.26k
    for (j = 0; j < node->nvar; 
++j830
)
2992
830
      node->sched = isl_mat_set_element(node->sched,
2993
830
          row, 1 + node->nparam + j, csol->el[j]);
2994
439
    node->coincident[graph->n_total_row] = coincident;
2995
439
  }
2996
324
  isl_vec_free(sol);
2997
324
  isl_vec_free(csol);
2998
324
2999
324
  graph->n_row++;
3000
324
  graph->n_total_row++;
3001
324
3002
324
  return 0;
3003
0
error:
3004
0
  isl_vec_free(sol);
3005
0
  isl_vec_free(csol);
3006
0
  return -1;
3007
324
}
3008
3009
/* Convert row "row" of node->sched into an isl_aff living in "ls"
3010
 * and return this isl_aff.
3011
 */
3012
static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
3013
  struct isl_sched_node *node, int row)
3014
1.02k
{
3015
1.02k
  int j;
3016
1.02k
  isl_int v;
3017
1.02k
  isl_aff *aff;
3018
1.02k
3019
1.02k
  isl_int_init(v);
3020
1.02k
3021
1.02k
  aff = isl_aff_zero_on_domain(ls);
3022
1.02k
  if (isl_mat_get_element(node->sched, row, 0, &v) < 0)
3023
0
    goto error;
3024
1.02k
  aff = isl_aff_set_constant(aff, v);
3025
1.59k
  for (j = 0; j < node->nparam; 
++j570
) {
3026
570
    if (isl_mat_get_element(node->sched, row, 1 + j, &v) < 0)
3027
0
      goto error;
3028
570
    aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
3029
570
  }
3030
2.87k
  
for (j = 0; 1.02k
j < node->nvar;
++j1.85k
) {
3031
1.85k
    if (isl_mat_get_element(node->sched, row,
3032
1.85k
          1 + node->nparam + j, &v) < 0)
3033
0
      goto error;
3034
1.85k
    aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
3035
1.85k
  }
3036
1.02k
3037
1.02k
  isl_int_clear(v);
3038
1.02k
3039
1.02k
  return aff;
3040
0
error:
3041
0
  isl_int_clear(v);
3042
0
  isl_aff_free(aff);
3043
0
  return NULL;
3044
1.02k
}
3045
3046
/* Convert the "n" rows starting at "first" of node->sched into a multi_aff
3047
 * and return this multi_aff.
3048
 *
3049
 * The result is defined over the uncompressed node domain.
3050
 */
3051
static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff(
3052
  struct isl_sched_node *node, int first, int n)
3053
748
{
3054
748
  int i;
3055
748
  isl_space *space;
3056
748
  isl_local_space *ls;
3057
748
  isl_aff *aff;
3058
748
  isl_multi_aff *ma;
3059
748
  int nrow;
3060
748
3061
748
  if (!node)
3062
0
    return NULL;
3063
748
  nrow = isl_mat_rows(node->sched);
3064
748
  if (node->compressed)
3065
66
    space = isl_multi_aff_get_domain_space(node->decompress);
3066
682
  else
3067
682
    space = isl_space_copy(node->space);
3068
748
  ls = isl_local_space_from_space(isl_space_copy(space));
3069
748
  space = isl_space_from_domain(space);
3070
748
  space = isl_space_add_dims(space, isl_dim_out, n);
3071
748
  ma = isl_multi_aff_zero(space);
3072
748
3073
1.76k
  for (i = first; i < first + n; 
++i1.02k
) {
3074
1.02k
    aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
3075
1.02k
    ma = isl_multi_aff_set_aff(ma, i - first, aff);
3076
1.02k
  }
3077
748
3078
748
  isl_local_space_free(ls);
3079
748
3080
748
  if (node->compressed)
3081
66
    ma = isl_multi_aff_pullback_multi_aff(ma,
3082
66
          isl_multi_aff_copy(node->compress));
3083
748
3084
748
  return ma;
3085
748
}
3086
3087
/* Convert node->sched into a multi_aff and return this multi_aff.
3088
 *
3089
 * The result is defined over the uncompressed node domain.
3090
 */
3091
static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
3092
  struct isl_sched_node *node)
3093
213
{
3094
213
  int nrow;
3095
213
3096
213
  nrow = isl_mat_rows(node->sched);
3097
213
  return node_extract_partial_schedule_multi_aff(node, 0, nrow);
3098
213
}
3099
3100
/* Convert node->sched into a map and return this map.
3101
 *
3102
 * The result is cached in node->sched_map, which needs to be released
3103
 * whenever node->sched is updated.
3104
 * It is defined over the uncompressed node domain.
3105
 */
3106
static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
3107
624
{
3108
624
  if (!node->sched_map) {
3109
213
    isl_multi_aff *ma;
3110
213
3111
213
    ma = node_extract_schedule_multi_aff(node);
3112
213
    node->sched_map = isl_map_from_multi_aff(ma);
3113
213
  }
3114
624
3115
624
  return isl_map_copy(node->sched_map);
3116
624
}
3117
3118
/* Construct a map that can be used to update a dependence relation
3119
 * based on the current schedule.
3120
 * That is, construct a map expressing that source and sink
3121
 * are executed within the same iteration of the current schedule.
3122
 * This map can then be intersected with the dependence relation.
3123
 * This is not the most efficient way, but this shouldn't be a critical
3124
 * operation.
3125
 */
3126
static __isl_give isl_map *specializer(struct isl_sched_node *src,
3127
  struct isl_sched_node *dst)
3128
278
{
3129
278
  isl_map *src_sched, *dst_sched;
3130
278
3131
278
  src_sched = node_extract_schedule(src);
3132
278
  dst_sched = node_extract_schedule(dst);
3133
278
  return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
3134
278
}
3135
3136
/* Intersect the domains of the nested relations in domain and range
3137
 * of "umap" with "map".
3138
 */
3139
static __isl_give isl_union_map *intersect_domains(
3140
  __isl_take isl_union_map *umap, __isl_keep isl_map *map)
3141
81
{
3142
81
  isl_union_set *uset;
3143
81
3144
81
  umap = isl_union_map_zip(umap);
3145
81
  uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
3146
81
  umap = isl_union_map_intersect_domain(umap, uset);
3147
81
  umap = isl_union_map_zip(umap);
3148
81
  return umap;
3149
81
}
3150
3151
/* Update the dependence relation of the given edge based
3152
 * on the current schedule.
3153
 * If the dependence is carried completely by the current schedule, then
3154
 * it is removed from the edge_tables.  It is kept in the list of edges
3155
 * as otherwise all edge_tables would have to be recomputed.
3156
 *
3157
 * If the edge is of a type that can appear multiple times
3158
 * between the same pair of nodes, then it is added to
3159
 * the edge table (again).  This prevents the situation
3160
 * where none of these edges is referenced from the edge table
3161
 * because the one that was referenced turned out to be empty and
3162
 * was therefore removed from the table.
3163
 */
3164
static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph,
3165
  struct isl_sched_edge *edge)
3166
278
{
3167
278
  int empty;
3168
278
  isl_map *id;
3169
278
3170
278
  id = specializer(edge->src, edge->dst);
3171
278
  edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
3172
278
  if (!edge->map)
3173
0
    goto error;
3174
278
3175
278
  if (edge->tagged_condition) {
3176
38
    edge->tagged_condition =
3177
38
      intersect_domains(edge->tagged_condition, id);
3178
38
    if (!edge->tagged_condition)
3179
0
      goto error;
3180
278
  }
3181
278
  if (edge->tagged_validity) {
3182
43
    edge->tagged_validity =
3183
43
      intersect_domains(edge->tagged_validity, id);
3184
43
    if (!edge->tagged_validity)
3185
0
      goto error;
3186
278
  }
3187
278
3188
278
  empty = isl_map_plain_is_empty(edge->map);
3189
278
  if (empty < 0)
3190
0
    goto error;
3191
278
  if (empty) {
3192
168
    graph_remove_edge(graph, edge);
3193
168
  } else 
if (110
is_multi_edge_type(edge)110
) {
3194
33
    if (graph_edge_tables_add(ctx, graph, edge) < 0)
3195
0
      goto error;
3196
278
  }
3197
278
3198
278
  isl_map_free(id);
3199
278
  return isl_stat_ok;
3200
0
error:
3201
0
  isl_map_free(id);
3202
0
  return isl_stat_error;
3203
278
}
3204
3205
/* Does the domain of "umap" intersect "uset"?
3206
 */
3207
static int domain_intersects(__isl_keep isl_union_map *umap,
3208
  __isl_keep isl_union_set *uset)
3209
39
{
3210
39
  int empty;
3211
39
3212
39
  umap = isl_union_map_copy(umap);
3213
39
  umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
3214
39
  empty = isl_union_map_is_empty(umap);
3215
39
  isl_union_map_free(umap);
3216
39
3217
39
  return empty < 0 ? 
-10
: !empty;
3218
39
}
3219
3220
/* Does the range of "umap" intersect "uset"?
3221
 */
3222
static int range_intersects(__isl_keep isl_union_map *umap,
3223
  __isl_keep isl_union_set *uset)
3224
26
{
3225
26
  int empty;
3226
26
3227
26
  umap = isl_union_map_copy(umap);
3228
26
  umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
3229
26
  empty = isl_union_map_is_empty(umap);
3230
26
  isl_union_map_free(umap);
3231
26
3232
26
  return empty < 0 ? 
-10
: !empty;
3233
26
}
3234
3235
/* Are the condition dependences of "edge" local with respect to
3236
 * the current schedule?
3237
 *
3238
 * That is, are domain and range of the condition dependences mapped
3239
 * to the same point?
3240
 *
3241
 * In other words, is the condition false?
3242
 */
3243
static int is_condition_false(struct isl_sched_edge *edge)
3244
36
{
3245
36
  isl_union_map *umap;
3246
36
  isl_map *map, *sched, *test;
3247
36
  int empty, local;
3248
36
3249
36
  empty = isl_union_map_is_empty(edge->tagged_condition);
3250
36
  if (empty < 0 || empty)
3251
2
    return empty;
3252
34
3253
34
  umap = isl_union_map_copy(edge->tagged_condition);
3254
34
  umap = isl_union_map_zip(umap);
3255
34
  umap = isl_union_set_unwrap(isl_union_map_domain(umap));
3256
34
  map = isl_map_from_union_map(umap);
3257
34
3258
34
  sched = node_extract_schedule(edge->src);
3259
34
  map = isl_map_apply_domain(map, sched);
3260
34
  sched = node_extract_schedule(edge->dst);
3261
34
  map = isl_map_apply_range(map, sched);
3262
34
3263
34
  test = isl_map_identity(isl_map_get_space(map));
3264
34
  local = isl_map_is_subset(map, test);
3265
34
  isl_map_free(map);
3266
34
  isl_map_free(test);
3267
34
3268
34
  return local;
3269
34
}
3270
3271
/* For each conditional validity constraint that is adjacent
3272
 * to a condition with domain in condition_source or range in condition_sink,
3273
 * turn it into an unconditional validity constraint.
3274
 */
3275
static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
3276
  __isl_take isl_union_set *condition_source,
3277
  __isl_take isl_union_set *condition_sink)
3278
10
{
3279
10
  int i;
3280
10
3281
10
  condition_source = isl_union_set_coalesce(condition_source);
3282
10
  condition_sink = isl_union_set_coalesce(condition_sink);
3283
10
3284
51
  for (i = 0; i < graph->n_edge; 
++i41
) {
3285
41
    int adjacent;
3286
41
    isl_union_map *validity;
3287
41
3288
41
    if (!is_conditional_validity(&graph->edge[i]))
3289
18
      continue;
3290
23
    if (is_validity(&graph->edge[i]))
3291
4
      continue;
3292
19
3293
19
    validity = graph->edge[i].tagged_validity;
3294
19
    adjacent = domain_intersects(validity, condition_sink);
3295
19
    if (adjacent >= 0 && !adjacent)
3296
18
      adjacent = range_intersects(validity, condition_source);
3297
19
    if (adjacent < 0)
3298
0
      goto error;
3299
19
    if (!adjacent)
3300
17
      continue;
3301
2
3302
2
    set_validity(&graph->edge[i]);
3303
2
  }
3304
10
3305
10
  isl_union_set_free(condition_source);
3306
10
  isl_union_set_free(condition_sink);
3307
10
  return 0;
3308
0
error:
3309
0
  isl_union_set_free(condition_source);
3310
0
  isl_union_set_free(condition_sink);
3311
0
  return -1;
3312
10
}
3313
3314
/* Update the dependence relations of all edges based on the current schedule
3315
 * and enforce conditional validity constraints that are adjacent
3316
 * to satisfied condition constraints.
3317
 *
3318
 * First check if any of the condition constraints are satisfied
3319
 * (i.e., not local to the outer schedule) and keep track of
3320
 * their domain and range.
3321
 * Then update all dependence relations (which removes the non-local
3322
 * constraints).
3323
 * Finally, if any condition constraints turned out to be satisfied,
3324
 * then turn all adjacent conditional validity constraints into
3325
 * unconditional validity constraints.
3326
 */
3327
static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
3328
186
{
3329
186
  int i;
3330
186
  int any = 0;
3331
186
  isl_union_set *source, *sink;
3332
186
3333
186
  source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3334
186
  sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3335
464
  for (i = 0; i < graph->n_edge; 
++i278
) {
3336
278
    int local;
3337
278
    isl_union_set *uset;
3338
278
    isl_union_map *umap;
3339
278
3340
278
    if (!is_condition(&graph->edge[i]))
3341
240
      continue;
3342
38
    if (is_local(&graph->edge[i]))
3343
17
      continue;
3344
21
    local = is_condition_false(&graph->edge[i]);
3345
21
    if (local < 0)
3346
0
      goto error;
3347
21
    if (local)
3348
10
      continue;
3349
11
3350
11
    any = 1;
3351
11
3352
11
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3353
11
    uset = isl_union_map_domain(umap);
3354
11
    source = isl_union_set_union(source, uset);
3355
11
3356
11
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3357
11
    uset = isl_union_map_range(umap);
3358
11
    sink = isl_union_set_union(sink, uset);
3359
11
  }
3360
186
3361
464
  
for (i = 0; 186
i < graph->n_edge;
++i278
) {
3362
278
    if (update_edge(ctx, graph, &graph->edge[i]) < 0)
3363
0
      goto error;
3364
278
  }
3365
186
3366
186
  if (any)
3367
10
    return unconditionalize_adjacent_validity(graph, source, sink);
3368
176
3369
176
  isl_union_set_free(source);
3370
176
  isl_union_set_free(sink);
3371
176
  return 0;
3372
0
error:
3373
0
  isl_union_set_free(source);
3374
0
  isl_union_set_free(sink);
3375
0
  return -1;
3376
176
}
3377
3378
static void next_band(struct isl_sched_graph *graph)
3379
188
{
3380
188
  graph->band_start = graph->n_total_row;
3381
188
}
3382
3383
/* Return the union of the universe domains of the nodes in "graph"
3384
 * that satisfy "pred".
3385
 */
3386
static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
3387
  struct isl_sched_graph *graph,
3388
  int (*pred)(struct isl_sched_node *node, int data), int data)
3389
152
{
3390
152
  int i;
3391
152
  isl_set *set;
3392
152
  isl_union_set *dom;
3393
152
3394
255
  for (i = 0; i < graph->n; 
++i103
)
3395
255
    if (pred(&graph->node[i], data))
3396
152
      break;
3397
152
3398
152
  if (i >= graph->n)
3399
152
    
isl_die0
(ctx, isl_error_internal,
3400
152
      "empty component", return NULL);
3401
152
3402
152
  set = isl_set_universe(isl_space_copy(graph->node[i].space));
3403
152
  dom = isl_union_set_from_set(set);
3404
152
3405
278
  for (i = i + 1; i < graph->n; 
++i126
) {
3406
126
    if (!pred(&graph->node[i], data))
3407
113
      continue;
3408
13
    set = isl_set_universe(isl_space_copy(graph->node[i].space));
3409
13
    dom = isl_union_set_union(dom, isl_union_set_from_set(set));
3410
13
  }
3411
152
3412
152
  return dom;
3413
152
}
3414
3415
/* Return a list of unions of universe domains, where each element
3416
 * in the list corresponds to an SCC (or WCC) indexed by node->scc.
3417
 */
3418
static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx,
3419
  struct isl_sched_graph *graph)
3420
68
{
3421
68
  int i;
3422
68
  isl_union_set_list *filters;
3423
68
3424
68
  filters = isl_union_set_list_alloc(ctx, graph->scc);
3425
216
  for (i = 0; i < graph->scc; 
++i148
) {
3426
148
    isl_union_set *dom;
3427
148
3428
148
    dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i);
3429
148
    filters = isl_union_set_list_add(filters, dom);
3430
148
  }
3431
68
3432
68
  return filters;
3433
68
}
3434
3435
/* Return a list of two unions of universe domains, one for the SCCs up
3436
 * to and including graph->src_scc and another for the other SCCs.
3437
 */
3438
static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
3439
  struct isl_sched_graph *graph)
3440
2
{
3441
2
  isl_union_set *dom;
3442
2
  isl_union_set_list *filters;
3443
2
3444
2
  filters = isl_union_set_list_alloc(ctx, 2);
3445
2
  dom = isl_sched_graph_domain(ctx, graph,
3446
2
          &node_scc_at_most, graph->src_scc);
3447
2
  filters = isl_union_set_list_add(filters, dom);
3448
2
  dom = isl_sched_graph_domain(ctx, graph,
3449
2
          &node_scc_at_least, graph->src_scc + 1);
3450
2
  filters = isl_union_set_list_add(filters, dom);
3451
2
3452
2
  return filters;
3453
2
}
3454
3455
/* Copy nodes that satisfy node_pred from the src dependence graph
3456
 * to the dst dependence graph.
3457
 */
3458
static isl_stat copy_nodes(struct isl_sched_graph *dst,
3459
  struct isl_sched_graph *src,
3460
  int (*node_pred)(struct isl_sched_node *node, int data), int data)
3461
196
{
3462
196
  int i;
3463
196
3464
196
  dst->n = 0;
3465
700
  for (i = 0; i < src->n; 
++i504
) {
3466
504
    int j;
3467
504
3468
504
    if (!node_pred(&src->node[i], data))
3469
278
      continue;
3470
226
3471
226
    j = dst->n;
3472
226
    dst->node[j].space = isl_space_copy(src->node[i].space);
3473
226
    dst->node[j].compressed = src->node[i].compressed;
3474
226
    dst->node[j].hull = isl_set_copy(src->node[i].hull);
3475
226
    dst->node[j].compress =
3476
226
      isl_multi_aff_copy(src->node[i].compress);
3477
226
    dst->node[j].decompress =
3478
226
      isl_multi_aff_copy(src->node[i].decompress);
3479
226
    dst->node[j].nvar = src->node[i].nvar;
3480
226
    dst->node[j].nparam = src->node[i].nparam;
3481
226
    dst->node[j].sched = isl_mat_copy(src->node[i].sched);
3482
226
    dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
3483
226
    dst->node[j].coincident = src->node[i].coincident;
3484
226
    dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
3485
226
    dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds);
3486
226
    dst->node[j].max = isl_vec_copy(src->node[i].max);
3487
226
    dst->n++;
3488
226
3489
226
    if (!dst->node[j].space || !dst->node[j].sched)
3490
0
      return isl_stat_error;
3491
226
    if (dst->node[j].compressed &&
3492
226
        
(14
!dst->node[j].hull14
||
!dst->node[j].compress14
||
3493
14
         !dst->node[j].decompress))
3494
0
      return isl_stat_error;
3495
226
  }
3496
196
3497
196
  return isl_stat_ok;
3498
196
}
3499
3500
/* Copy non-empty edges that satisfy edge_pred from the src dependence graph
3501
 * to the dst dependence graph.
3502
 * If the source or destination node of the edge is not in the destination
3503
 * graph, then it must be a backward proximity edge and it should simply
3504
 * be ignored.
3505
 */
3506
static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
3507
  struct isl_sched_graph *src,
3508
  int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
3509
196
{
3510
196
  int i;
3511
196
3512
196
  dst->n_edge = 0;
3513
728
  for (i = 0; i < src->n_edge; 
++i532
) {
3514
532
    struct isl_sched_edge *edge = &src->edge[i];
3515
532
    isl_map *map;
3516
532
    isl_union_map *tagged_condition;
3517
532
    isl_union_map *tagged_validity;
3518
532
    struct isl_sched_node *dst_src, *dst_dst;
3519
532
3520
532
    if (!edge_pred(edge, data))
3521
419
      continue;
3522
113
3523
113
    if (isl_map_plain_is_empty(edge->map))
3524
31
      continue;
3525
82
3526
82
    dst_src = graph_find_node(ctx, dst, edge->src->space);
3527
82
    dst_dst = graph_find_node(ctx, dst, edge->dst->space);
3528
82
    if (!dst_src || !dst_dst)
3529
0
      return isl_stat_error;
3530
82
    if (!is_node(dst, dst_src) || !is_node(dst, dst_dst)) {
3531
0
      if (is_validity(edge) || is_conditional_validity(edge))
3532
0
        isl_die(ctx, isl_error_internal,
3533
0
          "backward (conditional) validity edge",
3534
0
          return isl_stat_error);
3535
0
      continue;
3536
82
    }
3537
82
3538
82
    map = isl_map_copy(edge->map);
3539
82
    tagged_condition = isl_union_map_copy(edge->tagged_condition);
3540
82
    tagged_validity = isl_union_map_copy(edge->tagged_validity);
3541
82
3542
82
    dst->edge[dst->n_edge].src = dst_src;
3543
82
    dst->edge[dst->n_edge].dst = dst_dst;
3544
82
    dst->edge[dst->n_edge].map = map;
3545
82
    dst->edge[dst->n_edge].tagged_condition = tagged_condition;
3546
82
    dst->edge[dst->n_edge].tagged_validity = tagged_validity;
3547
82
    dst->edge[dst->n_edge].types = edge->types;
3548
82
    dst->n_edge++;
3549
82
3550
82
    if (edge->tagged_condition && 
!tagged_condition11
)
3551
0
      return isl_stat_error;
3552
82
    if (edge->tagged_validity && 
!tagged_validity6
)
3553
0
      return isl_stat_error;
3554
82
3555
82
    if (graph_edge_tables_add(ctx, dst,
3556
82
              &dst->edge[dst->n_edge - 1]) < 0)
3557
0
      return isl_stat_error;
3558
82
  }
3559
196
3560
196
  return isl_stat_ok;
3561
196
}
3562
3563
/* Compute the maximal number of variables over all nodes.
3564
 * This is the maximal number of linearly independent schedule
3565
 * rows that we need to compute.
3566
 * Just in case we end up in a part of the dependence graph
3567
 * with only lower-dimensional domains, we make sure we will
3568
 * compute the required amount of extra linearly independent rows.
3569
 */
3570
static int compute_maxvar(struct isl_sched_graph *graph)
3571
441
{
3572
441
  int i;
3573
441
3574
441
  graph->maxvar = 0;
3575
1.06k
  for (i = 0; i < graph->n; 
++i619
) {
3576
619
    struct isl_sched_node *node = &graph->node[i];
3577
619
    int nvar;
3578
619
3579
619
    if (node_update_vmap(node) < 0)
3580
0
      return -1;
3581
619
    nvar = node->nvar + graph->n_row - node->rank;
3582
619
    if (nvar > graph->maxvar)
3583
432
      graph->maxvar = nvar;
3584
619
  }
3585
441
3586
441
  return 0;
3587
441
}
3588
3589
/* Extract the subgraph of "graph" that consists of the nodes satisfying
3590
 * "node_pred" and the edges satisfying "edge_pred" and store
3591
 * the result in "sub".
3592
 */
3593
static isl_stat extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
3594
  int (*node_pred)(struct isl_sched_node *node, int data),
3595
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3596
  int data, struct isl_sched_graph *sub)
3597
196
{
3598
196
  int i, n = 0, n_edge = 0;
3599
196
  int t;
3600
196
3601
700
  for (i = 0; i < graph->n; 
++i504
)
3602
504
    if (node_pred(&graph->node[i], data))
3603
226
      ++n;
3604
728
  for (i = 0; i < graph->n_edge; 
++i532
)
3605
532
    if (edge_pred(&graph->edge[i], data))
3606
113
      ++n_edge;
3607
196
  if (graph_alloc(ctx, sub, n, n_edge) < 0)
3608
0
    return isl_stat_error;
3609
196
  sub->root = graph->root;
3610
196
  if (copy_nodes(sub, graph, node_pred, data) < 0)
3611
0
    return isl_stat_error;
3612
196
  if (graph_init_table(ctx, sub) < 0)
3613
0
    return isl_stat_error;
3614
1.17k
  
for (t = 0; 196
t <= isl_edge_last;
++t980
)
3615
980
    sub->max_edge[t] = graph->max_edge[t];
3616
196
  if (graph_init_edge_tables(ctx, sub) < 0)
3617
0
    return isl_stat_error;
3618
196
  if (copy_edges(ctx, sub, graph, edge_pred, data) < 0)
3619
0
    return isl_stat_error;
3620
196
  sub->n_row = graph->n_row;
3621
196
  sub->max_row = graph->max_row;
3622
196
  sub->n_total_row = graph->n_total_row;
3623
196
  sub->band_start = graph->band_start;
3624
196
3625
196
  return isl_stat_ok;
3626
196
}
3627
3628
static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
3629
  struct isl_sched_graph *graph);
3630
static __isl_give isl_schedule_node *compute_schedule_wcc(
3631
  isl_schedule_node *node, struct isl_sched_graph *graph);
3632
3633
/* Compute a schedule for a subgraph of "graph".  In particular, for
3634
 * the graph composed of nodes that satisfy node_pred and edges that
3635
 * that satisfy edge_pred.
3636
 * If the subgraph is known to consist of a single component, then wcc should
3637
 * be set and then we call compute_schedule_wcc on the constructed subgraph.
3638
 * Otherwise, we call compute_schedule, which will check whether the subgraph
3639
 * is connected.
3640
 *
3641
 * The schedule is inserted at "node" and the updated schedule node
3642
 * is returned.
3643
 */
3644
static __isl_give isl_schedule_node *compute_sub_schedule(
3645
  __isl_take isl_schedule_node *node, isl_ctx *ctx,
3646
  struct isl_sched_graph *graph,
3647
  int (*node_pred)(struct isl_sched_node *node, int data),
3648
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3649
  int data, int wcc)
3650
78
{
3651
78
  struct isl_sched_graph split = { 0 };
3652
78
3653
78
  if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data,
3654
78
        &split) < 0)
3655
0
    goto error;
3656
78
3657
78
  if (wcc)
3658
74
    node = compute_schedule_wcc(node, &split);
3659
4
  else
3660
4
    node = compute_schedule(node, &split);
3661
78
3662
78
  graph_free(ctx, &split);
3663
78
  return node;
3664
0
error:
3665
0
  graph_free(ctx, &split);
3666
0
  return isl_schedule_node_free(node);
3667
78
}
3668
3669
static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
3670
770
{
3671
770
  return edge->src->scc == scc && 
edge->dst->scc == scc308
;
3672
770
}
3673
3674
static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
3675
14
{
3676
14
  return edge->dst->scc <= scc;
3677
14
}
3678
3679
static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
3680
14
{
3681
14
  return edge->src->scc >= scc;
3682
14
}
3683
3684
/* Reset the current band by dropping all its schedule rows.
3685
 */
3686
static isl_stat reset_band(struct isl_sched_graph *graph)
3687
8
{
3688
8
  int i;
3689
8
  int drop;
3690
8
3691
8
  drop = graph->n_total_row - graph->band_start;
3692
8
  graph->n_total_row -= drop;
3693
8
  graph->n_row -= drop;
3694
8
3695
28
  for (i = 0; i < graph->n; 
++i20
) {
3696
20
    struct isl_sched_node *node = &graph->node[i];
3697
20
3698
20
    isl_map_free(node->sched_map);
3699
20
    node->sched_map = NULL;
3700
20
3701
20
    node->sched = isl_mat_drop_rows(node->sched,
3702
20
            graph->band_start, drop);
3703
20
3704
20
    if (!node->sched)
3705
0
      return isl_stat_error;
3706
20
  }
3707
8
3708
8
  return isl_stat_ok;
3709
8
}
3710
3711
/* Split the current graph into two parts and compute a schedule for each
3712
 * part individually.  In particular, one part consists of all SCCs up
3713
 * to and including graph->src_scc, while the other part contains the other
3714
 * SCCs.  The split is enforced by a sequence node inserted at position "node"
3715
 * in the schedule tree.  Return the updated schedule node.
3716
 * If either of these two parts consists of a sequence, then it is spliced
3717
 * into the sequence containing the two parts.
3718
 *
3719
 * The current band is reset. It would be possible to reuse
3720
 * the previously computed rows as the first rows in the next
3721
 * band, but recomputing them may result in better rows as we are looking
3722
 * at a smaller part of the dependence graph.
3723
 */
3724
static __isl_give isl_schedule_node *compute_split_schedule(
3725
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3726
2
{
3727
2
  int is_seq;
3728
2
  isl_ctx *ctx;
3729
2
  isl_union_set_list *filters;
3730
2
3731
2
  if (!node)
3732
0
    return NULL;
3733
2
3734
2
  if (reset_band(graph) < 0)
3735
0
    return isl_schedule_node_free(node);
3736
2
3737
2
  next_band(graph);
3738
2
3739
2
  ctx = isl_schedule_node_get_ctx(node);
3740
2
  filters = extract_split(ctx, graph);
3741
2
  node = isl_schedule_node_insert_sequence(node, filters);
3742
2
  node = isl_schedule_node_child(node, 1);
3743
2
  node = isl_schedule_node_child(node, 0);
3744
2
3745
2
  node = compute_sub_schedule(node, ctx, graph,
3746
2
        &node_scc_at_least, &edge_src_scc_at_least,
3747
2
        graph->src_scc + 1, 0);
3748
2
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3749
2
  node = isl_schedule_node_parent(node);
3750
2
  node = isl_schedule_node_parent(node);
3751
2
  if (is_seq)
3752
0
    node = isl_schedule_node_sequence_splice_child(node, 1);
3753
2
  node = isl_schedule_node_child(node, 0);
3754
2
  node = isl_schedule_node_child(node, 0);
3755
2
  node = compute_sub_schedule(node, ctx, graph,
3756
2
        &node_scc_at_most, &edge_dst_scc_at_most,
3757
2
        graph->src_scc, 0);
3758
2
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3759
2
  node = isl_schedule_node_parent(node);
3760
2
  node = isl_schedule_node_parent(node);
3761
2
  if (is_seq)
3762
0
    node = isl_schedule_node_sequence_splice_child(node, 0);
3763
2
3764
2
  return node;
3765
2
}
3766
3767
/* Insert a band node at position "node" in the schedule tree corresponding
3768
 * to the current band in "graph".  Mark the band node permutable
3769
 * if "permutable" is set.
3770
 * The partial schedules and the coincidence property are extracted
3771
 * from the graph nodes.
3772
 * Return the updated schedule node.
3773
 */
3774
static __isl_give isl_schedule_node *insert_current_band(
3775
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3776
  int permutable)
3777
168
{
3778
168
  int i;
3779
168
  int start, end, n;
3780
168
  isl_multi_aff *ma;
3781
168
  isl_multi_pw_aff *mpa;
3782
168
  isl_multi_union_pw_aff *mupa;
3783
168
3784
168
  if (!node)
3785
0
    return NULL;
3786
168
3787
168
  if (graph->n < 1)
3788
168
    
isl_die0
(isl_schedule_node_get_ctx(node), isl_error_internal,
3789
168
      "graph should have at least one node",
3790
168
      return isl_schedule_node_free(node));
3791
168
3792
168
  start = graph->band_start;
3793
168
  end = graph->n_total_row;
3794
168
  n = end - start;
3795
168
3796
168
  ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n);
3797
168
  mpa = isl_multi_pw_aff_from_multi_aff(ma);
3798
168
  mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3799
168
3800
244
  for (i = 1; i < graph->n; 
++i76
) {
3801
76
    isl_multi_union_pw_aff *mupa_i;
3802
76
3803
76
    ma = node_extract_partial_schedule_multi_aff(&graph->node[i],
3804
76
                start, n);
3805
76
    mpa = isl_multi_pw_aff_from_multi_aff(ma);
3806
76
    mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3807
76
    mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
3808
76
  }
3809
168
  node = isl_schedule_node_insert_partial_schedule(node, mupa);
3810
168
3811
443
  for (i = 0; i < n; 
++i275
)
3812
275
    node = isl_schedule_node_band_member_set_coincident(node, i,
3813
275
          graph->node[0].coincident[start + i]);
3814
168
  node = isl_schedule_node_band_set_permutable(node, permutable);
3815
168
3816
168
  return node;
3817
168
}
3818
3819
/* Update the dependence relations based on the current schedule,
3820
 * add the current band to "node" and then continue with the computation
3821
 * of the next band.
3822
 * Return the updated schedule node.
3823
 */
3824
static __isl_give isl_schedule_node *compute_next_band(
3825
  __isl_take isl_schedule_node *node,
3826
  struct isl_sched_graph *graph, int permutable)
3827
168
{
3828
168
  isl_ctx *ctx;
3829
168
3830
168
  if (!node)
3831
0
    return NULL;
3832
168
3833
168
  ctx = isl_schedule_node_get_ctx(node);
3834
168
  if (update_edges(ctx, graph) < 0)
3835
0
    return isl_schedule_node_free(node);
3836
168
  node = insert_current_band(node, graph, permutable);
3837
168
  next_band(graph);
3838
168
3839
168
  node = isl_schedule_node_child(node, 0);
3840
168
  node = compute_schedule(node, graph);
3841
168
  node = isl_schedule_node_parent(node);
3842
168
3843
168
  return node;
3844
168
}
3845
3846
/* Add the constraints "coef" derived from an edge from "node" to itself
3847
 * to graph->lp in order to respect the dependences and to try and carry them.
3848
 * "pos" is the sequence number of the edge that needs to be carried.
3849
 * "coef" represents general constraints on coefficients (c_0, c_x)
3850
 * of valid constraints for (y - x) with x and y instances of the node.
3851
 *
3852
 * The constraints added to graph->lp need to enforce
3853
 *
3854
 *  (c_j_0 + c_j_x y) - (c_j_0 + c_j_x x)
3855
 *  = c_j_x (y - x) >= e_i
3856
 *
3857
 * for each (x,y) in the dependence relation of the edge.
3858
 * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x),
3859
 * taking into account that each coefficient in c_j_x is represented
3860
 * as a pair of non-negative coefficients.
3861
 */
3862
static isl_stat add_intra_constraints(struct isl_sched_graph *graph,
3863
  struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos)
3864
40
{
3865
40
  int offset;
3866
40
  isl_ctx *ctx;
3867
40
  isl_dim_map *dim_map;
3868
40
3869
40
  if (!coef)
3870
0
    return isl_stat_error;
3871
40
3872
40
  ctx = isl_basic_set_get_ctx(coef);
3873
40
  offset = coef_var_offset(coef);
3874
40
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
3875
40
  isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3876
40
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3877
40
3878
40
  return isl_stat_ok;
3879
40
}
3880
3881
/* Add the constraints "coef" derived from an edge from "src" to "dst"
3882
 * to graph->lp in order to respect the dependences and to try and carry them.
3883
 * "pos" is the sequence number of the edge that needs to be carried or
3884
 * -1 if no attempt should be made to carry the dependences.
3885
 * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
3886
 * of valid constraints for (x, y) with x and y instances of "src" and "dst".
3887
 *
3888
 * The constraints added to graph->lp need to enforce
3889
 *
3890
 *  (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
3891
 *
3892
 * for each (x,y) in the dependence relation of the edge or
3893
 *
3894
 *  (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0
3895
 *
3896
 * if pos is -1.
3897
 * That is,
3898
 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3899
 * or
3900
 * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3901
 * needs to be plugged in for (c_0, c_n, c_x, c_y),
3902
 * taking into account that each coefficient in c_j_x and c_k_x is represented
3903
 * as a pair of non-negative coefficients.
3904
 */
3905
static isl_stat add_inter_constraints(struct isl_sched_graph *graph,
3906
  struct isl_sched_node *src, struct isl_sched_node *dst,
3907
  __isl_take isl_basic_set *coef, int pos)
3908
18
{
3909
18
  int offset;
3910
18
  isl_ctx *ctx;
3911
18
  isl_dim_map *dim_map;
3912
18
3913
18
  if (!coef)
3914
0
    return isl_stat_error;
3915
18
3916
18
  ctx = isl_basic_set_get_ctx(coef);
3917
18
  offset = coef_var_offset(coef);
3918
18
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
3919
18
  if (pos >= 0)
3920
18
    isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3921
18
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3922
18
3923
18
  return isl_stat_ok;
3924
18
}
3925
3926
/* Data structure for keeping track of the data needed
3927
 * to exploit non-trivial lineality spaces.
3928
 *
3929
 * "any_non_trivial" is true if there are any non-trivial lineality spaces.
3930
 * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL.
3931
 * "equivalent" connects instances to other instances on the same line(s).
3932
 * "mask" contains the domain spaces of "equivalent".
3933
 * Any instance set not in "mask" does not have a non-trivial lineality space.
3934
 */
3935
struct isl_exploit_lineality_data {
3936
  isl_bool any_non_trivial;
3937
  isl_union_map *equivalent;
3938
  isl_union_set *mask;
3939
};
3940
3941
/* Data structure collecting information used during the construction
3942
 * of an LP for carrying dependences.
3943
 *
3944
 * "intra" is a sequence of coefficient constraints for intra-node edges.
3945
 * "inter" is a sequence of coefficient constraints for inter-node edges.
3946
 * "lineality" contains data used to exploit non-trivial lineality spaces.
3947
 */
3948
struct isl_carry {
3949
  isl_basic_set_list *intra;
3950
  isl_basic_set_list *inter;
3951
  struct isl_exploit_lineality_data lineality;
3952
};
3953
3954
/* Free all the data stored in "carry".
3955
 */
3956
static void isl_carry_clear(struct isl_carry *carry)
3957
28
{
3958
28
  isl_basic_set_list_free(carry->intra);
3959
28
  isl_basic_set_list_free(carry->inter);
3960
28
  isl_union_map_free(carry->lineality.equivalent);
3961
28
  isl_union_set_free(carry->lineality.mask);
3962
28
}
3963
3964
/* Return a pointer to the node in "graph" that lives in "space".
3965
 * If the requested node has been compressed, then "space"
3966
 * corresponds to the compressed space.
3967
 * The graph is assumed to have such a node.
3968
 * Return NULL in case of error.
3969
 *
3970
 * First try and see if "space" is the space of an uncompressed node.
3971
 * If so, return that node.
3972
 * Otherwise, "space" was constructed by construct_compressed_id and
3973
 * contains a user pointer pointing to the node in the tuple id.
3974
 * However, this node belongs to the original dependence graph.
3975
 * If "graph" is a subgraph of this original dependence graph,
3976
 * then the node with the same space still needs to be looked up
3977
 * in the current graph.
3978
 */
3979
static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx,
3980
  struct isl_sched_graph *graph, __isl_keep isl_space *space)
3981
96
{
3982
96
  isl_id *id;
3983
96
  struct isl_sched_node *node;
3984
96
3985
96
  if (!space)
3986
0
    return NULL;
3987
96
3988
96
  node = graph_find_node(ctx, graph, space);
3989
96
  if (!node)
3990
0
    return NULL;
3991
96
  if (is_node(graph, node))
3992
96
    return node;
3993
0
3994
0
  id = isl_space_get_tuple_id(space, isl_dim_set);
3995
0
  node = isl_id_get_user(id);
3996
0
  isl_id_free(id);
3997
0
3998
0
  if (!node)
3999
0
    return NULL;
4000
0
4001
0
  if (!is_node(graph->root, node))
4002
0
    isl_die(ctx, isl_error_internal,
4003
0
      "space points to invalid node", return NULL);
4004
0
  if (graph != graph->root)
4005
0
    node = graph_find_node(ctx, graph, node->space);
4006
0
  if (!is_node(graph, node))
4007
0
    isl_die(ctx, isl_error_internal,
4008
0
      "unable to find node", return NULL);
4009
0
4010
0
  return node;
4011
0
}
4012
4013
/* Internal data structure for add_all_constraints.
4014
 *
4015
 * "graph" is the schedule constraint graph for which an LP problem
4016
 * is being constructed.
4017
 * "carry_inter" indicates whether inter-node edges should be carried.
4018
 * "pos" is the position of the next edge that needs to be carried.
4019
 */
4020
struct isl_add_all_constraints_data {
4021
  isl_ctx *ctx;
4022
  struct isl_sched_graph *graph;
4023
  int carry_inter;
4024
  int pos;
4025
};
4026
4027
/* Add the constraints "coef" derived from an edge from a node to itself
4028
 * to data->graph->lp in order to respect the dependences and
4029
 * to try and carry them.
4030
 *
4031
 * The space of "coef" is of the form
4032
 *
4033
 *  coefficients[[c_cst] -> S[c_x]]
4034
 *
4035
 * with S[c_x] the (compressed) space of the node.
4036
 * Extract the node from the space and call add_intra_constraints.
4037
 */
4038
static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user)
4039
40
{
4040
40
  struct isl_add_all_constraints_data *data = user;
4041
40
  isl_space *space;
4042
40
  struct isl_sched_node *node;
4043
40
4044
40
  space = isl_basic_set_get_space(coef);
4045
40
  space = isl_space_range(isl_space_unwrap(space));
4046
40
  node = graph_find_compressed_node(data->ctx, data->graph, space);
4047
40
  isl_space_free(space);
4048
40
  return add_intra_constraints(data->graph, node, coef, data->pos++);
4049
40
}
4050
4051
/* Add the constraints "coef" derived from an edge from a node j
4052
 * to a node k to data->graph->lp in order to respect the dependences and
4053
 * to try and carry them (provided data->carry_inter is set).
4054
 *
4055
 * The space of "coef" is of the form
4056
 *
4057
 *  coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
4058
 *
4059
 * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
4060
 * Extract the nodes from the space and call add_inter_constraints.
4061
 */
4062
static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user)
4063
18
{
4064
18
  struct isl_add_all_constraints_data *data = user;
4065
18
  isl_space *space, *dom;
4066
18
  struct isl_sched_node *src, *dst;
4067
18
  int pos;
4068
18
4069
18
  space = isl_basic_set_get_space(coef);
4070
18
  space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space)));
4071
18
  dom = isl_space_domain(isl_space_copy(space));
4072
18
  src = graph_find_compressed_node(data->ctx, data->graph, dom);
4073
18
  isl_space_free(dom);
4074
18
  space = isl_space_range(space);
4075
18
  dst = graph_find_compressed_node(data->ctx, data->graph, space);
4076
18
  isl_space_free(space);
4077
18
4078
18
  pos = data->carry_inter ? data->pos++ : 
-10
;
4079
18
  return add_inter_constraints(data->graph, src, dst, coef, pos);
4080
18
}
4081
4082
/* Add constraints to graph->lp that force all (conditional) validity
4083
 * dependences to be respected and attempt to carry them.
4084
 * "intra" is the sequence of coefficient constraints for intra-node edges.
4085
 * "inter" is the sequence of coefficient constraints for inter-node edges.
4086
 * "carry_inter" indicates whether inter-node edges should be carried or
4087
 * only respected.
4088
 */
4089
static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph,
4090
  __isl_keep isl_basic_set_list *intra,
4091
  __isl_keep isl_basic_set_list *inter, int carry_inter)
4092
28
{
4093
28
  struct isl_add_all_constraints_data data = { ctx, graph, carry_inter };
4094
28
4095
28
  data.pos = 0;
4096
28
  if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0)
4097
0
    return isl_stat_error;
4098
28
  if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0)
4099
0
    return isl_stat_error;
4100
28
  return isl_stat_ok;
4101
28
}
4102
4103
/* Internal data structure for count_all_constraints
4104
 * for keeping track of the number of equality and inequality constraints.
4105
 */
4106
struct isl_sched_count {
4107
  int n_eq;
4108
  int n_ineq;
4109
};
4110
4111
/* Add the number of equality and inequality constraints of "bset"
4112
 * to data->n_eq and data->n_ineq.
4113
 */
4114
static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user)
4115
58
{
4116
58
  struct isl_sched_count *data = user;
4117
58
4118
58
  return update_count(bset, 1, &data->n_eq, &data->n_ineq);
4119
58
}
4120
4121
/* Count the number of equality and inequality constraints
4122
 * that will be added to the carry_lp problem.
4123
 * We count each edge exactly once.
4124
 * "intra" is the sequence of coefficient constraints for intra-node edges.
4125
 * "inter" is the sequence of coefficient constraints for inter-node edges.
4126
 */
4127
static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra,
4128
  __isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq)
4129
28
{
4130
28
  struct isl_sched_count data;
4131
28
4132
28
  data.n_eq = data.n_ineq = 0;
4133
28
  if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0)
4134
0
    return isl_stat_error;
4135
28
  if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0)
4136
0
    return isl_stat_error;
4137
28
4138
28
  *n_eq = data.n_eq;
4139
28
  *n_ineq = data.n_ineq;
4140
28
4141
28
  return isl_stat_ok;
4142
28
}
4143
4144
/* Construct an LP problem for finding schedule coefficients
4145
 * such that the schedule carries as many validity dependences as possible.
4146
 * In particular, for each dependence i, we bound the dependence distance
4147
 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
4148
 * of all e_i's.  Dependences with e_i = 0 in the solution are simply
4149
 * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
4150
 * "intra" is the sequence of coefficient constraints for intra-node edges.
4151
 * "inter" is the sequence of coefficient constraints for inter-node edges.
4152
 * "n_edge" is the total number of edges.
4153
 * "carry_inter" indicates whether inter-node edges should be carried or
4154
 * only respected.  That is, if "carry_inter" is not set, then
4155
 * no e_i variables are introduced for the inter-node edges.
4156
 *
4157
 * All variables of the LP are non-negative.  The actual coefficients
4158
 * may be negative, so each coefficient is represented as the difference
4159
 * of two non-negative variables.  The negative part always appears
4160
 * immediately before the positive part.
4161
 * Other than that, the variables have the following order
4162
 *
4163
 *  - sum of (1 - e_i) over all edges
4164
 *  - sum of all c_n coefficients
4165
 *    (unconstrained when computing non-parametric schedules)
4166
 *  - sum of positive and negative parts of all c_x coefficients
4167
 *  - for each edge
4168
 *    - e_i
4169
 *  - for each node
4170
 *    - positive and negative parts of c_i_x, in opposite order
4171
 *    - c_i_n (if parametric)
4172
 *    - c_i_0
4173
 *
4174
 * The constraints are those from the (validity) edges plus three equalities
4175
 * to express the sums and n_edge inequalities to express e_i <= 1.
4176
 */
4177
static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
4178
  int n_edge, __isl_keep isl_basic_set_list *intra,
4179
  __isl_keep isl_basic_set_list *inter, int carry_inter)
4180
28
{
4181
28
  int i;
4182
28
  int k;
4183
28
  isl_space *dim;
4184
28
  unsigned total;
4185
28
  int n_eq, n_ineq;
4186
28
4187
28
  total = 3 + n_edge;
4188
68
  for (i = 0; i < graph->n; 
++i40
) {
4189
40
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
4190
40
    node->start = total;
4191
40
    total += 1 + node->nparam + 2 * node->nvar;
4192
40
  }
4193
28
4194
28
  if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0)
4195
0
    return isl_stat_error;
4196
28
4197
28
  dim = isl_space_set_alloc(ctx, 0, total);
4198
28
  isl_basic_set_free(graph->lp);
4199
28
  n_eq += 3;
4200
28
  n_ineq += n_edge;
4201
28
  graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
4202
28
  graph->lp = isl_basic_set_set_rational(graph->lp);
4203
28
4204
28
  k = isl_basic_set_alloc_equality(graph->lp);
4205
28
  if (k < 0)
4206
0
    return isl_stat_error;
4207
28
  isl_seq_clr(graph->lp->eq[k], 1 + total);
4208
28
  isl_int_set_si(graph->lp->eq[k][0], -n_edge);
4209
28
  isl_int_set_si(graph->lp->eq[k][1], 1);
4210
86
  for (i = 0; i < n_edge; 
++i58
)
4211
58
    isl_int_set_si(graph->lp->eq[k][4 + i], 1);
4212
28
4213
28
  if (add_param_sum_constraint(graph, 1) < 0)
4214
0
    return isl_stat_error;
4215
28
  if (add_var_sum_constraint(graph, 2) < 0)
4216
0
    return isl_stat_error;
4217
28
4218
86
  
for (i = 0; 28
i < n_edge;
++i58
) {
4219
58
    k = isl_basic_set_alloc_inequality(graph->lp);
4220
58
    if (k < 0)
4221
0
      return isl_stat_error;
4222
58
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
4223
58
    isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
4224
58
    isl_int_set_si(graph->lp->ineq[k][0], 1);
4225
58
  }
4226
28
4227
28
  if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0)
4228
0
    return isl_stat_error;
4229
28
4230
28
  return isl_stat_ok;
4231
28
}
4232
4233
static __isl_give isl_schedule_node *compute_component_schedule(
4234
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4235
  int wcc);
4236
4237
/* If the schedule_split_scaled option is set and if the linear
4238
 * parts of the scheduling rows for all nodes in the graphs have
4239
 * a non-trivial common divisor, then remove this
4240
 * common divisor from the linear part.
4241
 * Otherwise, insert a band node directly and continue with
4242
 * the construction of the schedule.
4243
 *
4244
 * If a non-trivial common divisor is found, then
4245
 * the linear part is reduced and the remainder is ignored.
4246
 * The pieces of the graph that are assigned different remainders
4247
 * form (groups of) strongly connected components within
4248
 * the scaled down band.  If needed, they can therefore
4249
 * be ordered along this remainder in a sequence node.
4250
 * However, this ordering is not enforced here in order to allow
4251
 * the scheduler to combine some of the strongly connected components.
4252
 */
4253
static __isl_give isl_schedule_node *split_scaled(
4254
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4255
26
{
4256
26
  int i;
4257
26
  int row;
4258
26
  isl_ctx *ctx;
4259
26
  isl_int gcd, gcd_i;
4260
26
4261
26
  if (!node)
4262
0
    return NULL;
4263
26
4264
26
  ctx = isl_schedule_node_get_ctx(node);
4265
26
  if (!ctx->opt->schedule_split_scaled)
4266
0
    return compute_next_band(node, graph, 0);
4267
26
  if (graph->n <= 1)
4268
20
    return compute_next_band(node, graph, 0);
4269
6
4270
6
  isl_int_init(gcd);
4271
6
  isl_int_init(gcd_i);
4272
6
4273
6
  isl_int_set_si(gcd, 0);
4274
6
4275
6
  row = isl_mat_rows(graph->node[0].sched) - 1;
4276
6
4277
20
  for (i = 0; i < graph->n; 
++i14
) {
4278
14
    struct isl_sched_node *node = &graph->node[i];
4279
14
    int cols = isl_mat_cols(node->sched);
4280
14
4281
14
    isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
4282
14
    isl_int_gcd(gcd, gcd, gcd_i);
4283
14
  }
4284
6
4285
6
  isl_int_clear(gcd_i);
4286
6
4287
6
  if (isl_int_cmp_si(gcd, 1) <= 0) {
4288
2
    isl_int_clear(gcd);
4289
2
    return compute_next_band(node, graph, 0);
4290
2
  }
4291
4
4292
14
  
for (i = 0; 4
i < graph->n;
++i10
) {
4293
10
    struct isl_sched_node *node = &graph->node[i];
4294
10
4295
10
    isl_int_fdiv_q(node->sched->row[row][0],
4296
10
             node->sched->row[row][0], gcd);
4297
10
    isl_int_mul(node->sched->row[row][0],
4298
10
          node->sched->row[row][0], gcd);
4299
10
    node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
4300
10
    if (!node->sched)
4301
0
      goto error;
4302
10
  }
4303
4
4304
4
  isl_int_clear(gcd);
4305
4
4306
4
  return compute_next_band(node, graph, 0);
4307
0
error:
4308
0
  isl_int_clear(gcd);
4309
0
  return isl_schedule_node_free(node);
4310
4
}
4311
4312
/* Is the schedule row "sol" trivial on node "node"?
4313
 * That is, is the solution zero on the dimensions linearly independent of
4314
 * the previously found solutions?
4315
 * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
4316
 *
4317
 * Each coefficient is represented as the difference between
4318
 * two non-negative values in "sol".
4319
 * We construct the schedule row s and check if it is linearly
4320
 * independent of previously computed schedule rows
4321
 * by computing T s, with T the linear combinations that are zero
4322
 * on linearly dependent schedule rows.
4323
 * If the result consists of all zeros, then the solution is trivial.
4324
 */
4325
static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
4326
32
{
4327
32
  int trivial;
4328
32
  isl_vec *node_sol;
4329
32
4330
32
  if (!sol)
4331
0
    return -1;
4332
32
  if (node->nvar == node->rank)
4333
4
    return 0;
4334
28
4335
28
  node_sol = extract_var_coef(node, sol);
4336
28
  node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol);
4337
28
  if (!node_sol)
4338
0
    return -1;
4339
28
4340
28
  trivial = isl_seq_first_non_zero(node_sol->el,
4341
28
          node->nvar - node->rank) == -1;
4342
28
4343
28
  isl_vec_free(node_sol);
4344
28
4345
28
  return trivial;
4346
28
}
4347
4348
/* Is the schedule row "sol" trivial on any node where it should
4349
 * not be trivial?
4350
 * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
4351
 */
4352
static int is_any_trivial(struct isl_sched_graph *graph,
4353
  __isl_keep isl_vec *sol)
4354
28
{
4355
28
  int i;
4356
28
4357
56
  for (i = 0; i < graph->n; 
++i28
) {
4358
32
    struct isl_sched_node *node = &graph->node[i];
4359
32
    int trivial;
4360
32
4361
32
    if (!needs_row(graph, node))
4362
0
      continue;
4363
32
    trivial = is_trivial(node, sol);
4364
32
    if (trivial < 0 || trivial)
4365
4
      return trivial;
4366
32
  }
4367
28
4368
28
  
return 024
;
4369
28
}
4370
4371
/* Does the schedule represented by "sol" perform loop coalescing on "node"?
4372
 * If so, return the position of the coalesced dimension.
4373
 * Otherwise, return node->nvar or -1 on error.
4374
 *
4375
 * In particular, look for pairs of coefficients c_i and c_j such that
4376
 * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|.
4377
 * If any such pair is found, then return i.
4378
 * If size_i is infinity, then no check on c_i needs to be performed.
4379
 */
4380
static int find_node_coalescing(struct isl_sched_node *node,
4381
  __isl_keep isl_vec *sol)
4382
44
{
4383
44
  int i, j;
4384
44
  isl_int max;
4385
44
  isl_vec *csol;
4386
44
4387
44
  if (node->nvar <= 1)
4388
23
    return node->nvar;
4389
21
4390
21
  csol = extract_var_coef(node, sol);
4391
21
  if (!csol)
4392
0
    return -1;
4393
21
  isl_int_init(max);
4394
61
  for (i = 0; i < node->nvar; 
++i40
) {
4395
42
    isl_val *v;
4396
42
4397
42
    if (isl_int_is_zero(csol->el[i]))
4398
42
      
continue16
;
4399
26
    v = isl_multi_val_get_val(node->sizes, i);
4400
26
    if (!v)
4401
0
      goto error;
4402
26
    if (!isl_val_is_int(v)) {
4403
14
      isl_val_free(v);
4404
14
      continue;
4405
14
    }
4406
12
    v = isl_val_div_ui(v, 2);
4407
12
    v = isl_val_ceil(v);
4408
12
    if (!v)
4409
0
      goto error;
4410
12
    isl_int_mul(max, v->n, csol->el[i]);
4411
12
    isl_val_free(v);
4412
12
4413
32
    for (j = 0; j < node->nvar; 
++j20
) {
4414
22
      if (j == i)
4415
10
        continue;
4416
12
      if (isl_int_abs_gt(csol->el[j], max))
4417
12
        
break2
;
4418
12
    }
4419
12
    if (j < node->nvar)
4420
2
      break;
4421
12
  }
4422
21
4423
21
  isl_int_clear(max);
4424
21
  isl_vec_free(csol);
4425
21
  return i;
4426
0
error:
4427
0
  isl_int_clear(max);
4428
0
  isl_vec_free(csol);
4429
0
  return -1;
4430
21
}
4431
4432
/* Force the schedule coefficient at position "pos" of "node" to be zero
4433
 * in "tl".
4434
 * The coefficient is encoded as the difference between two non-negative
4435
 * variables.  Force these two variables to have the same value.
4436
 */
4437
static __isl_give isl_tab_lexmin *zero_out_node_coef(
4438
  __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
4439
2
{
4440
2
  int dim;
4441
2
  isl_ctx *ctx;
4442
2
  isl_vec *eq;
4443
2
4444
2
  ctx = isl_space_get_ctx(node->space);
4445
2
  dim = isl_tab_lexmin_dim(tl);
4446
2
  if (dim < 0)
4447
0
    return isl_tab_lexmin_free(tl);
4448
2
  eq = isl_vec_alloc(ctx, 1 + dim);
4449
2
  eq = isl_vec_clr(eq);
4450
2
  if (!eq)
4451
0
    return isl_tab_lexmin_free(tl);
4452
2
4453
2
  pos = 1 + node_var_coef_pos(node, pos);
4454
2
  isl_int_set_si(eq->el[pos], 1);
4455
2
  isl_int_set_si(eq->el[pos + 1], -1);
4456
2
  tl = isl_tab_lexmin_add_eq(tl, eq->el);
4457
2
  isl_vec_free(eq);
4458
2
4459
2
  return tl;
4460
2
}
4461
4462
/* Return the lexicographically smallest rational point in the basic set
4463
 * from which "tl" was constructed, double checking that this input set
4464
 * was not empty.
4465
 */
4466
static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
4467
32
{
4468
32
  isl_vec *sol;
4469
32
4470
32
  sol = isl_tab_lexmin_get_solution(tl);
4471
32
  if (!sol)
4472
0
    return NULL;
4473
32
  if (sol->size == 0)
4474
32
    
isl_die0
(isl_vec_get_ctx(sol), isl_error_internal,
4475
32
      "error in schedule construction",
4476
32
      return isl_vec_free(sol));
4477
32
  return sol;
4478
32
}
4479
4480
/* Does the solution "sol" of the LP problem constructed by setup_carry_lp
4481
 * carry any of the "n_edge" groups of dependences?
4482
 * The value in the first position is the sum of (1 - e_i) over all "n_edge"
4483
 * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
4484
 * by the edge are carried by the solution.
4485
 * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
4486
 * one of those is carried.
4487
 *
4488
 * Note that despite the fact that the problem is solved using a rational
4489
 * solver, the solution is guaranteed to be integral.
4490
 * Specifically, the dependence distance lower bounds e_i (and therefore
4491
 * also their sum) are integers.  See Lemma 5 of [1].
4492
 *
4493
 * Any potential denominator of the sum is cleared by this function.
4494
 * The denominator is not relevant for any of the other elements
4495
 * in the solution.
4496
 *
4497
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4498
 *     Problem, Part II: Multi-Dimensional Time.
4499
 *     In Intl. Journal of Parallel Programming, 1992.
4500
 */
4501
static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
4502
32
{
4503
32
  isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
4504
32
  isl_int_set_si(sol->el[0], 1);
4505
32
  return isl_int_cmp_si(sol->el[1], n_edge) < 0;
4506
32
}
4507
4508
/* Return the lexicographically smallest rational point in "lp",
4509
 * assuming that all variables are non-negative and performing some
4510
 * additional sanity checks.
4511
 * If "want_integral" is set, then compute the lexicographically smallest
4512
 * integer point instead.
4513
 * In particular, "lp" should not be empty by construction.
4514
 * Double check that this is the case.
4515
 * If dependences are not carried for any of the "n_edge" edges,
4516
 * then return an empty vector.
4517
 *
4518
 * If the schedule_treat_coalescing option is set and
4519
 * if the computed schedule performs loop coalescing on a given node,
4520
 * i.e., if it is of the form
4521
 *
4522
 *  c_i i + c_j j + ...
4523
 *
4524
 * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
4525
 * to cut out this solution.  Repeat this process until no more loop
4526
 * coalescing occurs or until no more dependences can be carried.
4527
 * In the latter case, revert to the previously computed solution.
4528
 *
4529
 * If the caller requests an integral solution and if coalescing should
4530
 * be treated, then perform the coalescing treatment first as
4531
 * an integral solution computed before coalescing treatment
4532
 * would carry the same number of edges and would therefore probably
4533
 * also be coalescing.
4534
 *
4535
 * To allow the coalescing treatment to be performed first,
4536
 * the initial solution is allowed to be rational and it is only
4537
 * cut out (if needed) in the next iteration, if no coalescing measures
4538
 * were taken.
4539
 */
4540
static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
4541
  __isl_take isl_basic_set *lp, int n_edge, int want_integral)
4542
28
{
4543
28
  int i, pos, cut;
4544
28
  isl_ctx *ctx;
4545
28
  isl_tab_lexmin *tl;
4546
28
  isl_vec *sol = NULL, *prev;
4547
28
  int treat_coalescing;
4548
28
  int try_again;
4549
28
4550
28
  if (!lp)
4551
0
    return NULL;
4552
28
  ctx = isl_basic_set_get_ctx(lp);
4553
28
  treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
4554
28
  tl = isl_tab_lexmin_from_basic_set(lp);
4555
28
4556
28
  cut = 0;
4557
32
  do {
4558
32
    int integral;
4559
32
4560
32
    try_again = 0;
4561
32
    if (cut)
4562
2
      tl = isl_tab_lexmin_cut_to_integer(tl);
4563
32
    prev = sol;
4564
32
    sol = non_empty_solution(tl);
4565
32
    if (!sol)
4566
0
      goto error;
4567
32
4568
32
    integral = isl_int_is_one(sol->el[0]);
4569
32
    if (!carries_dependences(sol, n_edge)) {
4570
2
      if (!prev)
4571
0
        prev = isl_vec_alloc(ctx, 0);
4572
2
      isl_vec_free(sol);
4573
2
      sol = prev;
4574
2
      break;
4575
2
    }
4576
30
    prev = isl_vec_free(prev);
4577
30
    cut = want_integral && 
!integral16
;
4578
30
    if (cut)
4579
2
      try_again = 1;
4580
30
    if (!treat_coalescing)
4581
0
      continue;
4582
72
    
for (i = 0; 30
i < graph->n;
++i42
) {
4583
44
      struct isl_sched_node *node = &graph->node[i];
4584
44
4585
44
      pos = find_node_coalescing(node, sol);
4586
44
      if (pos < 0)
4587
0
        goto error;
4588
44
      if (pos < node->nvar)
4589
2
        break;
4590
44
    }
4591
30
    if (i < graph->n) {
4592
2
      try_again = 1;
4593
2
      tl = zero_out_node_coef(tl, &graph->node[i], pos);
4594
2
      cut = 0;
4595
2
    }
4596
30
  } while (try_again);
4597
28
4598
28
  isl_tab_lexmin_free(tl);
4599
28
4600
28
  return sol;
4601
0
error:
4602
0
  isl_tab_lexmin_free(tl);
4603
0
  isl_vec_free(prev);
4604
0
  isl_vec_free(sol);
4605
0
  return NULL;
4606
28
}
4607
4608
/* If "edge" is an edge from a node to itself, then add the corresponding
4609
 * dependence relation to "umap".
4610
 * If "node" has been compressed, then the dependence relation
4611
 * is also compressed first.
4612
 */
4613
static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap,
4614
  struct isl_sched_edge *edge)
4615
42
{
4616
42
  isl_map *map;
4617
42
  struct isl_sched_node *node = edge->src;
4618
42
4619
42
  if (edge->src != edge->dst)
4620
20
    return umap;
4621
22
4622
22
  map = isl_map_copy(edge->map);
4623
22
  if (node->compressed) {
4624
0
    map = isl_map_preimage_domain_multi_aff(map,
4625
0
            isl_multi_aff_copy(node->decompress));
4626
0
    map = isl_map_preimage_range_multi_aff(map,
4627
0
            isl_multi_aff_copy(node->decompress));
4628
0
  }
4629
22
  umap = isl_union_map_add_map(umap, map);
4630
22
  return umap;
4631
22
}
4632
4633
/* If "edge" is an edge from a node to another node, then add the corresponding
4634
 * dependence relation to "umap".
4635
 * If the source or destination nodes of "edge" have been compressed,
4636
 * then the dependence relation is also compressed first.
4637
 */
4638
static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap,
4639
  struct isl_sched_edge *edge)
4640
42
{
4641
42
  isl_map *map;
4642
42
4643
42
  if (edge->src == edge->dst)
4644
22
    return umap;
4645
20
4646
20
  map = isl_map_copy(edge->map);
4647
20
  if (edge->src->compressed)
4648
0
    map = isl_map_preimage_domain_multi_aff(map,
4649
0
            isl_multi_aff_copy(edge->src->decompress));
4650
20
  if (edge->dst->compressed)
4651
0
    map = isl_map_preimage_range_multi_aff(map,
4652
0
            isl_multi_aff_copy(edge->dst->decompress));
4653
20
  umap = isl_union_map_add_map(umap, map);
4654
20
  return umap;
4655
20
}
4656
4657
/* Internal data structure used by union_drop_coalescing_constraints
4658
 * to collect bounds on all relevant statements.
4659
 *
4660
 * "graph" is the schedule constraint graph for which an LP problem
4661
 * is being constructed.
4662
 * "bounds" collects the bounds.
4663
 */
4664
struct isl_collect_bounds_data {
4665
  isl_ctx *ctx;
4666
  struct isl_sched_graph *graph;
4667
  isl_union_set *bounds;
4668
};
4669
4670
/* Add the size bounds for the node with instance deltas in "set"
4671
 * to data->bounds.
4672
 */
4673
static isl_stat collect_bounds(__isl_take isl_set *set, void *user)
4674
20
{
4675
20
  struct isl_collect_bounds_data *data = user;
4676
20
  struct isl_sched_node *node;
4677
20
  isl_space *space;
4678
20
  isl_set *bounds;
4679
20
4680
20
  space = isl_set_get_space(set);
4681
20
  isl_set_free(set);
4682
20
4683
20
  node = graph_find_compressed_node(data->ctx, data->graph, space);
4684
20
  isl_space_free(space);
4685
20
4686
20
  bounds = isl_set_from_basic_set(get_size_bounds(node));
4687
20
  data->bounds = isl_union_set_add_set(data->bounds, bounds);
4688
20
4689
20
  return isl_stat_ok;
4690
20
}
4691
4692
/* Drop some constraints from "delta" that could be exploited
4693
 * to construct loop coalescing schedules.
4694
 * In particular, drop those constraint that bound the difference
4695
 * to the size of the domain.
4696
 * Do this for each set/node in "delta" separately.
4697
 * The parameters are assumed to have been projected out by the caller.
4698
 */
4699
static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx,
4700
  struct isl_sched_graph *graph, __isl_take isl_union_set *delta)
4701
28
{
4702
28
  struct isl_collect_bounds_data data = { ctx, graph };
4703
28
4704
28
  data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4705
28
  if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0)
4706
0
    data.bounds = isl_union_set_free(data.bounds);
4707
28
  delta = isl_union_set_plain_gist(delta, data.bounds);
4708
28
4709
28
  return delta;
4710
28
}
4711
4712
/* Given a non-trivial lineality space "lineality", add the corresponding
4713
 * universe set to data->mask and add a map from elements to
4714
 * other elements along the lines in "lineality" to data->equivalent.
4715
 * If this is the first time this function gets called
4716
 * (data->any_non_trivial is still false), then set data->any_non_trivial and
4717
 * initialize data->mask and data->equivalent.
4718
 *
4719
 * In particular, if the lineality space is defined by equality constraints
4720
 *
4721
 *  E x = 0
4722
 *
4723
 * then construct an affine mapping
4724
 *
4725
 *  f : x -> E x
4726
 *
4727
 * and compute the equivalence relation of having the same image under f:
4728
 *
4729
 *  { x -> x' : E x = E x' }
4730
 */
4731
static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality,
4732
  struct isl_exploit_lineality_data *data)
4733
2
{
4734
2
  isl_mat *eq;
4735
2
  isl_space *space;
4736
2
  isl_set *univ;
4737
2
  isl_multi_aff *ma;
4738
2
  isl_multi_pw_aff *mpa;
4739
2
  isl_map *map;
4740
2
  int n;
4741
2
4742
2
  if (!lineality)
4743
0
    return isl_stat_error;
4744
2
  if (isl_basic_set_dim(lineality, isl_dim_div) != 0)
4745
2
    
isl_die0
(isl_basic_set_get_ctx(lineality), isl_error_internal,
4746
2
      "local variables not allowed", goto error);
4747
2
4748
2
  space = isl_basic_set_get_space(lineality);
4749
2
  if (!data->any_non_trivial) {
4750
2
    data->equivalent = isl_union_map_empty(isl_space_copy(space));
4751
2
    data->mask = isl_union_set_empty(isl_space_copy(space));
4752
2
  }
4753
2
  data->any_non_trivial = isl_bool_true;
4754
2
4755
2
  univ = isl_set_universe(isl_space_copy(space));
4756
2
  data->mask = isl_union_set_add_set(data->mask, univ);
4757
2
4758
2
  eq = isl_basic_set_extract_equalities(lineality);
4759
2
  n = isl_mat_rows(eq);
4760
2
  eq = isl_mat_insert_zero_rows(eq, 0, 1);
4761
2
  eq = isl_mat_set_element_si(eq, 0, 0, 1);
4762
2
  space = isl_space_from_domain(space);
4763
2
  space = isl_space_add_dims(space, isl_dim_out, n);
4764
2
  ma = isl_multi_aff_from_aff_mat(space, eq);
4765
2
  mpa = isl_multi_pw_aff_from_multi_aff(ma);
4766
2
  map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa));
4767
2
  data->equivalent = isl_union_map_add_map(data->equivalent, map);
4768
2
4769
2
  isl_basic_set_free(lineality);
4770
2
  return isl_stat_ok;
4771
0
error:
4772
0
  isl_basic_set_free(lineality);
4773
0
  return isl_stat_error;
4774
2
}
4775
4776
/* Check if the lineality space "set" is non-trivial (i.e., is not just
4777
 * the origin or, in other words, satisfies a number of equality constraints
4778
 * that is smaller than the dimension of the set).
4779
 * If so, extend data->mask and data->equivalent accordingly.
4780
 *
4781
 * The input should not have any local variables already, but
4782
 * isl_set_remove_divs is called to make sure it does not.
4783
 */
4784
static isl_stat add_lineality(__isl_take isl_set *set, void *user)
4785
20
{
4786
20
  struct isl_exploit_lineality_data *data = user;
4787
20
  isl_basic_set *hull;
4788
20
  int dim, n_eq;
4789
20
4790
20
  set = isl_set_remove_divs(set);
4791
20
  hull = isl_set_unshifted_simple_hull(set);
4792
20
  dim = isl_basic_set_dim(hull, isl_dim_set);
4793
20
  n_eq = isl_basic_set_n_equality(hull);
4794
20
  if (!hull)
4795
0
    return isl_stat_error;
4796
20
  if (dim != n_eq)
4797
2
    return add_non_trivial_lineality(hull, data);
4798
18
  isl_basic_set_free(hull);
4799
18
  return isl_stat_ok;
4800
18
}
4801
4802
/* Check if the difference set on intra-node schedule constraints "intra"
4803
 * has any non-trivial lineality space.
4804
 * If so, then extend the difference set to a difference set
4805
 * on equivalent elements.  That is, if "intra" is
4806
 *
4807
 *  { y - x : (x,y) \in V }
4808
 *
4809
 * and elements are equivalent if they have the same image under f,
4810
 * then return
4811
 *
4812
 *  { y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4813
 *
4814
 * or, since f is linear,
4815
 *
4816
 *  { y' - x' : (x,y) \in V and f(y - x) = f(y' - x') }
4817
 *
4818
 * The results of the search for non-trivial lineality spaces is stored
4819
 * in "data".
4820
 */
4821
static __isl_give isl_union_set *exploit_intra_lineality(
4822
  __isl_take isl_union_set *intra,
4823
  struct isl_exploit_lineality_data *data)
4824
28
{
4825
28
  isl_union_set *lineality;
4826
28
  isl_union_set *uset;
4827
28
4828
28
  data->any_non_trivial = isl_bool_false;
4829
28
  lineality = isl_union_set_copy(intra);
4830
28
  lineality = isl_union_set_combined_lineality_space(lineality);
4831
28
  if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0)
4832
0
    data->any_non_trivial = isl_bool_error;
4833
28
  isl_union_set_free(lineality);
4834
28
4835
28
  if (data->any_non_trivial < 0)
4836
0
    return isl_union_set_free(intra);
4837
28
  if (!data->any_non_trivial)
4838
26
    return intra;
4839
2
4840
2
  uset = isl_union_set_copy(intra);
4841
2
  intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask));
4842
2
  uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent));
4843
2
  intra = isl_union_set_union(intra, uset);
4844
2
4845
2
  intra = isl_union_set_remove_divs(intra);
4846
2
4847
2
  return intra;
4848
2
}
4849
4850
/* If the difference set on intra-node schedule constraints was found to have
4851
 * any non-trivial lineality space by exploit_intra_lineality,
4852
 * as recorded in "data", then extend the inter-node
4853
 * schedule constraints "inter" to schedule constraints on equivalent elements.
4854
 * That is, if "inter" is V and
4855
 * elements are equivalent if they have the same image under f, then return
4856
 *
4857
 *  { (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4858
 */
4859
static __isl_give isl_union_map *exploit_inter_lineality(
4860
  __isl_take isl_union_map *inter,
4861
  struct isl_exploit_lineality_data *data)
4862
28
{
4863
28
  isl_union_map *umap;
4864
28
4865
28
  if (data->any_non_trivial < 0)
4866
0
    return isl_union_map_free(inter);
4867
28
  if (!data->any_non_trivial)
4868
26
    return inter;
4869
2
4870
2
  umap = isl_union_map_copy(inter);
4871
2
  inter = isl_union_map_subtract_range(inter,
4872
2
        isl_union_set_copy(data->mask));
4873
2
  umap = isl_union_map_apply_range(umap,
4874
2
        isl_union_map_copy(data->equivalent));
4875
2
  inter = isl_union_map_union(inter, umap);
4876
2
  umap = isl_union_map_copy(inter);
4877
2
  inter = isl_union_map_subtract_domain(inter,
4878
2
        isl_union_set_copy(data->mask));
4879
2
  umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent),
4880
2
        umap);
4881
2
  inter = isl_union_map_union(inter, umap);
4882
2
4883
2
  inter = isl_union_map_remove_divs(inter);
4884
2
4885
2
  return inter;
4886
2
}
4887
4888
/* For each (conditional) validity edge in "graph",
4889
 * add the corresponding dependence relation using "add"
4890
 * to a collection of dependence relations and return the result.
4891
 * If "coincidence" is set, then coincidence edges are considered as well.
4892
 */
4893
static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph,
4894
  __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap,
4895
    struct isl_sched_edge *edge), int coincidence)
4896
56
{
4897
56
  int i;
4898
56
  isl_space *space;
4899
56
  isl_union_map *umap;
4900
56
4901
56
  space = isl_space_copy(graph->node[0].space);
4902
56
  umap = isl_union_map_empty(space);
4903
56
4904
140
  for (i = 0; i < graph->n_edge; 
++i84
) {
4905
84
    struct isl_sched_edge *edge = &graph->edge[i];
4906
84
4907
84
    if (!is_any_validity(edge) &&
4908
84
        
(0
!coincidence0
||
!is_coincidence(edge)0
))
4909
0
      continue;
4910
84
4911
84
    umap = add(umap, edge);
4912
84
  }
4913
56
4914
56
  return umap;
4915
56
}
4916
4917
/* Project out all parameters from "uset" and return the result.
4918
 */
4919
static __isl_give isl_union_set *union_set_drop_parameters(
4920
  __isl_take isl_union_set *uset)
4921
28
{
4922
28
  unsigned nparam;
4923
28
4924
28
  nparam = isl_union_set_dim(uset, isl_dim_param);
4925
28
  return isl_union_set_project_out(uset, isl_dim_param, 0, nparam);
4926
28
}
4927
4928
/* For each dependence relation on a (conditional) validity edge
4929
 * from a node to itself,
4930
 * construct the set of coefficients of valid constraints for elements
4931
 * in that dependence relation and collect the results.
4932
 * If "coincidence" is set, then coincidence edges are considered as well.
4933
 *
4934
 * In particular, for each dependence relation R, constraints
4935
 * on coefficients (c_0, c_x) are constructed such that
4936
 *
4937
 *  c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
4938
 *
4939
 * If the schedule_treat_coalescing option is set, then some constraints
4940
 * that could be exploited to construct coalescing schedules
4941
 * are removed before the dual is computed, but after the parameters
4942
 * have been projected out.
4943
 * The entire computation is essentially the same as that performed
4944
 * by intra_coefficients, except that it operates on multiple
4945
 * edges together and that the parameters are always projected out.
4946
 *
4947
 * Additionally, exploit any non-trivial lineality space
4948
 * in the difference set after removing coalescing constraints and
4949
 * store the results of the non-trivial lineality space detection in "data".
4950
 * The procedure is currently run unconditionally, but it is unlikely
4951
 * to find any non-trivial lineality spaces if no coalescing constraints
4952
 * have been removed.
4953
 *
4954
 * Note that if a dependence relation is a union of basic maps,
4955
 * then each basic map needs to be treated individually as it may only
4956
 * be possible to carry the dependences expressed by some of those
4957
 * basic maps and not all of them.
4958
 * The collected validity constraints are therefore not coalesced and
4959
 * it is assumed that they are not coalesced automatically.
4960
 * Duplicate basic maps can be removed, however.
4961
 * In particular, if the same basic map appears as a disjunct
4962
 * in multiple edges, then it only needs to be carried once.
4963
 */
4964
static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx,
4965
  struct isl_sched_graph *graph, int coincidence,
4966
  struct isl_exploit_lineality_data *data)
4967
28
{
4968
28
  isl_union_map *intra;
4969
28
  isl_union_set *delta;
4970
28
  isl_basic_set_list *list;
4971
28
4972
28
  intra = collect_validity(graph, &add_intra, coincidence);
4973
28
  delta = isl_union_map_deltas(intra);
4974
28
  delta = union_set_drop_parameters(delta);
4975
28
  delta = isl_union_set_remove_divs(delta);
4976
28
  if (isl_options_get_schedule_treat_coalescing(ctx))
4977
28
    delta = union_drop_coalescing_constraints(ctx, graph, delta);
4978
28
  delta = exploit_intra_lineality(delta, data);
4979
28
  list = isl_union_set_get_basic_set_list(delta);
4980
28
  isl_union_set_free(delta);
4981
28
4982
28
  return isl_basic_set_list_coefficients(list);
4983
28
}
4984
4985
/* For each dependence relation on a (conditional) validity edge
4986
 * from a node to some other node,
4987
 * construct the set of coefficients of valid constraints for elements
4988
 * in that dependence relation and collect the results.
4989
 * If "coincidence" is set, then coincidence edges are considered as well.
4990
 *
4991
 * In particular, for each dependence relation R, constraints
4992
 * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
4993
 *
4994
 *  c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
4995
 *
4996
 * This computation is essentially the same as that performed
4997
 * by inter_coefficients, except that it operates on multiple
4998
 * edges together.
4999
 *
5000
 * Additionally, exploit any non-trivial lineality space
5001
 * that may have been discovered by collect_intra_validity
5002
 * (as stored in "data").
5003
 *
5004
 * Note that if a dependence relation is a union of basic maps,
5005
 * then each basic map needs to be treated individually as it may only
5006
 * be possible to carry the dependences expressed by some of those
5007
 * basic maps and not all of them.
5008
 * The collected validity constraints are therefore not coalesced and
5009
 * it is assumed that they are not coalesced automatically.
5010
 * Duplicate basic maps can be removed, however.
5011
 * In particular, if the same basic map appears as a disjunct
5012
 * in multiple edges, then it only needs to be carried once.
5013
 */
5014
static __isl_give isl_basic_set_list *collect_inter_validity(
5015
  struct isl_sched_graph *graph, int coincidence,
5016
  struct isl_exploit_lineality_data *data)
5017
28
{
5018
28
  isl_union_map *inter;
5019
28
  isl_union_set *wrap;
5020
28
  isl_basic_set_list *list;
5021
28
5022
28
  inter = collect_validity(graph, &add_inter, coincidence);
5023
28
  inter = exploit_inter_lineality(inter, data);
5024
28
  inter = isl_union_map_remove_divs(inter);
5025
28
  wrap = isl_union_map_wrap(inter);
5026
28
  list = isl_union_set_get_basic_set_list(wrap);
5027
28
  isl_union_set_free(wrap);
5028
28
  return isl_basic_set_list_coefficients(list);
5029
28
}
5030
5031
/* Construct an LP problem for finding schedule coefficients
5032
 * such that the schedule carries as many of the "n_edge" groups of
5033
 * dependences as possible based on the corresponding coefficient
5034
 * constraints and return the lexicographically smallest non-trivial solution.
5035
 * "intra" is the sequence of coefficient constraints for intra-node edges.
5036
 * "inter" is the sequence of coefficient constraints for inter-node edges.
5037
 * If "want_integral" is set, then compute an integral solution
5038
 * for the coefficients rather than using the numerators
5039
 * of a rational solution.
5040
 * "carry_inter" indicates whether inter-node edges should be carried or
5041
 * only respected.
5042
 *
5043
 * If none of the "n_edge" groups can be carried
5044
 * then return an empty vector.
5045
 */
5046
static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx,
5047
  struct isl_sched_graph *graph, int n_edge,
5048
  __isl_keep isl_basic_set_list *intra,
5049
  __isl_keep isl_basic_set_list *inter, int want_integral,
5050
  int carry_inter)
5051
28
{
5052
28
  isl_basic_set *lp;
5053
28
5054
28
  if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0)
5055
0
    return NULL;
5056
28
5057
28
  lp = isl_basic_set_copy(graph->lp);
5058
28
  return non_neg_lexmin(graph, lp, n_edge, want_integral);
5059
28
}
5060
5061
/* Construct an LP problem for finding schedule coefficients
5062
 * such that the schedule carries as many of the validity dependences
5063
 * as possible and
5064
 * return the lexicographically smallest non-trivial solution.
5065
 * If "fallback" is set, then the carrying is performed as a fallback
5066
 * for the Pluto-like scheduler.
5067
 * If "coincidence" is set, then try and carry coincidence edges as well.
5068
 *
5069
 * The variable "n_edge" stores the number of groups that should be carried.
5070
 * If none of the "n_edge" groups can be carried
5071
 * then return an empty vector.
5072
 * If, moreover, "n_edge" is zero, then the LP problem does not even
5073
 * need to be constructed.
5074
 *
5075
 * If a fallback solution is being computed, then compute an integral solution
5076
 * for the coefficients rather than using the numerators
5077
 * of a rational solution.
5078
 *
5079
 * If a fallback solution is being computed, if there are any intra-node
5080
 * dependences, and if requested by the user, then first try
5081
 * to only carry those intra-node dependences.
5082
 * If this fails to carry any dependences, then try again
5083
 * with the inter-node dependences included.
5084
 */
5085
static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx,
5086
  struct isl_sched_graph *graph, int fallback, int coincidence)
5087
28
{
5088
28
  int n_intra, n_inter;
5089
28
  int n_edge;
5090
28
  struct isl_carry carry = { 0 };
5091
28
  isl_vec *sol;
5092
28
5093
28
  carry.intra = collect_intra_validity(ctx, graph, coincidence,
5094
28
            &carry.lineality);
5095
28
  carry.inter = collect_inter_validity(graph, coincidence,
5096
28
            &carry.lineality);
5097
28
  if (!carry.intra || !carry.inter)
5098
0
    goto error;
5099
28
  n_intra = isl_basic_set_list_n_basic_set(carry.intra);
5100
28
  n_inter = isl_basic_set_list_n_basic_set(carry.inter);
5101
28
5102
28
  if (fallback && 
n_intra > 014
&&
5103
28
      
isl_options_get_schedule_carry_self_first(ctx)10
) {
5104
10
    sol = compute_carrying_sol_coef(ctx, graph, n_intra,
5105
10
        carry.intra, carry.inter, fallback, 0);
5106
10
    if (!sol || sol->size != 0 || 
n_inter == 00
) {
5107
10
      isl_carry_clear(&carry);
5108
10
      return sol;
5109
10
    }
5110
0
    isl_vec_free(sol);
5111
0
  }
5112
28
5113
28
  n_edge = n_intra + n_inter;
5114
18
  if (n_edge == 0) {
5115
0
    isl_carry_clear(&carry);
5116
0
    return isl_vec_alloc(ctx, 0);
5117
0
  }
5118
18
5119
18
  sol = compute_carrying_sol_coef(ctx, graph, n_edge,
5120
18
        carry.intra, carry.inter, fallback, 1);
5121
18
  isl_carry_clear(&carry);
5122
18
  return sol;
5123
0
error:
5124
0
  isl_carry_clear(&carry);
5125
0
  return NULL;
5126
18
}
5127
5128
/* Construct a schedule row for each node such that as many validity dependences
5129
 * as possible are carried and then continue with the next band.
5130
 * If "fallback" is set, then the carrying is performed as a fallback
5131
 * for the Pluto-like scheduler.
5132
 * If "coincidence" is set, then try and carry coincidence edges as well.
5133
 *
5134
 * If there are no validity dependences, then no dependence can be carried and
5135
 * the procedure is guaranteed to fail.  If there is more than one component,
5136
 * then try computing a schedule on each component separately
5137
 * to prevent or at least postpone this failure.
5138
 *
5139
 * If a schedule row is computed, then check that dependences are carried
5140
 * for at least one of the edges.
5141
 *
5142
 * If the computed schedule row turns out to be trivial on one or
5143
 * more nodes where it should not be trivial, then we throw it away
5144
 * and try again on each component separately.
5145
 *
5146
 * If there is only one component, then we accept the schedule row anyway,
5147
 * but we do not consider it as a complete row and therefore do not
5148
 * increment graph->n_row.  Note that the ranks of the nodes that
5149
 * do get a non-trivial schedule part will get updated regardless and
5150
 * graph->maxvar is computed based on these ranks.  The test for
5151
 * whether more schedule rows are required in compute_schedule_wcc
5152
 * is therefore not affected.
5153
 *
5154
 * Insert a band corresponding to the schedule row at position "node"
5155
 * of the schedule tree and continue with the construction of the schedule.
5156
 * This insertion and the continued construction is performed by split_scaled
5157
 * after optionally checking for non-trivial common divisors.
5158
 */
5159
static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node,
5160
  struct isl_sched_graph *graph, int fallback, int coincidence)
5161
28
{
5162
28
  int trivial;
5163
28
  isl_ctx *ctx;
5164
28
  isl_vec *sol;
5165
28
5166
28
  if (!node)
5167
0
    return NULL;
5168
28
5169
28
  ctx = isl_schedule_node_get_ctx(node);
5170
28
  sol = compute_carrying_sol(ctx, graph, fallback, coincidence);
5171
28
  if (!sol)
5172
0
    return isl_schedule_node_free(node);
5173
28
  if (sol->size == 0) {
5174
0
    isl_vec_free(sol);
5175
0
    if (graph->scc > 1)
5176
0
      return compute_component_schedule(node, graph, 1);
5177
0
    isl_die(ctx, isl_error_unknown, "unable to carry dependences",
5178
0
      return isl_schedule_node_free(node));
5179
0
  }
5180
28
5181
28
  trivial = is_any_trivial(graph, sol);
5182
28
  if (trivial < 0) {
5183
0
    sol = isl_vec_free(sol);
5184
28
  } else if (trivial && 
graph->scc > 14
) {
5185
2
    isl_vec_free(sol);
5186
2
    return compute_component_schedule(node, graph, 1);
5187
2
  }
5188
26
5189
26
  if (update_schedule(graph, sol, 0) < 0)
5190
0
    return isl_schedule_node_free(node);
5191
26
  if (trivial)
5192
2
    graph->n_row--;
5193
26
5194
26
  return split_scaled(node, graph);
5195
26
}
5196
5197
/* Construct a schedule row for each node such that as many validity dependences
5198
 * as possible are carried and then continue with the next band.
5199
 * Do so as a fallback for the Pluto-like scheduler.
5200
 * If "coincidence" is set, then try and carry coincidence edges as well.
5201
 */
5202
static __isl_give isl_schedule_node *carry_fallback(
5203
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5204
  int coincidence)
5205
14
{
5206
14
  return carry(node, graph, 1, coincidence);
5207
14
}
5208
5209
/* Construct a schedule row for each node such that as many validity dependences
5210
 * as possible are carried and then continue with the next band.
5211
 * Do so for the case where the Feautrier scheduler was selected
5212
 * by the user.
5213
 */
5214
static __isl_give isl_schedule_node *carry_feautrier(
5215
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5216
14
{
5217
14
  return carry(node, graph, 0, 0);
5218
14
}
5219
5220
/* Construct a schedule row for each node such that as many validity dependences
5221
 * as possible are carried and then continue with the next band.
5222
 * Do so as a fallback for the Pluto-like scheduler.
5223
 */
5224
static __isl_give isl_schedule_node *carry_dependences(
5225
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5226
9
{
5227
9
  return carry_fallback(node, graph, 0);
5228
9
}
5229
5230
/* Construct a schedule row for each node such that as many validity or
5231
 * coincidence dependences as possible are carried and
5232
 * then continue with the next band.
5233
 * Do so as a fallback for the Pluto-like scheduler.
5234
 */
5235
static __isl_give isl_schedule_node *carry_coincidence(
5236
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5237
5
{
5238
5
  return carry_fallback(node, graph, 1);
5239
5
}
5240
5241
/* Topologically sort statements mapped to the same schedule iteration
5242
 * and add insert a sequence node in front of "node"
5243
 * corresponding to this order.
5244
 * If "initialized" is set, then it may be assumed that compute_maxvar
5245
 * has been called on the current band.  Otherwise, call
5246
 * compute_maxvar if and before carry_dependences gets called.
5247
 *
5248
 * If it turns out to be impossible to sort the statements apart,
5249
 * because different dependences impose different orderings
5250
 * on the statements, then we extend the schedule such that
5251
 * it carries at least one more dependence.
5252
 */
5253
static __isl_give isl_schedule_node *sort_statements(
5254
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5255
  int initialized)
5256
171
{
5257
171
  isl_ctx *ctx;
5258
171
  isl_union_set_list *filters;
5259
171
5260
171
  if (!node)
5261
0
    return NULL;
5262
171
5263
171
  ctx = isl_schedule_node_get_ctx(node);
5264
171
  if (graph->n < 1)
5265
171
    
isl_die0
(ctx, isl_error_internal,
5266
171
      "graph should have at least one node",
5267
171
      return isl_schedule_node_free(node));
5268
171
5269
171
  if (graph->n == 1)
5270
153
    return node;
5271
18
5272
18
  if (update_edges(ctx, graph) < 0)
5273
0
    return isl_schedule_node_free(node);
5274
18
5275
18
  if (graph->n_edge == 0)
5276
0
    return node;
5277
18
5278
18
  if (detect_sccs(ctx, graph) < 0)
5279
0
    return isl_schedule_node_free(node);
5280
18
5281
18
  next_band(graph);
5282
18
  if (graph->scc < graph->n) {
5283
2
    if (!initialized && 
compute_maxvar(graph) < 00
)
5284
0
      return isl_schedule_node_free(node);
5285
2
    return carry_dependences(node, graph);
5286
2
  }
5287
16
5288
16
  filters = extract_sccs(ctx, graph);
5289
16
  node = isl_schedule_node_insert_sequence(node, filters);
5290
16
5291
16
  return node;
5292
16
}
5293
5294
/* Are there any (non-empty) (conditional) validity edges in the graph?
5295
 */
5296
static int has_validity_edges(struct isl_sched_graph *graph)
5297
34
{
5298
34
  int i;
5299
34
5300
46
  for (i = 0; i < graph->n_edge; 
++i12
) {
5301
26
    int empty;
5302
26
5303
26
    empty = isl_map_plain_is_empty(graph->edge[i].map);
5304
26
    if (empty < 0)
5305
0
      return -1;
5306
26
    if (empty)
5307
10
      continue;
5308
16
    if (is_any_validity(&graph->edge[i]))
5309
14
      return 1;
5310
16
  }
5311
34
5312
34
  
return 020
;
5313
34
}
5314
5315
/* Should we apply a Feautrier step?
5316
 * That is, did the user request the Feautrier algorithm and are
5317
 * there any validity dependences (left)?
5318
 */
5319
static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
5320
321
{
5321
321
  if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
5322
321
    
return 0287
;
5323
34
5324
34
  return has_validity_edges(graph);
5325
34
}
5326
5327
/* Compute a schedule for a connected dependence graph using Feautrier's
5328
 * multi-dimensional scheduling algorithm and return the updated schedule node.
5329
 *
5330
 * The original algorithm is described in [1].
5331
 * The main idea is to minimize the number of scheduling dimensions, by
5332
 * trying to satisfy as many dependences as possible per scheduling dimension.
5333
 *
5334
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
5335
 *     Problem, Part II: Multi-Dimensional Time.
5336
 *     In Intl. Journal of Parallel Programming, 1992.
5337
 */
5338
static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier(
5339
  isl_schedule_node *node, struct isl_sched_graph *graph)
5340
14
{
5341
14
  return carry_feautrier(node, graph);
5342
14
}
5343
5344
/* Turn off the "local" bit on all (condition) edges.
5345
 */
5346
static void clear_local_edges(struct isl_sched_graph *graph)
5347
381
{
5348
381
  int i;
5349
381
5350
733
  for (i = 0; i < graph->n_edge; 
++i352
)
5351
352
    if (is_condition(&graph->edge[i]))
5352
44
      clear_local(&graph->edge[i]);
5353
381
}
5354
5355
/* Does "graph" have both condition and conditional validity edges?
5356
 */
5357
static int need_condition_check(struct isl_sched_graph *graph)
5358
381
{
5359
381
  int i;
5360
381
  int any_condition = 0;
5361
381
  int any_conditional_validity = 0;
5362
381
5363
733
  for (i = 0; i < graph->n_edge; 
++i352
) {
5364
352
    if (is_condition(&graph->edge[i]))
5365
44
      any_condition = 1;
5366
352
    if (is_conditional_validity(&graph->edge[i]))
5367
45
      any_conditional_validity = 1;
5368
352
  }
5369
381
5370
381
  return any_condition && 
any_conditional_validity27
;
5371
381
}
5372
5373
/* Does "graph" contain any coincidence edge?
5374
 */
5375
static int has_any_coincidence(struct isl_sched_graph *graph)
5376
381
{
5377
381
  int i;
5378
381
5379
526
  for (i = 0; i < graph->n_edge; 
++i145
)
5380
298
    if (is_coincidence(&graph->edge[i]))
5381
153
      return 1;
5382
381
5383
381
  
return 0228
;
5384
381
}
5385
5386
/* Extract the final schedule row as a map with the iteration domain
5387
 * of "node" as domain.
5388
 */
5389
static __isl_give isl_map *final_row(struct isl_sched_node *node)
5390
142
{
5391
142
  isl_multi_aff *ma;
5392
142
  int row;
5393
142
5394
142
  row = isl_mat_rows(node->sched) - 1;
5395
142
  ma = node_extract_partial_schedule_multi_aff(node, row, 1);
5396
142
  return isl_map_from_multi_aff(ma);
5397
142
}
5398
5399
/* Is the conditional validity dependence in the edge with index "edge_index"
5400
 * violated by the latest (i.e., final) row of the schedule?
5401
 * That is, is i scheduled after j
5402
 * for any conditional validity dependence i -> j?
5403
 */
5404
static int is_violated(struct isl_sched_graph *graph, int edge_index)
5405
71
{
5406
71
  isl_map *src_sched, *dst_sched, *map;
5407
71
  struct isl_sched_edge *edge = &graph->edge[edge_index];
5408
71
  int empty;
5409
71
5410
71
  src_sched = final_row(edge->src);
5411
71
  dst_sched = final_row(edge->dst);
5412
71
  map = isl_map_copy(edge->map);
5413
71
  map = isl_map_apply_domain(map, src_sched);
5414
71
  map = isl_map_apply_range(map, dst_sched);
5415
71
  map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0);
5416
71
  empty = isl_map_is_empty(map);
5417
71
  isl_map_free(map);
5418
71
5419
71
  if (empty < 0)
5420
0
    return -1;
5421
71
5422
71
  return !empty;
5423
71
}
5424
5425
/* Does "graph" have any satisfied condition edges that
5426
 * are adjacent to the conditional validity constraint with
5427
 * domain "conditional_source" and range "conditional_sink"?
5428
 *