Coverage Report

Created: 2017-06-23 12:40

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/External/isl/isl_scheduler.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2011      INRIA Saclay
3
 * Copyright 2012-2014 Ecole Normale Superieure
4
 * Copyright 2015-2016 Sven Verdoolaege
5
 * Copyright 2016      INRIA Paris
6
 * Copyright 2017      Sven Verdoolaege
7
 *
8
 * Use of this software is governed by the MIT license
9
 *
10
 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
11
 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
12
 * 91893 Orsay, France
13
 * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
14
 * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
15
 * CS 42112, 75589 Paris Cedex 12, France
16
 */
17
18
#include <isl_ctx_private.h>
19
#include <isl_map_private.h>
20
#include <isl_space_private.h>
21
#include <isl_aff_private.h>
22
#include <isl/hash.h>
23
#include <isl/constraint.h>
24
#include <isl/schedule.h>
25
#include <isl_schedule_constraints.h>
26
#include <isl/schedule_node.h>
27
#include <isl_mat_private.h>
28
#include <isl_vec_private.h>
29
#include <isl/set.h>
30
#include <isl/union_set.h>
31
#include <isl_seq.h>
32
#include <isl_tab.h>
33
#include <isl_dim_map.h>
34
#include <isl/map_to_basic_set.h>
35
#include <isl_sort.h>
36
#include <isl_options_private.h>
37
#include <isl_tarjan.h>
38
#include <isl_morph.h>
39
#include <isl/ilp.h>
40
#include <isl_val_private.h>
41
42
/*
43
 * The scheduling algorithm implemented in this file was inspired by
44
 * Bondhugula et al., "Automatic Transformations for Communication-Minimized
45
 * Parallelization and Locality Optimization in the Polyhedral Model".
46
 */
47
48
49
/* Internal information about a node that is used during the construction
50
 * of a schedule.
51
 * space represents the original space in which the domain lives;
52
 *  that is, the space is not affected by compression
53
 * sched is a matrix representation of the schedule being constructed
54
 *  for this node; if compressed is set, then this schedule is
55
 *  defined over the compressed domain space
56
 * sched_map is an isl_map representation of the same (partial) schedule
57
 *  sched_map may be NULL; if compressed is set, then this map
58
 *  is defined over the uncompressed domain space
59
 * rank is the number of linearly independent rows in the linear part
60
 *  of sched
61
 * the columns of cmap represent a change of basis for the schedule
62
 *  coefficients; the first rank columns span the linear part of
63
 *  the schedule rows
64
 * cinv is the inverse of cmap.
65
 * ctrans is the transpose of cmap.
66
 * start is the first variable in the LP problem in the sequences that
67
 *  represents the schedule coefficients of this node
68
 * nvar is the dimension of the domain
69
 * nparam is the number of parameters or 0 if we are not constructing
70
 *  a parametric schedule
71
 *
72
 * If compressed is set, then hull represents the constraints
73
 * that were used to derive the compression, while compress and
74
 * decompress map the original space to the compressed space and
75
 * vice versa.
76
 *
77
 * scc is the index of SCC (or WCC) this node belongs to
78
 *
79
 * "cluster" is only used inside extract_clusters and identifies
80
 * the cluster of SCCs that the node belongs to.
81
 *
82
 * coincident contains a boolean for each of the rows of the schedule,
83
 * indicating whether the corresponding scheduling dimension satisfies
84
 * the coincidence constraints in the sense that the corresponding
85
 * dependence distances are zero.
86
 *
87
 * If the schedule_treat_coalescing option is set, then
88
 * "sizes" contains the sizes of the (compressed) instance set
89
 * in each direction.  If there is no fixed size in a given direction,
90
 * then the corresponding size value is set to infinity.
91
 * If the schedule_treat_coalescing option or the schedule_max_coefficient
92
 * option is set, then "max" contains the maximal values for
93
 * schedule coefficients of the (compressed) variables.  If no bound
94
 * needs to be imposed on a particular variable, then the corresponding
95
 * value is negative.
96
 */
97
struct isl_sched_node {
98
  isl_space *space;
99
  int compressed;
100
  isl_set *hull;
101
  isl_multi_aff *compress;
102
  isl_multi_aff *decompress;
103
  isl_mat *sched;
104
  isl_map *sched_map;
105
  int  rank;
106
  isl_mat *cmap;
107
  isl_mat *cinv;
108
  isl_mat *ctrans;
109
  int  start;
110
  int  nvar;
111
  int  nparam;
112
113
  int  scc;
114
  int  cluster;
115
116
  int *coincident;
117
118
  isl_multi_val *sizes;
119
  isl_vec *max;
120
};
121
122
static int node_has_tuples(const void *entry, const void *val)
123
1.54k
{
124
1.54k
  struct isl_sched_node *node = (struct isl_sched_node *)entry;
125
1.54k
  isl_space *space = (isl_space *) val;
126
1.54k
127
1.54k
  return isl_space_has_equal_tuples(node->space, space);
128
1.54k
}
129
130
static int node_scc_exactly(struct isl_sched_node *node, int scc)
131
830
{
132
830
  return node->scc == scc;
133
830
}
134
135
static int node_scc_at_most(struct isl_sched_node *node, int scc)
136
18
{
137
18
  return node->scc <= scc;
138
18
}
139
140
static int node_scc_at_least(struct isl_sched_node *node, int scc)
141
18
{
142
18
  return node->scc >= scc;
143
18
}
144
145
/* An edge in the dependence graph.  An edge may be used to
146
 * ensure validity of the generated schedule, to minimize the dependence
147
 * distance or both
148
 *
149
 * map is the dependence relation, with i -> j in the map if j depends on i
150
 * tagged_condition and tagged_validity contain the union of all tagged
151
 *  condition or conditional validity dependence relations that
152
 *  specialize the dependence relation "map"; that is,
153
 *  if (i -> a) -> (j -> b) is an element of "tagged_condition"
154
 *  or "tagged_validity", then i -> j is an element of "map".
155
 *  If these fields are NULL, then they represent the empty relation.
156
 * src is the source node
157
 * dst is the sink node
158
 *
159
 * types is a bit vector containing the types of this edge.
160
 * validity is set if the edge is used to ensure correctness
161
 * coincidence is used to enforce zero dependence distances
162
 * proximity is set if the edge is used to minimize dependence distances
163
 * condition is set if the edge represents a condition
164
 *  for a conditional validity schedule constraint
165
 * local can only be set for condition edges and indicates that
166
 *  the dependence distance over the edge should be zero
167
 * conditional_validity is set if the edge is used to conditionally
168
 *  ensure correctness
169
 *
170
 * For validity edges, start and end mark the sequence of inequality
171
 * constraints in the LP problem that encode the validity constraint
172
 * corresponding to this edge.
173
 *
174
 * During clustering, an edge may be marked "no_merge" if it should
175
 * not be used to merge clusters.
176
 * The weight is also only used during clustering and it is
177
 * an indication of how many schedule dimensions on either side
178
 * of the schedule constraints can be aligned.
179
 * If the weight is negative, then this means that this edge was postponed
180
 * by has_bounded_distances or any_no_merge.  The original weight can
181
 * be retrieved by adding 1 + graph->max_weight, with "graph"
182
 * the graph containing this edge.
183
 */
184
struct isl_sched_edge {
185
  isl_map *map;
186
  isl_union_map *tagged_condition;
187
  isl_union_map *tagged_validity;
188
189
  struct isl_sched_node *src;
190
  struct isl_sched_node *dst;
191
192
  unsigned types;
193
194
  int start;
195
  int end;
196
197
  int no_merge;
198
  int weight;
199
};
200
201
/* Is "edge" marked as being of type "type"?
202
 */
203
static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type)
204
11.1k
{
205
11.1k
  return ISL_FL_ISSET(edge->types, 1 << type);
206
11.1k
}
207
208
/* Mark "edge" as being of type "type".
209
 */
210
static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
211
610
{
212
610
  ISL_FL_SET(edge->types, 1 << type);
213
610
}
214
215
/* No longer mark "edge" as being of type "type"?
216
 */
217
static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
218
34
{
219
34
  ISL_FL_CLR(edge->types, 1 << type);
220
34
}
221
222
/* Is "edge" marked as a validity edge?
223
 */
224
static int is_validity(struct isl_sched_edge *edge)
225
3.19k
{
226
3.19k
  return is_type(edge, isl_edge_validity);
227
3.19k
}
228
229
/* Mark "edge" as a validity edge.
230
 */
231
static void set_validity(struct isl_sched_edge *edge)
232
2
{
233
2
  set_type(edge, isl_edge_validity);
234
2
}
235
236
/* Is "edge" marked as a proximity edge?
237
 */
238
static int is_proximity(struct isl_sched_edge *edge)
239
1.34k
{
240
1.34k
  return is_type(edge, isl_edge_proximity);
241
1.34k
}
242
243
/* Is "edge" marked as a local edge?
244
 */
245
static int is_local(struct isl_sched_edge *edge)
246
1.86k
{
247
1.86k
  return is_type(edge, isl_edge_local);
248
1.86k
}
249
250
/* Mark "edge" as a local edge.
251
 */
252
static void set_local(struct isl_sched_edge *edge)
253
15
{
254
15
  set_type(edge, isl_edge_local);
255
15
}
256
257
/* No longer mark "edge" as a local edge.
258
 */
259
static void clear_local(struct isl_sched_edge *edge)
260
34
{
261
34
  clear_type(edge, isl_edge_local);
262
34
}
263
264
/* Is "edge" marked as a coincidence edge?
265
 */
266
static int is_coincidence(struct isl_sched_edge *edge)
267
1.83k
{
268
1.83k
  return is_type(edge, isl_edge_coincidence);
269
1.83k
}
270
271
/* Is "edge" marked as a condition edge?
272
 */
273
static int is_condition(struct isl_sched_edge *edge)
274
1.50k
{
275
1.50k
  return is_type(edge, isl_edge_condition);
276
1.50k
}
277
278
/* Is "edge" marked as a conditional validity edge?
279
 */
280
static int is_conditional_validity(struct isl_sched_edge *edge)
281
1.18k
{
282
1.18k
  return is_type(edge, isl_edge_conditional_validity);
283
1.18k
}
284
285
/* Internal information about the dependence graph used during
286
 * the construction of the schedule.
287
 *
288
 * intra_hmap is a cache, mapping dependence relations to their dual,
289
 *  for dependences from a node to itself
290
 * inter_hmap is a cache, mapping dependence relations to their dual,
291
 *  for dependences between distinct nodes
292
 * if compression is involved then the key for these maps
293
 * is the original, uncompressed dependence relation, while
294
 * the value is the dual of the compressed dependence relation.
295
 *
296
 * n is the number of nodes
297
 * node is the list of nodes
298
 * maxvar is the maximal number of variables over all nodes
299
 * max_row is the allocated number of rows in the schedule
300
 * n_row is the current (maximal) number of linearly independent
301
 *  rows in the node schedules
302
 * n_total_row is the current number of rows in the node schedules
303
 * band_start is the starting row in the node schedules of the current band
304
 * root is set if this graph is the original dependence graph,
305
 *  without any splitting
306
 *
307
 * sorted contains a list of node indices sorted according to the
308
 *  SCC to which a node belongs
309
 *
310
 * n_edge is the number of edges
311
 * edge is the list of edges
312
 * max_edge contains the maximal number of edges of each type;
313
 *  in particular, it contains the number of edges in the inital graph.
314
 * edge_table contains pointers into the edge array, hashed on the source
315
 *  and sink spaces; there is one such table for each type;
316
 *  a given edge may be referenced from more than one table
317
 *  if the corresponding relation appears in more than one of the
318
 *  sets of dependences; however, for each type there is only
319
 *  a single edge between a given pair of source and sink space
320
 *  in the entire graph
321
 *
322
 * node_table contains pointers into the node array, hashed on the space tuples
323
 *
324
 * region contains a list of variable sequences that should be non-trivial
325
 *
326
 * lp contains the (I)LP problem used to obtain new schedule rows
327
 *
328
 * src_scc and dst_scc are the source and sink SCCs of an edge with
329
 *  conflicting constraints
330
 *
331
 * scc represents the number of components
332
 * weak is set if the components are weakly connected
333
 *
334
 * max_weight is used during clustering and represents the maximal
335
 * weight of the relevant proximity edges.
336
 */
337
struct isl_sched_graph {
338
  isl_map_to_basic_set *intra_hmap;
339
  isl_map_to_basic_set *inter_hmap;
340
341
  struct isl_sched_node *node;
342
  int n;
343
  int maxvar;
344
  int max_row;
345
  int n_row;
346
347
  int *sorted;
348
349
  int n_total_row;
350
  int band_start;
351
352
  int root;
353
354
  struct isl_sched_edge *edge;
355
  int n_edge;
356
  int max_edge[isl_edge_last + 1];
357
  struct isl_hash_table *edge_table[isl_edge_last + 1];
358
359
  struct isl_hash_table *node_table;
360
  struct isl_region *region;
361
362
  isl_basic_set *lp;
363
364
  int src_scc;
365
  int dst_scc;
366
367
  int scc;
368
  int weak;
369
370
  int max_weight;
371
};
372
373
/* Initialize node_table based on the list of nodes.
374
 */
375
static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
376
255
{
377
255
  int i;
378
255
379
255
  graph->node_table = isl_hash_table_alloc(ctx, graph->n);
380
255
  if (!graph->node_table)
381
0
    return -1;
382
255
383
639
  
for (i = 0; 255
i < graph->n639
;
++i384
)
{384
384
384
    struct isl_hash_table_entry *entry;
385
384
    uint32_t hash;
386
384
387
384
    hash = isl_space_get_tuple_hash(graph->node[i].space);
388
384
    entry = isl_hash_table_find(ctx, graph->node_table, hash,
389
384
              &node_has_tuples,
390
384
              graph->node[i].space, 1);
391
384
    if (!entry)
392
0
      return -1;
393
384
    entry->data = &graph->node[i];
394
384
  }
395
255
396
255
  return 0;
397
255
}
398
399
/* Return a pointer to the node that lives within the given space,
400
 * or NULL if there is no such node.
401
 */
402
static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
403
  struct isl_sched_graph *graph, __isl_keep isl_space *space)
404
1.56k
{
405
1.56k
  struct isl_hash_table_entry *entry;
406
1.56k
  uint32_t hash;
407
1.56k
408
1.56k
  hash = isl_space_get_tuple_hash(space);
409
1.56k
  entry = isl_hash_table_find(ctx, graph->node_table, hash,
410
1.56k
            &node_has_tuples, space, 0);
411
1.56k
412
1.54k
  return entry ? entry->data : NULL;
413
1.56k
}
414
415
static int edge_has_src_and_dst(const void *entry, const void *val)
416
1.09k
{
417
1.09k
  const struct isl_sched_edge *edge = entry;
418
1.09k
  const struct isl_sched_edge *temp = val;
419
1.09k
420
1.09k
  return edge->src == temp->src && edge->dst == temp->dst;
421
1.09k
}
422
423
/* Add the given edge to graph->edge_table[type].
424
 */
425
static isl_stat graph_edge_table_add(isl_ctx *ctx,
426
  struct isl_sched_graph *graph, enum isl_edge_type type,
427
  struct isl_sched_edge *edge)
428
774
{
429
774
  struct isl_hash_table_entry *entry;
430
774
  uint32_t hash;
431
774
432
774
  hash = isl_hash_init();
433
774
  hash = isl_hash_builtin(hash, edge->src);
434
774
  hash = isl_hash_builtin(hash, edge->dst);
435
774
  entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
436
774
            &edge_has_src_and_dst, edge, 1);
437
774
  if (!entry)
438
0
    return isl_stat_error;
439
774
  entry->data = edge;
440
774
441
774
  return isl_stat_ok;
442
774
}
443
444
/* Allocate the edge_tables based on the maximal number of edges of
445
 * each type.
446
 */
447
static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
448
255
{
449
255
  int i;
450
255
451
1.53k
  for (i = 0; 
i <= isl_edge_last1.53k
;
++i1.27k
)
{1.27k
452
1.27k
    graph->edge_table[i] = isl_hash_table_alloc(ctx,
453
1.27k
                  graph->max_edge[i]);
454
1.27k
    if (!graph->edge_table[i])
455
0
      return -1;
456
1.27k
  }
457
255
458
255
  return 0;
459
255
}
460
461
/* If graph->edge_table[type] contains an edge from the given source
462
 * to the given destination, then return the hash table entry of this edge.
463
 * Otherwise, return NULL.
464
 */
465
static struct isl_hash_table_entry *graph_find_edge_entry(
466
  struct isl_sched_graph *graph,
467
  enum isl_edge_type type,
468
  struct isl_sched_node *src, struct isl_sched_node *dst)
469
4.00k
{
470
4.00k
  isl_ctx *ctx = isl_space_get_ctx(src->space);
471
4.00k
  uint32_t hash;
472
4.00k
  struct isl_sched_edge temp = { .src = src, .dst = dst };
473
4.00k
474
4.00k
  hash = isl_hash_init();
475
4.00k
  hash = isl_hash_builtin(hash, temp.src);
476
4.00k
  hash = isl_hash_builtin(hash, temp.dst);
477
4.00k
  return isl_hash_table_find(ctx, graph->edge_table[type], hash,
478
4.00k
            &edge_has_src_and_dst, &temp, 0);
479
4.00k
}
480
481
482
/* If graph->edge_table[type] contains an edge from the given source
483
 * to the given destination, then return this edge.
484
 * Otherwise, return NULL.
485
 */
486
static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
487
  enum isl_edge_type type,
488
  struct isl_sched_node *src, struct isl_sched_node *dst)
489
3.46k
{
490
3.46k
  struct isl_hash_table_entry *entry;
491
3.46k
492
3.46k
  entry = graph_find_edge_entry(graph, type, src, dst);
493
3.46k
  if (!entry)
494
2.60k
    return NULL;
495
3.46k
496
858
  return entry->data;
497
3.46k
}
498
499
/* Check whether the dependence graph has an edge of the given type
500
 * between the given two nodes.
501
 */
502
static isl_bool graph_has_edge(struct isl_sched_graph *graph,
503
  enum isl_edge_type type,
504
  struct isl_sched_node *src, struct isl_sched_node *dst)
505
1.54k
{
506
1.54k
  struct isl_sched_edge *edge;
507
1.54k
  isl_bool empty;
508
1.54k
509
1.54k
  edge = graph_find_edge(graph, type, src, dst);
510
1.54k
  if (!edge)
511
1.25k
    return 0;
512
1.54k
513
290
  empty = isl_map_plain_is_empty(edge->map);
514
290
  if (empty < 0)
515
0
    return isl_bool_error;
516
290
517
290
  return !empty;
518
290
}
519
520
/* Look for any edge with the same src, dst and map fields as "model".
521
 *
522
 * Return the matching edge if one can be found.
523
 * Return "model" if no matching edge is found.
524
 * Return NULL on error.
525
 */
526
static struct isl_sched_edge *graph_find_matching_edge(
527
  struct isl_sched_graph *graph, struct isl_sched_edge *model)
528
593
{
529
593
  enum isl_edge_type i;
530
593
  struct isl_sched_edge *edge;
531
593
532
1.80k
  for (i = isl_edge_first; 
i <= isl_edge_last1.80k
;
++i1.21k
)
{1.56k
533
1.56k
    int is_equal;
534
1.56k
535
1.56k
    edge = graph_find_edge(graph, i, model->src, model->dst);
536
1.56k
    if (!edge)
537
1.19k
      continue;
538
369
    is_equal = isl_map_plain_is_equal(model->map, edge->map);
539
369
    if (is_equal < 0)
540
0
      return NULL;
541
369
    
if (369
is_equal369
)
542
351
      return edge;
543
369
  }
544
593
545
242
  return model;
546
593
}
547
548
/* Remove the given edge from all the edge_tables that refer to it.
549
 */
550
static void graph_remove_edge(struct isl_sched_graph *graph,
551
  struct isl_sched_edge *edge)
552
108
{
553
108
  isl_ctx *ctx = isl_map_get_ctx(edge->map);
554
108
  enum isl_edge_type i;
555
108
556
648
  for (i = isl_edge_first; 
i <= isl_edge_last648
;
++i540
)
{540
557
540
    struct isl_hash_table_entry *entry;
558
540
559
540
    entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
560
540
    if (!entry)
561
300
      continue;
562
240
    
if (240
entry->data != edge240
)
563
21
      continue;
564
219
    isl_hash_table_remove(ctx, graph->edge_table[i], entry);
565
219
  }
566
108
}
567
568
/* Check whether the dependence graph has any edge
569
 * between the given two nodes.
570
 */
571
static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
572
  struct isl_sched_node *src, struct isl_sched_node *dst)
573
306
{
574
306
  enum isl_edge_type i;
575
306
  isl_bool r;
576
306
577
1.11k
  for (i = isl_edge_first; 
i <= isl_edge_last1.11k
;
++i812
)
{980
578
980
    r = graph_has_edge(graph, i, src, dst);
579
980
    if (
r < 0 || 980
r980
)
580
168
      return r;
581
980
  }
582
306
583
138
  return r;
584
306
}
585
586
/* Check whether the dependence graph has a validity edge
587
 * between the given two nodes.
588
 *
589
 * Conditional validity edges are essentially validity edges that
590
 * can be ignored if the corresponding condition edges are iteration private.
591
 * Here, we are only checking for the presence of validity
592
 * edges, so we need to consider the conditional validity edges too.
593
 * In particular, this function is used during the detection
594
 * of strongly connected components and we cannot ignore
595
 * conditional validity edges during this detection.
596
 */
597
static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph,
598
  struct isl_sched_node *src, struct isl_sched_node *dst)
599
335
{
600
335
  isl_bool r;
601
335
602
335
  r = graph_has_edge(graph, isl_edge_validity, src, dst);
603
335
  if (
r < 0 || 335
r335
)
604
102
    return r;
605
335
606
233
  return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
607
335
}
608
609
static int graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
610
  int n_node, int n_edge)
611
255
{
612
255
  int i;
613
255
614
255
  graph->n = n_node;
615
255
  graph->n_edge = n_edge;
616
255
  graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
617
255
  graph->sorted = isl_calloc_array(ctx, int, graph->n);
618
255
  graph->region = isl_alloc_array(ctx, struct isl_region, graph->n);
619
255
  graph->edge = isl_calloc_array(ctx,
620
255
          struct isl_sched_edge, graph->n_edge);
621
255
622
255
  graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
623
255
  graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
624
255
625
255
  if (
!graph->node || 255
!graph->region255
||
(graph->n_edge && 255
!graph->edge161
) ||
626
255
      !graph->sorted)
627
0
    return -1;
628
255
629
641
  
for(i = 0; 255
i < graph->n641
;
++i386
)
630
386
    graph->sorted[i] = i;
631
255
632
255
  return 0;
633
255
}
634
635
static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
636
273
{
637
273
  int i;
638
273
639
273
  isl_map_to_basic_set_free(graph->intra_hmap);
640
273
  isl_map_to_basic_set_free(graph->inter_hmap);
641
273
642
273
  if (graph->node)
643
639
    
for (i = 0; 255
i < graph->n639
;
++i384
)
{384
644
384
      isl_space_free(graph->node[i].space);
645
384
      isl_set_free(graph->node[i].hull);
646
384
      isl_multi_aff_free(graph->node[i].compress);
647
384
      isl_multi_aff_free(graph->node[i].decompress);
648
384
      isl_mat_free(graph->node[i].sched);
649
384
      isl_map_free(graph->node[i].sched_map);
650
384
      isl_mat_free(graph->node[i].cmap);
651
384
      isl_mat_free(graph->node[i].cinv);
652
384
      isl_mat_free(graph->node[i].ctrans);
653
384
      if (graph->root)
654
238
        free(graph->node[i].coincident);
655
384
      isl_multi_val_free(graph->node[i].sizes);
656
384
      isl_vec_free(graph->node[i].max);
657
384
    }
658
273
  free(graph->node);
659
273
  free(graph->sorted);
660
273
  if (graph->edge)
661
568
    
for (i = 0; 255
i < graph->n_edge568
;
++i313
)
{313
662
313
      isl_map_free(graph->edge[i].map);
663
313
      isl_union_map_free(graph->edge[i].tagged_condition);
664
313
      isl_union_map_free(graph->edge[i].tagged_validity);
665
313
    }
666
273
  free(graph->edge);
667
273
  free(graph->region);
668
1.63k
  for (i = 0; 
i <= isl_edge_last1.63k
;
++i1.36k
)
669
1.36k
    isl_hash_table_free(ctx, graph->edge_table[i]);
670
273
  isl_hash_table_free(ctx, graph->node_table);
671
273
  isl_basic_set_free(graph->lp);
672
273
}
673
674
/* For each "set" on which this function is called, increment
675
 * graph->n by one and update graph->maxvar.
676
 */
677
static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
678
240
{
679
240
  struct isl_sched_graph *graph = user;
680
240
  int nvar = isl_set_dim(set, isl_dim_set);
681
240
682
240
  graph->n++;
683
240
  if (nvar > graph->maxvar)
684
150
    graph->maxvar = nvar;
685
240
686
240
  isl_set_free(set);
687
240
688
240
  return isl_stat_ok;
689
240
}
690
691
/* Compute the number of rows that should be allocated for the schedule.
692
 * In particular, we need one row for each variable or one row
693
 * for each basic map in the dependences.
694
 * Note that it is practically impossible to exhaust both
695
 * the number of dependences and the number of variables.
696
 */
697
static isl_stat compute_max_row(struct isl_sched_graph *graph,
698
  __isl_keep isl_schedule_constraints *sc)
699
133
{
700
133
  int n_edge;
701
133
  isl_stat r;
702
133
  isl_union_set *domain;
703
133
704
133
  graph->n = 0;
705
133
  graph->maxvar = 0;
706
133
  domain = isl_schedule_constraints_get_domain(sc);
707
133
  r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
708
133
  isl_union_set_free(domain);
709
133
  if (r < 0)
710
0
    return isl_stat_error;
711
133
  n_edge = isl_schedule_constraints_n_basic_map(sc);
712
133
  if (n_edge < 0)
713
0
    return isl_stat_error;
714
133
  graph->max_row = n_edge + graph->maxvar;
715
133
716
133
  return isl_stat_ok;
717
133
}
718
719
/* Does "bset" have any defining equalities for its set variables?
720
 */
721
static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
722
238
{
723
238
  int i, n;
724
238
725
238
  if (!bset)
726
0
    return isl_bool_error;
727
238
728
238
  n = isl_basic_set_dim(bset, isl_dim_set);
729
589
  for (i = 0; 
i < n589
;
++i351
)
{359
730
359
    isl_bool has;
731
359
732
359
    has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
733
359
              NULL);
734
359
    if (
has < 0 || 359
has359
)
735
8
      return has;
736
359
  }
737
238
738
230
  return isl_bool_false;
739
238
}
740
741
/* Set the entries of node->max to the value of the schedule_max_coefficient
742
 * option, if set.
743
 */
744
static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
745
4
{
746
4
  int max;
747
4
748
4
  max = isl_options_get_schedule_max_coefficient(ctx);
749
4
  if (max == -1)
750
4
    return isl_stat_ok;
751
4
752
0
  node->max = isl_vec_alloc(ctx, node->nvar);
753
0
  node->max = isl_vec_set_si(node->max, max);
754
0
  if (!node->max)
755
0
    return isl_stat_error;
756
0
757
0
  return isl_stat_ok;
758
0
}
759
760
/* Set the entries of node->max to the minimum of the schedule_max_coefficient
761
 * option (if set) and half of the minimum of the sizes in the other
762
 * dimensions.  If the minimum of the sizes is one, half of the size
763
 * is zero and this value is reset to one.
764
 * If the global minimum is unbounded (i.e., if both
765
 * the schedule_max_coefficient is not set and the sizes in the other
766
 * dimensions are unbounded), then store a negative value.
767
 * If the schedule coefficient is close to the size of the instance set
768
 * in another dimension, then the schedule may represent a loop
769
 * coalescing transformation (especially if the coefficient
770
 * in that other dimension is one).  Forcing the coefficient to be
771
 * smaller than or equal to half the minimal size should avoid this
772
 * situation.
773
 */
774
static isl_stat compute_max_coefficient(isl_ctx *ctx,
775
  struct isl_sched_node *node)
776
234
{
777
234
  int max;
778
234
  int i, j;
779
234
  isl_vec *v;
780
234
781
234
  max = isl_options_get_schedule_max_coefficient(ctx);
782
234
  v = isl_vec_alloc(ctx, node->nvar);
783
234
  if (!v)
784
0
    return isl_stat_error;
785
234
786
575
  
for (i = 0; 234
i < node->nvar575
;
++i341
)
{341
787
341
    isl_int_set_si(v->el[i], max);
788
341
    isl_int_mul_si(v->el[i], v->el[i], 2);
789
341
  }
790
234
791
575
  for (i = 0; 
i < node->nvar575
;
++i341
)
{341
792
341
    isl_val *size;
793
341
794
341
    size = isl_multi_val_get_val(node->sizes, i);
795
341
    if (!size)
796
0
      goto error;
797
341
    
if (341
!isl_val_is_int(size)341
)
{193
798
193
      isl_val_free(size);
799
193
      continue;
800
193
    }
801
464
    
for (j = 0; 148
j < node->nvar464
;
++j316
)
{316
802
316
      if (j == i)
803
148
        continue;
804
168
      
if (168
isl_int_is_neg168
(v->el[j]) ||168
805
147
          isl_int_gt(v->el[j], size->n))
806
23
        isl_int_set(v->el[j], size->n);
807
168
    }
808
148
    isl_val_free(size);
809
148
  }
810
234
811
575
  
for (i = 0; 234
i < node->nvar575
;
++i341
)
{341
812
341
    isl_int_fdiv_q_ui(v->el[i], v->el[i], 2);
813
341
    if (isl_int_is_zero(v->el[i]))
814
10
      isl_int_set_si(v->el[i], 1);
815
341
  }
816
234
817
234
  node->max = v;
818
234
  return isl_stat_ok;
819
0
error:
820
0
  isl_vec_free(v);
821
0
  return isl_stat_error;
822
234
}
823
824
/* Compute and return the size of "set" in dimension "dim".
825
 * The size is taken to be the difference in values for that variable
826
 * for fixed values of the other variables.
827
 * In particular, the variable is first isolated from the other variables
828
 * in the range of a map
829
 *
830
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
831
 *
832
 * and then duplicated
833
 *
834
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
835
 *
836
 * The shared variables are then projected out and the maximal value
837
 * of i_dim' - i_dim is computed.
838
 */
839
static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
840
341
{
841
341
  isl_map *map;
842
341
  isl_local_space *ls;
843
341
  isl_aff *obj;
844
341
  isl_val *v;
845
341
846
341
  map = isl_set_project_onto_map(set, isl_dim_set, dim, 1);
847
341
  map = isl_map_project_out(map, isl_dim_in, dim, 1);
848
341
  map = isl_map_range_product(map, isl_map_copy(map));
849
341
  map = isl_set_unwrap(isl_map_range(map));
850
341
  set = isl_map_deltas(map);
851
341
  ls = isl_local_space_from_space(isl_set_get_space(set));
852
341
  obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
853
341
  v = isl_set_max_val(set, obj);
854
341
  isl_aff_free(obj);
855
341
  isl_set_free(set);
856
341
857
341
  return v;
858
341
}
859
860
/* Compute the size of the instance set "set" of "node", after compression,
861
 * as well as bounds on the corresponding coefficients, if needed.
862
 *
863
 * The sizes are needed when the schedule_treat_coalescing option is set.
864
 * The bounds are needed when the schedule_treat_coalescing option or
865
 * the schedule_max_coefficient option is set.
866
 *
867
 * If the schedule_treat_coalescing option is not set, then at most
868
 * the bounds need to be set and this is done in set_max_coefficient.
869
 * Otherwise, compress the domain if needed, compute the size
870
 * in each direction and store the results in node->size.
871
 * Finally, set the bounds on the coefficients based on the sizes
872
 * and the schedule_max_coefficient option in compute_max_coefficient.
873
 */
874
static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
875
  __isl_take isl_set *set)
876
238
{
877
238
  int j, n;
878
238
  isl_multi_val *mv;
879
238
880
238
  if (
!isl_options_get_schedule_treat_coalescing(ctx)238
)
{4
881
4
    isl_set_free(set);
882
4
    return set_max_coefficient(ctx, node);
883
4
  }
884
238
885
234
  
if (234
node->compressed234
)
886
8
    set = isl_set_preimage_multi_aff(set,
887
8
          isl_multi_aff_copy(node->decompress));
888
234
  mv = isl_multi_val_zero(isl_set_get_space(set));
889
234
  n = isl_set_dim(set, isl_dim_set);
890
575
  for (j = 0; 
j < n575
;
++j341
)
{341
891
341
    isl_val *v;
892
341
893
341
    v = compute_size(isl_set_copy(set), j);
894
341
    mv = isl_multi_val_set_val(mv, j, v);
895
341
  }
896
234
  node->sizes = mv;
897
234
  isl_set_free(set);
898
234
  if (!node->sizes)
899
0
    return isl_stat_error;
900
234
  return compute_max_coefficient(ctx, node);
901
234
}
902
903
/* Add a new node to the graph representing the given instance set.
904
 * "nvar" is the (possibly compressed) number of variables and
905
 * may be smaller than then number of set variables in "set"
906
 * if "compressed" is set.
907
 * If "compressed" is set, then "hull" represents the constraints
908
 * that were used to derive the compression, while "compress" and
909
 * "decompress" map the original space to the compressed space and
910
 * vice versa.
911
 * If "compressed" is not set, then "hull", "compress" and "decompress"
912
 * should be NULL.
913
 *
914
 * Compute the size of the instance set and bounds on the coefficients,
915
 * if needed.
916
 */
917
static isl_stat add_node(struct isl_sched_graph *graph,
918
  __isl_take isl_set *set, int nvar, int compressed,
919
  __isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
920
  __isl_take isl_multi_aff *decompress)
921
238
{
922
238
  int nparam;
923
238
  isl_ctx *ctx;
924
238
  isl_mat *sched;
925
238
  isl_space *space;
926
238
  int *coincident;
927
238
  struct isl_sched_node *node;
928
238
929
238
  if (!set)
930
0
    return isl_stat_error;
931
238
932
238
  ctx = isl_set_get_ctx(set);
933
238
  nparam = isl_set_dim(set, isl_dim_param);
934
238
  if (!ctx->opt->schedule_parametric)
935
24
    nparam = 0;
936
238
  sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
937
238
  node = &graph->node[graph->n];
938
238
  graph->n++;
939
238
  space = isl_set_get_space(set);
940
238
  node->space = space;
941
238
  node->nvar = nvar;
942
238
  node->nparam = nparam;
943
238
  node->sched = sched;
944
238
  node->sched_map = NULL;
945
238
  coincident = isl_calloc_array(ctx, int, graph->max_row);
946
238
  node->coincident = coincident;
947
238
  node->compressed = compressed;
948
238
  node->hull = hull;
949
238
  node->compress = compress;
950
238
  node->decompress = decompress;
951
238
  if (compute_sizes_and_max(ctx, node, set) < 0)
952
0
    return isl_stat_error;
953
238
954
238
  
if (238
!space || 238
!sched238
||
(graph->max_row && 238
!coincident236
))
955
0
    return isl_stat_error;
956
238
  
if (238
compressed && 238
(!hull || 8
!compress8
||
!decompress8
))
957
0
    return isl_stat_error;
958
238
959
238
  return isl_stat_ok;
960
238
}
961
962
/* Construct an identifier for node "node", which will represent "set".
963
 * The name of the identifier is either "compressed" or
964
 * "compressed_<name>", with <name> the name of the space of "set".
965
 * The user pointer of the identifier points to "node".
966
 */
967
static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set,
968
  struct isl_sched_node *node)
969
8
{
970
8
  isl_bool has_name;
971
8
  isl_ctx *ctx;
972
8
  isl_id *id;
973
8
  isl_printer *p;
974
8
  const char *name;
975
8
  char *id_name;
976
8
977
8
  has_name = isl_set_has_tuple_name(set);
978
8
  if (has_name < 0)
979
0
    return NULL;
980
8
981
8
  ctx = isl_set_get_ctx(set);
982
8
  if (!has_name)
983
0
    return isl_id_alloc(ctx, "compressed", node);
984
8
985
8
  p = isl_printer_to_str(ctx);
986
8
  name = isl_set_get_tuple_name(set);
987
8
  p = isl_printer_print_str(p, "compressed_");
988
8
  p = isl_printer_print_str(p, name);
989
8
  id_name = isl_printer_get_str(p);
990
8
  isl_printer_free(p);
991
8
992
8
  id = isl_id_alloc(ctx, id_name, node);
993
8
  free(id_name);
994
8
995
8
  return id;
996
8
}
997
998
/* Add a new node to the graph representing the given set.
999
 *
1000
 * If any of the set variables is defined by an equality, then
1001
 * we perform variable compression such that we can perform
1002
 * the scheduling on the compressed domain.
1003
 * In this case, an identifier is used that references the new node
1004
 * such that each compressed space is unique and
1005
 * such that the node can be recovered from the compressed space.
1006
 */
1007
static isl_stat extract_node(__isl_take isl_set *set, void *user)
1008
238
{
1009
238
  int nvar;
1010
238
  isl_bool has_equality;
1011
238
  isl_id *id;
1012
238
  isl_basic_set *hull;
1013
238
  isl_set *hull_set;
1014
238
  isl_morph *morph;
1015
238
  isl_multi_aff *compress, *decompress;
1016
238
  struct isl_sched_graph *graph = user;
1017
238
1018
238
  hull = isl_set_affine_hull(isl_set_copy(set));
1019
238
  hull = isl_basic_set_remove_divs(hull);
1020
238
  nvar = isl_set_dim(set, isl_dim_set);
1021
238
  has_equality = has_any_defining_equality(hull);
1022
238
1023
238
  if (has_equality < 0)
1024
0
    goto error;
1025
238
  
if (238
!has_equality238
)
{230
1026
230
    isl_basic_set_free(hull);
1027
230
    return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
1028
230
  }
1029
238
1030
8
  id = construct_compressed_id(set, &graph->node[graph->n]);
1031
8
  morph = isl_basic_set_variable_compression_with_id(hull,
1032
8
                  isl_dim_set, id);
1033
8
  isl_id_free(id);
1034
8
  nvar = isl_morph_ran_dim(morph, isl_dim_set);
1035
8
  compress = isl_morph_get_var_multi_aff(morph);
1036
8
  morph = isl_morph_inverse(morph);
1037
8
  decompress = isl_morph_get_var_multi_aff(morph);
1038
8
  isl_morph_free(morph);
1039
8
1040
8
  hull_set = isl_set_from_basic_set(hull);
1041
8
  return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
1042
0
error:
1043
0
  isl_basic_set_free(hull);
1044
0
  isl_set_free(set);
1045
0
  return isl_stat_error;
1046
238
}
1047
1048
struct isl_extract_edge_data {
1049
  enum isl_edge_type type;
1050
  struct isl_sched_graph *graph;
1051
};
1052
1053
/* Merge edge2 into edge1, freeing the contents of edge2.
1054
 * Return 0 on success and -1 on failure.
1055
 *
1056
 * edge1 and edge2 are assumed to have the same value for the map field.
1057
 */
1058
static int merge_edge(struct isl_sched_edge *edge1,
1059
  struct isl_sched_edge *edge2)
1060
351
{
1061
351
  edge1->types |= edge2->types;
1062
351
  isl_map_free(edge2->map);
1063
351
1064
351
  if (
is_condition(edge2)351
)
{20
1065
20
    if (!edge1->tagged_condition)
1066
20
      edge1->tagged_condition = edge2->tagged_condition;
1067
20
    else
1068
0
      edge1->tagged_condition =
1069
0
        isl_union_map_union(edge1->tagged_condition,
1070
0
                edge2->tagged_condition);
1071
20
  }
1072
351
1073
351
  if (
is_conditional_validity(edge2)351
)
{3
1074
3
    if (!edge1->tagged_validity)
1075
3
      edge1->tagged_validity = edge2->tagged_validity;
1076
3
    else
1077
0
      edge1->tagged_validity =
1078
0
        isl_union_map_union(edge1->tagged_validity,
1079
0
                edge2->tagged_validity);
1080
3
  }
1081
351
1082
351
  if (
is_condition(edge2) && 351
!edge1->tagged_condition20
)
1083
0
    return -1;
1084
351
  
if (351
is_conditional_validity(edge2) && 351
!edge1->tagged_validity3
)
1085
0
    return -1;
1086
351
1087
351
  return 0;
1088
351
}
1089
1090
/* Insert dummy tags in domain and range of "map".
1091
 *
1092
 * In particular, if "map" is of the form
1093
 *
1094
 *  A -> B
1095
 *
1096
 * then return
1097
 *
1098
 *  [A -> dummy_tag] -> [B -> dummy_tag]
1099
 *
1100
 * where the dummy_tags are identical and equal to any dummy tags
1101
 * introduced by any other call to this function.
1102
 */
1103
static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
1104
12
{
1105
12
  static char dummy;
1106
12
  isl_ctx *ctx;
1107
12
  isl_id *id;
1108
12
  isl_space *space;
1109
12
  isl_set *domain, *range;
1110
12
1111
12
  ctx = isl_map_get_ctx(map);
1112
12
1113
12
  id = isl_id_alloc(ctx, NULL, &dummy);
1114
12
  space = isl_space_params(isl_map_get_space(map));
1115
12
  space = isl_space_set_from_params(space);
1116
12
  space = isl_space_set_tuple_id(space, isl_dim_set, id);
1117
12
  space = isl_space_map_from_set(space);
1118
12
1119
12
  domain = isl_map_wrap(map);
1120
12
  range = isl_map_wrap(isl_map_universe(space));
1121
12
  map = isl_map_from_domain_and_range(domain, range);
1122
12
  map = isl_map_zip(map);
1123
12
1124
12
  return map;
1125
12
}
1126
1127
/* Given that at least one of "src" or "dst" is compressed, return
1128
 * a map between the spaces of these nodes restricted to the affine
1129
 * hull that was used in the compression.
1130
 */
1131
static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
1132
  struct isl_sched_node *dst)
1133
22
{
1134
22
  isl_set *dom, *ran;
1135
22
1136
22
  if (src->compressed)
1137
14
    dom = isl_set_copy(src->hull);
1138
22
  else
1139
8
    dom = isl_set_universe(isl_space_copy(src->space));
1140
22
  if (dst->compressed)
1141
14
    ran = isl_set_copy(dst->hull);
1142
22
  else
1143
8
    ran = isl_set_universe(isl_space_copy(dst->space));
1144
22
1145
22
  return isl_map_from_domain_and_range(dom, ran);
1146
22
}
1147
1148
/* Intersect the domains of the nested relations in domain and range
1149
 * of "tagged" with "map".
1150
 */
1151
static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
1152
  __isl_keep isl_map *map)
1153
14
{
1154
14
  isl_set *set;
1155
14
1156
14
  tagged = isl_map_zip(tagged);
1157
14
  set = isl_map_wrap(isl_map_copy(map));
1158
14
  tagged = isl_map_intersect_domain(tagged, set);
1159
14
  tagged = isl_map_zip(tagged);
1160
14
  return tagged;
1161
14
}
1162
1163
/* Return a pointer to the node that lives in the domain space of "map"
1164
 * or NULL if there is no such node.
1165
 */
1166
static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
1167
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1168
617
{
1169
617
  struct isl_sched_node *node;
1170
617
  isl_space *space;
1171
617
1172
617
  space = isl_space_domain(isl_map_get_space(map));
1173
617
  node = graph_find_node(ctx, graph, space);
1174
617
  isl_space_free(space);
1175
617
1176
617
  return node;
1177
617
}
1178
1179
/* Return a pointer to the node that lives in the range space of "map"
1180
 * or NULL if there is no such node.
1181
 */
1182
static struct isl_sched_node *find_range_node(isl_ctx *ctx,
1183
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1184
617
{
1185
617
  struct isl_sched_node *node;
1186
617
  isl_space *space;
1187
617
1188
617
  space = isl_space_range(isl_map_get_space(map));
1189
617
  node = graph_find_node(ctx, graph, space);
1190
617
  isl_space_free(space);
1191
617
1192
617
  return node;
1193
617
}
1194
1195
/* Add a new edge to the graph based on the given map
1196
 * and add it to data->graph->edge_table[data->type].
1197
 * If a dependence relation of a given type happens to be identical
1198
 * to one of the dependence relations of a type that was added before,
1199
 * then we don't create a new edge, but instead mark the original edge
1200
 * as also representing a dependence of the current type.
1201
 *
1202
 * Edges of type isl_edge_condition or isl_edge_conditional_validity
1203
 * may be specified as "tagged" dependence relations.  That is, "map"
1204
 * may contain elements (i -> a) -> (j -> b), where i -> j denotes
1205
 * the dependence on iterations and a and b are tags.
1206
 * edge->map is set to the relation containing the elements i -> j,
1207
 * while edge->tagged_condition and edge->tagged_validity contain
1208
 * the union of all the "map" relations
1209
 * for which extract_edge is called that result in the same edge->map.
1210
 *
1211
 * If the source or the destination node is compressed, then
1212
 * intersect both "map" and "tagged" with the constraints that
1213
 * were used to construct the compression.
1214
 * This ensures that there are no schedule constraints defined
1215
 * outside of these domains, while the scheduler no longer has
1216
 * any control over those outside parts.
1217
 */
1218
static isl_stat extract_edge(__isl_take isl_map *map, void *user)
1219
617
{
1220
617
  isl_ctx *ctx = isl_map_get_ctx(map);
1221
617
  struct isl_extract_edge_data *data = user;
1222
617
  struct isl_sched_graph *graph = data->graph;
1223
617
  struct isl_sched_node *src, *dst;
1224
617
  struct isl_sched_edge *edge;
1225
617
  isl_map *tagged = NULL;
1226
617
1227
617
  if (data->type == isl_edge_condition ||
1228
591
      
data->type == isl_edge_conditional_validity591
)
{58
1229
58
    if (
isl_map_can_zip(map)58
)
{46
1230
46
      tagged = isl_map_copy(map);
1231
46
      map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
1232
12
    } else {
1233
12
      tagged = insert_dummy_tags(isl_map_copy(map));
1234
12
    }
1235
58
  }
1236
617
1237
617
  src = find_domain_node(ctx, graph, map);
1238
617
  dst = find_range_node(ctx, graph, map);
1239
617
1240
617
  if (
!src || 617
!dst617
)
{24
1241
24
    isl_map_free(map);
1242
24
    isl_map_free(tagged);
1243
24
    return isl_stat_ok;
1244
24
  }
1245
617
1246
593
  
if (593
src->compressed || 593
dst->compressed579
)
{22
1247
22
    isl_map *hull;
1248
22
    hull = extract_hull(src, dst);
1249
22
    if (tagged)
1250
14
      tagged = map_intersect_domains(tagged, hull);
1251
22
    map = isl_map_intersect(map, hull);
1252
22
  }
1253
593
1254
593
  graph->edge[graph->n_edge].src = src;
1255
593
  graph->edge[graph->n_edge].dst = dst;
1256
593
  graph->edge[graph->n_edge].map = map;
1257
593
  graph->edge[graph->n_edge].types = 0;
1258
593
  graph->edge[graph->n_edge].tagged_condition = NULL;
1259
593
  graph->edge[graph->n_edge].tagged_validity = NULL;
1260
593
  set_type(&graph->edge[graph->n_edge], data->type);
1261
593
  if (data->type == isl_edge_condition)
1262
26
    graph->edge[graph->n_edge].tagged_condition =
1263
26
          isl_union_map_from_map(tagged);
1264
593
  if (data->type == isl_edge_conditional_validity)
1265
32
    graph->edge[graph->n_edge].tagged_validity =
1266
32
          isl_union_map_from_map(tagged);
1267
593
1268
593
  edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
1269
593
  if (
!edge593
)
{0
1270
0
    graph->n_edge++;
1271
0
    return isl_stat_error;
1272
0
  }
1273
593
  
if (593
edge == &graph->edge[graph->n_edge]593
)
1274
242
    return graph_edge_table_add(ctx, graph, data->type,
1275
242
            &graph->edge[graph->n_edge++]);
1276
593
1277
351
  
if (351
merge_edge(edge, &graph->edge[graph->n_edge]) < 0351
)
1278
0
    return -1;
1279
351
1280
351
  return graph_edge_table_add(ctx, graph, data->type, edge);
1281
351
}
1282
1283
/* Initialize the schedule graph "graph" from the schedule constraints "sc".
1284
 *
1285
 * The context is included in the domain before the nodes of
1286
 * the graphs are extracted in order to be able to exploit
1287
 * any possible additional equalities.
1288
 * Note that this intersection is only performed locally here.
1289
 */
1290
static isl_stat graph_init(struct isl_sched_graph *graph,
1291
  __isl_keep isl_schedule_constraints *sc)
1292
133
{
1293
133
  isl_ctx *ctx;
1294
133
  isl_union_set *domain;
1295
133
  isl_union_map *c;
1296
133
  struct isl_extract_edge_data data;
1297
133
  enum isl_edge_type i;
1298
133
  isl_stat r;
1299
133
1300
133
  if (!sc)
1301
0
    return isl_stat_error;
1302
133
1303
133
  ctx = isl_schedule_constraints_get_ctx(sc);
1304
133
1305
133
  domain = isl_schedule_constraints_get_domain(sc);
1306
133
  graph->n = isl_union_set_n_set(domain);
1307
133
  isl_union_set_free(domain);
1308
133
1309
133
  if (graph_alloc(ctx, graph, graph->n,
1310
133
      isl_schedule_constraints_n_map(sc)) < 0)
1311
0
    return isl_stat_error;
1312
133
1313
133
  
if (133
compute_max_row(graph, sc) < 0133
)
1314
0
    return isl_stat_error;
1315
133
  graph->root = 1;
1316
133
  graph->n = 0;
1317
133
  domain = isl_schedule_constraints_get_domain(sc);
1318
133
  domain = isl_union_set_intersect_params(domain,
1319
133
            isl_schedule_constraints_get_context(sc));
1320
133
  r = isl_union_set_foreach_set(domain, &extract_node, graph);
1321
133
  isl_union_set_free(domain);
1322
133
  if (r < 0)
1323
0
    return isl_stat_error;
1324
133
  
if (133
graph_init_table(ctx, graph) < 0133
)
1325
0
    return isl_stat_error;
1326
798
  
for (i = isl_edge_first; 133
i <= isl_edge_last798
;
++i665
)
{665
1327
665
    c = isl_schedule_constraints_get(sc, i);
1328
665
    graph->max_edge[i] = isl_union_map_n_map(c);
1329
665
    isl_union_map_free(c);
1330
665
    if (!c)
1331
0
      return isl_stat_error;
1332
665
  }
1333
133
  
if (133
graph_init_edge_tables(ctx, graph) < 0133
)
1334
0
    return isl_stat_error;
1335
133
  graph->n_edge = 0;
1336
133
  data.graph = graph;
1337
798
  for (i = isl_edge_first; 
i <= isl_edge_last798
;
++i665
)
{665
1338
665
    isl_stat r;
1339
665
1340
665
    data.type = i;
1341
665
    c = isl_schedule_constraints_get(sc, i);
1342
665
    r = isl_union_map_foreach_map(c, &extract_edge, &data);
1343
665
    isl_union_map_free(c);
1344
665
    if (r < 0)
1345
0
      return isl_stat_error;
1346
665
  }
1347
133
1348
133
  return isl_stat_ok;
1349
133
}
1350
1351
/* Check whether there is any dependence from node[j] to node[i]
1352
 * or from node[i] to node[j].
1353
 */
1354
static isl_bool node_follows_weak(int i, int j, void *user)
1355
206
{
1356
206
  isl_bool f;
1357
206
  struct isl_sched_graph *graph = user;
1358
206
1359
206
  f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
1360
206
  if (
f < 0 || 206
f206
)
1361
106
    return f;
1362
100
  return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
1363
206
}
1364
1365
/* Check whether there is a (conditional) validity dependence from node[j]
1366
 * to node[i], forcing node[i] to follow node[j].
1367
 */
1368
static isl_bool node_follows_strong(int i, int j, void *user)
1369
276
{
1370
276
  struct isl_sched_graph *graph = user;
1371
276
1372
276
  return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
1373
276
}
1374
1375
/* Use Tarjan's algorithm for computing the strongly connected components
1376
 * in the dependence graph only considering those edges defined by "follows".
1377
 */
1378
static int detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph,
1379
  isl_bool (*follows)(int i, int j, void *user))
1380
402
{
1381
402
  int i, n;
1382
402
  struct isl_tarjan_graph *g = NULL;
1383
402
1384
402
  g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
1385
402
  if (!g)
1386
0
    return -1;
1387
402
1388
402
  graph->scc = 0;
1389
402
  i = 0;
1390
402
  n = graph->n;
1391
937
  while (
n937
)
{535
1392
1.20k
    while (
g->order[i] != -11.20k
)
{672
1393
672
      graph->node[g->order[i]].scc = graph->scc;
1394
672
      --n;
1395
672
      ++i;
1396
672
    }
1397
535
    ++i;
1398
535
    graph->scc++;
1399
535
  }
1400
402
1401
402
  isl_tarjan_graph_free(g);
1402
402
1403
402
  return 0;
1404
402
}
1405
1406
/* Apply Tarjan's algorithm to detect the strongly connected components
1407
 * in the dependence graph.
1408
 * Only consider the (conditional) validity dependences and clear "weak".
1409
 */
1410
static int detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1411
258
{
1412
258
  graph->weak = 0;
1413
258
  return detect_ccs(ctx, graph, &node_follows_strong);
1414
258
}
1415
1416
/* Apply Tarjan's algorithm to detect the (weakly) connected components
1417
 * in the dependence graph.
1418
 * Consider all dependences and set "weak".
1419
 */
1420
static int detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1421
128
{
1422
128
  graph->weak = 1;
1423
128
  return detect_ccs(ctx, graph, &node_follows_weak);
1424
128
}
1425
1426
static int cmp_scc(const void *a, const void *b, void *data)
1427
87
{
1428
87
  struct isl_sched_graph *graph = data;
1429
87
  const int *i1 = a;
1430
87
  const int *i2 = b;
1431
87
1432
87
  return graph->node[*i1].scc - graph->node[*i2].scc;
1433
87
}
1434
1435
/* Sort the elements of graph->sorted according to the corresponding SCCs.
1436
 */
1437
static int sort_sccs(struct isl_sched_graph *graph)
1438
219
{
1439
219
  return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
1440
219
}
1441
1442
/* Given a dependence relation R from "node" to itself,
1443
 * construct the set of coefficients of valid constraints for elements
1444
 * in that dependence relation.
1445
 * In particular, the result contains tuples of coefficients
1446
 * c_0, c_n, c_x such that
1447
 *
1448
 *  c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
1449
 *
1450
 * or, equivalently,
1451
 *
1452
 *  c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
1453
 *
1454
 * We choose here to compute the dual of delta R.
1455
 * Alternatively, we could have computed the dual of R, resulting
1456
 * in a set of tuples c_0, c_n, c_x, c_y, and then
1457
 * plugged in (c_0, c_n, c_x, -c_x).
1458
 *
1459
 * If "node" has been compressed, then the dependence relation
1460
 * is also compressed before the set of coefficients is computed.
1461
 */
1462
static __isl_give isl_basic_set *intra_coefficients(
1463
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1464
  __isl_take isl_map *map)
1465
807
{
1466
807
  isl_set *delta;
1467
807
  isl_map *key;
1468
807
  isl_basic_set *coef;
1469
807
  isl_maybe_isl_basic_set m;
1470
807
1471
807
  m = isl_map_to_basic_set_try_get(graph->intra_hmap, map);
1472
807
  if (
m.valid < 0 || 807
m.valid807
)
{702
1473
702
    isl_map_free(map);
1474
702
    return m.value;
1475
702
  }
1476
807
1477
105
  key = isl_map_copy(map);
1478
105
  if (
node->compressed105
)
{0
1479
0
    map = isl_map_preimage_domain_multi_aff(map,
1480
0
            isl_multi_aff_copy(node->decompress));
1481
0
    map = isl_map_preimage_range_multi_aff(map,
1482
0
            isl_multi_aff_copy(node->decompress));
1483
0
  }
1484
105
  delta = isl_set_remove_divs(isl_map_deltas(map));
1485
105
  coef = isl_set_coefficients(delta);
1486
105
  graph->intra_hmap = isl_map_to_basic_set_set(graph->intra_hmap, key,
1487
105
          isl_basic_set_copy(coef));
1488
105
1489
105
  return coef;
1490
807
}
1491
1492
/* Given a dependence relation R, construct the set of coefficients
1493
 * of valid constraints for elements in that dependence relation.
1494
 * In particular, the result contains tuples of coefficients
1495
 * c_0, c_n, c_x, c_y such that
1496
 *
1497
 *  c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
1498
 *
1499
 * If the source or destination nodes of "edge" have been compressed,
1500
 * then the dependence relation is also compressed before
1501
 * the set of coefficients is computed.
1502
 */
1503
static __isl_give isl_basic_set *inter_coefficients(
1504
  struct isl_sched_graph *graph, struct isl_sched_edge *edge,
1505
  __isl_take isl_map *map)
1506
604
{
1507
604
  isl_set *set;
1508
604
  isl_map *key;
1509
604
  isl_basic_set *coef;
1510
604
  isl_maybe_isl_basic_set m;
1511
604
1512
604
  m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
1513
604
  if (
m.valid < 0 || 604
m.valid604
)
{519
1514
519
    isl_map_free(map);
1515
519
    return m.value;
1516
519
  }
1517
604
1518
85
  key = isl_map_copy(map);
1519
85
  if (edge->src->compressed)
1520
3
    map = isl_map_preimage_domain_multi_aff(map,
1521
3
            isl_multi_aff_copy(edge->src->decompress));
1522
85
  if (edge->dst->compressed)
1523
3
    map = isl_map_preimage_range_multi_aff(map,
1524
3
            isl_multi_aff_copy(edge->dst->decompress));
1525
85
  set = isl_map_wrap(isl_map_remove_divs(map));
1526
85
  coef = isl_set_coefficients(set);
1527
85
  graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
1528
85
          isl_basic_set_copy(coef));
1529
85
1530
85
  return coef;
1531
604
}
1532
1533
/* Return the position of the coefficients of the variables in
1534
 * the coefficients constraints "coef".
1535
 *
1536
 * The space of "coef" is of the form
1537
 *
1538
 *  { coefficients[[cst, params] -> S] }
1539
 *
1540
 * Return the position of S.
1541
 */
1542
static int coef_var_offset(__isl_keep isl_basic_set *coef)
1543
995
{
1544
995
  int offset;
1545
995
  isl_space *space;
1546
995
1547
995
  space = isl_space_unwrap(isl_basic_set_get_space(coef));
1548
995
  offset = isl_space_dim(space, isl_dim_in);
1549
995
  isl_space_free(space);
1550
995
1551
995
  return offset;
1552
995
}
1553
1554
/* Return the offset of the coefficients of the variables of "node"
1555
 * within the (I)LP.
1556
 *
1557
 * Within each node, the coefficients have the following order:
1558
 *  - c_i_0
1559
 *  - c_i_n (if parametric)
1560
 *  - positive and negative parts of c_i_x
1561
 */
1562
static int node_var_coef_offset(struct isl_sched_node *node)
1563
3.91k
{
1564
3.91k
  return node->start + 1 + node->nparam;
1565
3.91k
}
1566
1567
/* Construct an isl_dim_map for mapping constraints on coefficients
1568
 * for "node" to the corresponding positions in graph->lp.
1569
 * "offset" is the offset of the coefficients for the variables
1570
 * in the input constraints.
1571
 * "s" is the sign of the mapping.
1572
 *
1573
 * The input constraints are given in terms of the coefficients (c_0, c_n, c_x).
1574
 * The mapping produced by this function essentially plugs in
1575
 * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
1576
 * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
1577
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1578
 *
1579
 * The caller can extend the mapping to also map the other coefficients
1580
 * (and therefore not plug in 0).
1581
 */
1582
static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
1583
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1584
  int offset, int s)
1585
581
{
1586
581
  int pos;
1587
581
  unsigned total;
1588
581
  isl_dim_map *dim_map;
1589
581
1590
581
  if (!node)
1591
0
    return NULL;
1592
581
1593
581
  total = isl_basic_set_total_dim(graph->lp);
1594
581
  pos = node_var_coef_offset(node);
1595
581
  dim_map = isl_dim_map_alloc(ctx, total);
1596
581
  isl_dim_map_range(dim_map, pos, 2, offset, 1, node->nvar, -s);
1597
581
  isl_dim_map_range(dim_map, pos + 1, 2, offset, 1, node->nvar, s);
1598
581
1599
581
  return dim_map;
1600
581
}
1601
1602
/* Construct an isl_dim_map for mapping constraints on coefficients
1603
 * for "src" (node i) and "dst" (node j) to the corresponding positions
1604
 * in graph->lp.
1605
 * "offset" is the offset of the coefficients for the variables of "src"
1606
 * in the input constraints.
1607
 * "s" is the sign of the mapping.
1608
 *
1609
 * The input constraints are given in terms of the coefficients
1610
 * (c_0, c_n, c_x, c_y).
1611
 * The mapping produced by this function essentially plugs in
1612
 * (c_j_0 - c_i_0, c_j_n - c_i_n,
1613
 *  -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
1614
 * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
1615
 *  c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
1616
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1617
 *
1618
 * The caller can further extend the mapping.
1619
 */
1620
static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
1621
  struct isl_sched_graph *graph, struct isl_sched_node *src,
1622
  struct isl_sched_node *dst, int offset, int s)
1623
414
{
1624
414
  int pos;
1625
414
  unsigned total;
1626
414
  isl_dim_map *dim_map;
1627
414
1628
414
  if (
!src || 414
!dst414
)
1629
0
    return NULL;
1630
414
1631
414
  total = isl_basic_set_total_dim(graph->lp);
1632
414
  dim_map = isl_dim_map_alloc(ctx, total);
1633
414
1634
414
  isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, s);
1635
414
  isl_dim_map_range(dim_map, dst->start + 1, 1, 1, 1, dst->nparam, s);
1636
414
  pos = node_var_coef_offset(dst);
1637
414
  isl_dim_map_range(dim_map, pos, 2, offset + src->nvar, 1,
1638
414
        dst->nvar, -s);
1639
414
  isl_dim_map_range(dim_map, pos + 1, 2, offset + src->nvar, 1,
1640
414
        dst->nvar, s);
1641
414
1642
414
  isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -s);
1643
414
  isl_dim_map_range(dim_map, src->start + 1, 1, 1, 1, src->nparam, -s);
1644
414
  pos = node_var_coef_offset(src);
1645
414
  isl_dim_map_range(dim_map, pos, 2, offset, 1, src->nvar, s);
1646
414
  isl_dim_map_range(dim_map, pos + 1, 2, offset, 1, src->nvar, -s);
1647
414
1648
414
  return dim_map;
1649
414
}
1650
1651
/* Add the constraints from "src" to "dst" using "dim_map",
1652
 * after making sure there is enough room in "dst" for the extra constraints.
1653
 */
1654
static __isl_give isl_basic_set *add_constraints_dim_map(
1655
  __isl_take isl_basic_set *dst, __isl_take isl_basic_set *src,
1656
  __isl_take isl_dim_map *dim_map)
1657
995
{
1658
995
  int n_eq, n_ineq;
1659
995
1660
995
  n_eq = isl_basic_set_n_equality(src);
1661
995
  n_ineq = isl_basic_set_n_inequality(src);
1662
995
  dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq);
1663
995
  dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map);
1664
995
  return dst;
1665
995
}
1666
1667
/* Add constraints to graph->lp that force validity for the given
1668
 * dependence from a node i to itself.
1669
 * That is, add constraints that enforce
1670
 *
1671
 *  (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
1672
 *  = c_i_x (y - x) >= 0
1673
 *
1674
 * for each (x,y) in R.
1675
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
1676
 * of valid constraints for (y - x) and then plug in (0, 0, c_i_x^+ - c_i_x^-),
1677
 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
1678
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1679
 *
1680
 * Actually, we do not construct constraints for the c_i_x themselves,
1681
 * but for the coefficients of c_i_x written as a linear combination
1682
 * of the columns in node->cmap.
1683
 */
1684
static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
1685
  struct isl_sched_edge *edge)
1686
263
{
1687
263
  int offset;
1688
263
  isl_map *map = isl_map_copy(edge->map);
1689
263
  isl_ctx *ctx = isl_map_get_ctx(map);
1690
263
  isl_dim_map *dim_map;
1691
263
  isl_basic_set *coef;
1692
263
  struct isl_sched_node *node = edge->src;
1693
263
1694
263
  coef = intra_coefficients(graph, node, map);
1695
263
1696
263
  offset = coef_var_offset(coef);
1697
263
1698
263
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1699
263
              offset, isl_mat_copy(node->cmap));
1700
263
  if (!coef)
1701
0
    return isl_stat_error;
1702
263
1703
263
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
1704
263
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1705
263
1706
263
  return isl_stat_ok;
1707
263
}
1708
1709
/* Add constraints to graph->lp that force validity for the given
1710
 * dependence from node i to node j.
1711
 * That is, add constraints that enforce
1712
 *
1713
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
1714
 *
1715
 * for each (x,y) in R.
1716
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1717
 * of valid constraints for R and then plug in
1718
 * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
1719
 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
1720
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1721
 *
1722
 * Actually, we do not construct constraints for the c_*_x themselves,
1723
 * but for the coefficients of c_*_x written as a linear combination
1724
 * of the columns in node->cmap.
1725
 */
1726
static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
1727
  struct isl_sched_edge *edge)
1728
195
{
1729
195
  int offset;
1730
195
  isl_map *map;
1731
195
  isl_ctx *ctx;
1732
195
  isl_dim_map *dim_map;
1733
195
  isl_basic_set *coef;
1734
195
  struct isl_sched_node *src = edge->src;
1735
195
  struct isl_sched_node *dst = edge->dst;
1736
195
1737
195
  if (!graph->lp)
1738
0
    return isl_stat_error;
1739
195
1740
195
  map = isl_map_copy(edge->map);
1741
195
  ctx = isl_map_get_ctx(map);
1742
195
  coef = inter_coefficients(graph, edge, map);
1743
195
1744
195
  offset = coef_var_offset(coef);
1745
195
1746
195
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1747
195
        offset, isl_mat_copy(src->cmap));
1748
195
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1749
195
        offset + src->nvar, isl_mat_copy(dst->cmap));
1750
195
  if (!coef)
1751
0
    return isl_stat_error;
1752
195
1753
195
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
1754
195
1755
195
  edge->start = graph->lp->n_ineq;
1756
195
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1757
195
  if (!graph->lp)
1758
0
    return isl_stat_error;
1759
195
  edge->end = graph->lp->n_ineq;
1760
195
1761
195
  return isl_stat_ok;
1762
195
}
1763
1764
/* Add constraints to graph->lp that bound the dependence distance for the given
1765
 * dependence from a node i to itself.
1766
 * If s = 1, we add the constraint
1767
 *
1768
 *  c_i_x (y - x) <= m_0 + m_n n
1769
 *
1770
 * or
1771
 *
1772
 *  -c_i_x (y - x) + m_0 + m_n n >= 0
1773
 *
1774
 * for each (x,y) in R.
1775
 * If s = -1, we add the constraint
1776
 *
1777
 *  -c_i_x (y - x) <= m_0 + m_n n
1778
 *
1779
 * or
1780
 *
1781
 *  c_i_x (y - x) + m_0 + m_n n >= 0
1782
 *
1783
 * for each (x,y) in R.
1784
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
1785
 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
1786
 * with each coefficient (except m_0) represented as a pair of non-negative
1787
 * coefficients.
1788
 *
1789
 * Actually, we do not construct constraints for the c_i_x themselves,
1790
 * but for the coefficients of c_i_x written as a linear combination
1791
 * of the columns in node->cmap.
1792
 *
1793
 *
1794
 * If "local" is set, then we add constraints
1795
 *
1796
 *  c_i_x (y - x) <= 0
1797
 *
1798
 * or
1799
 *
1800
 *  -c_i_x (y - x) <= 0
1801
 *
1802
 * instead, forcing the dependence distance to be (less than or) equal to 0.
1803
 * That is, we plug in (0, 0, -s * c_i_x),
1804
 * Note that dependences marked local are treated as validity constraints
1805
 * by add_all_validity_constraints and therefore also have
1806
 * their distances bounded by 0 from below.
1807
 */
1808
static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
1809
  struct isl_sched_edge *edge, int s, int local)
1810
273
{
1811
273
  int offset;
1812
273
  unsigned nparam;
1813
273
  isl_map *map = isl_map_copy(edge->map);
1814
273
  isl_ctx *ctx = isl_map_get_ctx(map);
1815
273
  isl_dim_map *dim_map;
1816
273
  isl_basic_set *coef;
1817
273
  struct isl_sched_node *node = edge->src;
1818
273
1819
273
  coef = intra_coefficients(graph, node, map);
1820
273
1821
273
  offset = coef_var_offset(coef);
1822
273
1823
273
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1824
273
              offset, isl_mat_copy(node->cmap));
1825
273
  if (!coef)
1826
0
    return isl_stat_error;
1827
273
1828
273
  nparam = isl_space_dim(node->space, isl_dim_param);
1829
273
  dim_map = intra_dim_map(ctx, graph, node, offset, -s);
1830
273
1831
273
  if (
!local273
)
{141
1832
141
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
1833
141
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
1834
141
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
1835
141
  }
1836
273
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1837
273
1838
273
  return isl_stat_ok;
1839
273
}
1840
1841
/* Add constraints to graph->lp that bound the dependence distance for the given
1842
 * dependence from node i to node j.
1843
 * If s = 1, we add the constraint
1844
 *
1845
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
1846
 *    <= m_0 + m_n n
1847
 *
1848
 * or
1849
 *
1850
 *  -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
1851
 *    m_0 + m_n n >= 0
1852
 *
1853
 * for each (x,y) in R.
1854
 * If s = -1, we add the constraint
1855
 *
1856
 *  -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
1857
 *    <= m_0 + m_n n
1858
 *
1859
 * or
1860
 *
1861
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
1862
 *    m_0 + m_n n >= 0
1863
 *
1864
 * for each (x,y) in R.
1865
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1866
 * of valid constraints for R and then plug in
1867
 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
1868
 *  s*c_i_x, -s*c_j_x)
1869
 * with each coefficient (except m_0, c_*_0 and c_*_n)
1870
 * represented as a pair of non-negative coefficients.
1871
 *
1872
 * Actually, we do not construct constraints for the c_*_x themselves,
1873
 * but for the coefficients of c_*_x written as a linear combination
1874
 * of the columns in node->cmap.
1875
 *
1876
 *
1877
 * If "local" is set (and s = 1), then we add constraints
1878
 *
1879
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
1880
 *
1881
 * or
1882
 *
1883
 *  -((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0
1884
 *
1885
 * instead, forcing the dependence distance to be (less than or) equal to 0.
1886
 * That is, we plug in
1887
 * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
1888
 * Note that dependences marked local are treated as validity constraints
1889
 * by add_all_validity_constraints and therefore also have
1890
 * their distances bounded by 0 from below.
1891
 */
1892
static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
1893
  struct isl_sched_edge *edge, int s, int local)
1894
205
{
1895
205
  int offset;
1896
205
  unsigned nparam;
1897
205
  isl_map *map = isl_map_copy(edge->map);
1898
205
  isl_ctx *ctx = isl_map_get_ctx(map);
1899
205
  isl_dim_map *dim_map;
1900
205
  isl_basic_set *coef;
1901
205
  struct isl_sched_node *src = edge->src;
1902
205
  struct isl_sched_node *dst = edge->dst;
1903
205
1904
205
  coef = inter_coefficients(graph, edge, map);
1905
205
1906
205
  offset = coef_var_offset(coef);
1907
205
1908
205
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1909
205
        offset, isl_mat_copy(src->cmap));
1910
205
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1911
205
        offset + src->nvar, isl_mat_copy(dst->cmap));
1912
205
  if (!coef)
1913
0
    return isl_stat_error;
1914
205
1915
205
  nparam = isl_space_dim(src->space, isl_dim_param);
1916
205
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
1917
205
1918
205
  if (
!local205
)
{121
1919
121
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
1920
121
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
1921
121
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
1922
121
  }
1923
205
1924
205
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1925
205
1926
205
  return isl_stat_ok;
1927
205
}
1928
1929
/* Add all validity constraints to graph->lp.
1930
 *
1931
 * An edge that is forced to be local needs to have its dependence
1932
 * distances equal to zero.  We take care of bounding them by 0 from below
1933
 * here.  add_all_proximity_constraints takes care of bounding them by 0
1934
 * from above.
1935
 *
1936
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
1937
 * Otherwise, we ignore them.
1938
 */
1939
static int add_all_validity_constraints(struct isl_sched_graph *graph,
1940
  int use_coincidence)
1941
377
{
1942
377
  int i;
1943
377
1944
943
  for (i = 0; 
i < graph->n_edge943
;
++i566
)
{566
1945
566
    struct isl_sched_edge *edge = &graph->edge[i];
1946
566
    int local;
1947
566
1948
566
    local = is_local(edge) ||
1949
542
      
(is_coincidence(edge) && 542
use_coincidence328
);
1950
566
    if (
!is_validity(edge) && 566
!local112
)
1951
108
      continue;
1952
458
    
if (458
edge->src != edge->dst458
)
1953
195
      continue;
1954
263
    
if (263
add_intra_validity_constraints(graph, edge) < 0263
)
1955
0
      return -1;
1956
263
  }
1957
377
1958
943
  
for (i = 0; 377
i < graph->n_edge943
;
++i566
)
{566
1959
566
    struct isl_sched_edge *edge = &graph->edge[i];
1960
566
    int local;
1961
566
1962
566
    local = is_local(edge) ||
1963
542
      
(is_coincidence(edge) && 542
use_coincidence328
);
1964
566
    if (
!is_validity(edge) && 566
!local112
)
1965
108
      continue;
1966
458
    
if (458
edge->src == edge->dst458
)
1967
263
      continue;
1968
195
    
if (195
add_inter_validity_constraints(graph, edge) < 0195
)
1969
0
      return -1;
1970
195
  }
1971
377
1972
377
  return 0;
1973
377
}
1974
1975
/* Add constraints to graph->lp that bound the dependence distance
1976
 * for all dependence relations.
1977
 * If a given proximity dependence is identical to a validity
1978
 * dependence, then the dependence distance is already bounded
1979
 * from below (by zero), so we only need to bound the distance
1980
 * from above.  (This includes the case of "local" dependences
1981
 * which are treated as validity dependence by add_all_validity_constraints.)
1982
 * Otherwise, we need to bound the distance both from above and from below.
1983
 *
1984
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
1985
 * Otherwise, we ignore them.
1986
 */
1987
static int add_all_proximity_constraints(struct isl_sched_graph *graph,
1988
  int use_coincidence)
1989
377
{
1990
377
  int i;
1991
377
1992
943
  for (i = 0; 
i < graph->n_edge943
;
++i566
)
{566
1993
566
    struct isl_sched_edge *edge = &graph->edge[i];
1994
566
    int local;
1995
566
1996
566
    local = is_local(edge) ||
1997
542
      
(is_coincidence(edge) && 542
use_coincidence328
);
1998
566
    if (
!is_proximity(edge) && 566
!local113
)
1999
105
      continue;
2000
461
    
if (461
edge->src == edge->dst &&461
2001
265
        add_intra_proximity_constraints(graph, edge, 1, local) < 0)
2002
0
      return -1;
2003
461
    
if (461
edge->src != edge->dst &&461
2004
196
        add_inter_proximity_constraints(graph, edge, 1, local) < 0)
2005
0
      return -1;
2006
461
    
if (461
is_validity(edge) || 461
local21
)
2007
444
      continue;
2008
17
    
if (17
edge->src == edge->dst &&17
2009
8
        add_intra_proximity_constraints(graph, edge, -1, 0) < 0)
2010
0
      return -1;
2011
17
    
if (17
edge->src != edge->dst &&17
2012
9
        add_inter_proximity_constraints(graph, edge, -1, 0) < 0)
2013
0
      return -1;
2014
17
  }
2015
377
2016
377
  return 0;
2017
377
}
2018
2019
/* Compute a basis for the rows in the linear part of the schedule
2020
 * and extend this basis to a full basis.  The remaining rows
2021
 * can then be used to force linear independence from the rows
2022
 * in the schedule.
2023
 *
2024
 * In particular, given the schedule rows S, we compute
2025
 *
2026
 *  S   = H Q
2027
 *  S U = H
2028
 *
2029
 * with H the Hermite normal form of S.  That is, all but the
2030
 * first rank columns of H are zero and so each row in S is
2031
 * a linear combination of the first rank rows of Q.
2032
 * The matrix Q is then transposed because we will write the
2033
 * coefficients of the next schedule row as a column vector s
2034
 * and express this s as a linear combination s = Q c of the
2035
 * computed basis.
2036
 * Similarly, the matrix U is transposed such that we can
2037
 * compute the coefficients c = U s from a schedule row s.
2038
 */
2039
static int node_update_cmap(struct isl_sched_node *node)
2040
951
{
2041
951
  isl_mat *H, *U, *Q;
2042
951
  int n_row = isl_mat_rows(node->sched);
2043
951
2044
951
  H = isl_mat_sub_alloc(node->sched, 0, n_row,
2045
951
            1 + node->nparam, node->nvar);
2046
951
2047
951
  H = isl_mat_left_hermite(H, 0, &U, &Q);
2048
951
  isl_mat_free(node->cmap);
2049
951
  isl_mat_free(node->cinv);
2050
951
  isl_mat_free(node->ctrans);
2051
951
  node->ctrans = isl_mat_copy(Q);
2052
951
  node->cmap = isl_mat_transpose(Q);
2053
951
  node->cinv = isl_mat_transpose(U);
2054
951
  node->rank = isl_mat_initial_non_zero_cols(H);
2055
951
  isl_mat_free(H);
2056
951
2057
951
  if (
!node->cmap || 951
!node->cinv951
||
!node->ctrans951
||
node->rank < 0951
)
2058
0
    return -1;
2059
951
  return 0;
2060
951
}
2061
2062
/* Is "edge" marked as a validity or a conditional validity edge?
2063
 */
2064
static int is_any_validity(struct isl_sched_edge *edge)
2065
94
{
2066
4
  return is_validity(edge) || is_conditional_validity(edge);
2067
94
}
2068
2069
/* How many times should we count the constraints in "edge"?
2070
 *
2071
 * We count as follows
2072
 * validity   -> 1 (>= 0)
2073
 * validity+proximity -> 2 (>= 0 and upper bound)
2074
 * proximity    -> 2 (lower and upper bound)
2075
 * local(+any)    -> 2 (>= 0 and <= 0)
2076
 *
2077
 * If an edge is only marked conditional_validity then it counts
2078
 * as zero since it is only checked afterwards.
2079
 *
2080
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2081
 * Otherwise, we ignore them.
2082
 */
2083
static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
2084
566
{
2085
566
  if (
is_proximity(edge) || 566
is_local(edge)113
)
2086
457
    return 2;
2087
109
  
if (109
use_coincidence && 109
is_coincidence(edge)4
)
2088
4
    return 2;
2089
105
  
if (105
is_validity(edge)105
)
2090
14
    return 1;
2091
91
  return 0;
2092
105
}
2093
2094
/* Count the number of equality and inequality constraints
2095
 * that will be added for the given map.
2096
 *
2097
 * "use_coincidence" is set if we should take into account coincidence edges.
2098
 */
2099
static isl_stat count_map_constraints(struct isl_sched_graph *graph,
2100
  struct isl_sched_edge *edge, __isl_take isl_map *map,
2101
  int *n_eq, int *n_ineq, int use_coincidence)
2102
566
{
2103
566
  isl_basic_set *coef;
2104
566
  int f = edge_multiplicity(edge, use_coincidence);
2105
566
2106
566
  if (
f == 0566
)
{91
2107
91
    isl_map_free(map);
2108
91
    return isl_stat_ok;
2109
91
  }
2110
566
2111
475
  
if (475
edge->src == edge->dst475
)
2112
271
    coef = intra_coefficients(graph, edge->src, map);
2113
475
  else
2114
204
    coef = inter_coefficients(graph, edge, map);
2115
475
  if (!coef)
2116
0
    return isl_stat_error;
2117
475
  *n_eq += f * isl_basic_set_n_equality(coef);
2118
475
  *n_ineq += f * isl_basic_set_n_inequality(coef);
2119
475
  isl_basic_set_free(coef);
2120
475
2121
475
  return isl_stat_ok;
2122
475
}
2123
2124
/* Count the number of equality and inequality constraints
2125
 * that will be added to the main lp problem.
2126
 * We count as follows
2127
 * validity   -> 1 (>= 0)
2128
 * validity+proximity -> 2 (>= 0 and upper bound)
2129
 * proximity    -> 2 (lower and upper bound)
2130
 * local(+any)    -> 2 (>= 0 and <= 0)
2131
 *
2132
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2133
 * Otherwise, we ignore them.
2134
 */
2135
static int count_constraints(struct isl_sched_graph *graph,
2136
  int *n_eq, int *n_ineq, int use_coincidence)
2137
377
{
2138
377
  int i;
2139
377
2140
377
  *n_eq = *n_ineq = 0;
2141
943
  for (i = 0; 
i < graph->n_edge943
;
++i566
)
{566
2142
566
    struct isl_sched_edge *edge = &graph->edge[i];
2143
566
    isl_map *map = isl_map_copy(edge->map);
2144
566
2145
566
    if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
2146
566
              use_coincidence) < 0)
2147
0
      return -1;
2148
566
  }
2149
377
2150
377
  return 0;
2151
377
}
2152
2153
/* Count the number of constraints that will be added by
2154
 * add_bound_constant_constraints to bound the values of the constant terms
2155
 * and increment *n_eq and *n_ineq accordingly.
2156
 *
2157
 * In practice, add_bound_constant_constraints only adds inequalities.
2158
 */
2159
static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
2160
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2161
377
{
2162
377
  if (isl_options_get_schedule_max_constant_term(ctx) == -1)
2163
249
    return isl_stat_ok;
2164
377
2165
128
  *n_ineq += graph->n;
2166
128
2167
128
  return isl_stat_ok;
2168
377
}
2169
2170
/* Add constraints to bound the values of the constant terms in the schedule,
2171
 * if requested by the user.
2172
 *
2173
 * The maximal value of the constant terms is defined by the option
2174
 * "schedule_max_constant_term".
2175
 *
2176
 * Within each node, the coefficients have the following order:
2177
 *  - c_i_0
2178
 *  - c_i_n (if parametric)
2179
 *  - positive and negative parts of c_i_x
2180
 */
2181
static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
2182
  struct isl_sched_graph *graph)
2183
377
{
2184
377
  int i, k;
2185
377
  int max;
2186
377
  int total;
2187
377
2188
377
  max = isl_options_get_schedule_max_constant_term(ctx);
2189
377
  if (max == -1)
2190
249
    return isl_stat_ok;
2191
377
2192
128
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2193
128
2194
266
  for (i = 0; 
i < graph->n266
;
++i138
)
{138
2195
138
    struct isl_sched_node *node = &graph->node[i];
2196
138
    k = isl_basic_set_alloc_inequality(graph->lp);
2197
138
    if (k < 0)
2198
0
      return isl_stat_error;
2199
138
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
2200
138
    isl_int_set_si(graph->lp->ineq[k][1 + node->start], -1);
2201
138
    isl_int_set_si(graph->lp->ineq[k][0], max);
2202
138
  }
2203
128
2204
128
  return isl_stat_ok;
2205
128
}
2206
2207
/* Count the number of constraints that will be added by
2208
 * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
2209
 * accordingly.
2210
 *
2211
 * In practice, add_bound_coefficient_constraints only adds inequalities.
2212
 */
2213
static int count_bound_coefficient_constraints(isl_ctx *ctx,
2214
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2215
377
{
2216
377
  int i;
2217
377
2218
377
  if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
2219
248
      !isl_options_get_schedule_treat_coalescing(ctx))
2220
8
    return 0;
2221
377
2222
900
  
for (i = 0; 369
i < graph->n900
;
++i531
)
2223
531
    *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
2224
369
2225
369
  return 0;
2226
377
}
2227
2228
/* Add constraints to graph->lp that bound the values of
2229
 * the parameter schedule coefficients of "node" to "max" and
2230
 * the variable schedule coefficients to the corresponding entry
2231
 * in node->max.
2232
 * In either case, a negative value means that no bound needs to be imposed.
2233
 *
2234
 * For parameter coefficients, this amounts to adding a constraint
2235
 *
2236
 *  c_n <= max
2237
 *
2238
 * i.e.,
2239
 *
2240
 *  -c_n + max >= 0
2241
 *
2242
 * The variables coefficients are, however, not represented directly.
2243
 * Instead, the variables coefficients c_x are written as a linear
2244
 * combination c_x = cmap c_z of some other coefficients c_z,
2245
 * which are in turn encoded as c_z = c_z^+ - c_z^-.
2246
 * Let a_j be the elements of row i of node->cmap, then
2247
 *
2248
 *  -max_i <= c_x_i <= max_i
2249
 *
2250
 * is encoded as
2251
 *
2252
 *  -max_i <= \sum_j a_j (c_z_j^+ - c_z_j^-) <= max_i
2253
 *
2254
 * or
2255
 *
2256
 *  -\sum_j a_j (c_z_j^+ - c_z_j^-) + max_i >= 0
2257
 *  \sum_j a_j (c_z_j^+ - c_z_j^-) + max_i >= 0
2258
 */
2259
static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
2260
  struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
2261
531
{
2262
531
  int i, j, k;
2263
531
  int total;
2264
531
  isl_vec *ineq;
2265
531
2266
531
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2267
531
2268
824
  for (j = 0; 
j < node->nparam824
;
++j293
)
{293
2269
293
    int dim;
2270
293
2271
293
    if (max < 0)
2272
238
      continue;
2273
293
2274
55
    k = isl_basic_set_alloc_inequality(graph->lp);
2275
55
    if (k < 0)
2276
0
      return isl_stat_error;
2277
55
    dim = 1 + node->start + 1 + j;
2278
55
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
2279
55
    isl_int_set_si(graph->lp->ineq[k][dim], -1);
2280
55
    isl_int_set_si(graph->lp->ineq[k][0], max);
2281
55
  }
2282
531
2283
531
  ineq = isl_vec_alloc(ctx, 1 + total);
2284
531
  ineq = isl_vec_clr(ineq);
2285
531
  if (!ineq)
2286
0
    return isl_stat_error;
2287
1.42k
  
for (i = 0; 531
i < node->nvar1.42k
;
++i895
)
{895
2288
895
    int pos = 1 + node_var_coef_offset(node);
2289
895
2290
895
    if (isl_int_is_neg(node->max->el[i]))
2291
497
      continue;
2292
895
2293
1.40k
    
for (j = 0; 398
j < node->nvar1.40k
;
++j1.00k
)
{1.00k
2294
1.00k
      isl_int_set(ineq->el[pos + 2 * j],
2295
1.00k
          node->cmap->row[i][j]);
2296
1.00k
      isl_int_neg(ineq->el[pos + 2 * j + 1],
2297
1.00k
          node->cmap->row[i][j]);
2298
1.00k
    }
2299
398
    isl_int_set(ineq->el[0], node->max->el[i]);
2300
398
2301
398
    k = isl_basic_set_alloc_inequality(graph->lp);
2302
398
    if (k < 0)
2303
0
      goto error;
2304
398
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2305
398
2306
398
    isl_seq_neg(ineq->el + pos, ineq->el + pos, 2 * node->nvar);
2307
398
    k = isl_basic_set_alloc_inequality(graph->lp);
2308
398
    if (k < 0)
2309
0
      goto error;
2310
398
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2311
398
  }
2312
531
  isl_vec_free(ineq);
2313
531
2314
531
  return isl_stat_ok;
2315
0
error:
2316
0
  isl_vec_free(ineq);
2317
0
  return isl_stat_error;
2318
531
}
2319
2320
/* Add constraints that bound the values of the variable and parameter
2321
 * coefficients of the schedule.
2322
 *
2323
 * The maximal value of the coefficients is defined by the option
2324
 * 'schedule_max_coefficient' and the entries in node->max.
2325
 * These latter entries are only set if either the schedule_max_coefficient
2326
 * option or the schedule_treat_coalescing option is set.
2327
 */
2328
static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
2329
  struct isl_sched_graph *graph)
2330
377
{
2331
377
  int i;
2332
377
  int max;
2333
377
2334
377
  max = isl_options_get_schedule_max_coefficient(ctx);
2335
377
2336
377
  if (
max == -1 && 377
!isl_options_get_schedule_treat_coalescing(ctx)248
)
2337
8
    return isl_stat_ok;
2338
377
2339
900
  
for (i = 0; 369
i < graph->n900
;
++i531
)
{531
2340
531
    struct isl_sched_node *node = &graph->node[i];
2341
531
2342
531
    if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
2343
0
      return isl_stat_error;
2344
531
  }
2345
369
2346
369
  return isl_stat_ok;
2347
369
}
2348
2349
/* Add a constraint to graph->lp that equates the value at position
2350
 * "sum_pos" to the sum of the "n" values starting at "first".
2351
 */
2352
static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
2353
  int sum_pos, int first, int n)
2354
377
{
2355
377
  int i, k;
2356
377
  int total;
2357
377
2358
377
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2359
377
2360
377
  k = isl_basic_set_alloc_equality(graph->lp);
2361
377
  if (k < 0)
2362
0
    return isl_stat_error;
2363
377
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2364
377
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2365
859
  for (i = 0; 
i < n859
;
++i482
)
2366
482
    isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
2367
377
2368
377
  return isl_stat_ok;
2369
377
}
2370
2371
/* Add a constraint to graph->lp that equates the value at position
2372
 * "sum_pos" to the sum of the parameter coefficients of all nodes.
2373
 *
2374
 * Within each node, the coefficients have the following order:
2375
 *  - c_i_0
2376
 *  - c_i_n (if parametric)
2377
 *  - positive and negative parts of c_i_x
2378
 */
2379
static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
2380
  int sum_pos)
2381
374
{
2382
374
  int i, j, k;
2383
374
  int total;
2384
374
2385
374
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2386
374
2387
374
  k = isl_basic_set_alloc_equality(graph->lp);
2388
374
  if (k < 0)
2389
0
    return isl_stat_error;
2390
374
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2391
374
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2392
896
  for (i = 0; 
i < graph->n896
;
++i522
)
{522
2393
522
    int pos = 1 + graph->node[i].start + 1;
2394
522
2395
829
    for (j = 0; 
j < graph->node[i].nparam829
;
++j307
)
2396
307
      isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2397
522
  }
2398
374
2399
374
  return isl_stat_ok;
2400
374
}
2401
2402
/* Add a constraint to graph->lp that equates the value at position
2403
 * "sum_pos" to the sum of the variable coefficients of all nodes.
2404
 *
2405
 * Within each node, the coefficients have the following order:
2406
 *  - c_i_0
2407
 *  - c_i_n (if parametric)
2408
 *  - positive and negative parts of c_i_x
2409
 */
2410
static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
2411
  int sum_pos)
2412
406
{
2413
406
  int i, j, k;
2414
406
  int total;
2415
406
2416
406
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2417
406
2418
406
  k = isl_basic_set_alloc_equality(graph->lp);
2419
406
  if (k < 0)
2420
0
    return isl_stat_error;
2421
406
  isl_seq_clr(graph->lp->eq[k], 1 + total);
2422
406
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2423
990
  for (i = 0; 
i < graph->n990
;
++i584
)
{584
2424
584
    struct isl_sched_node *node = &graph->node[i];
2425
584
    int pos = 1 + node_var_coef_offset(node);
2426
584
2427
2.60k
    for (j = 0; 
j < 2 * node->nvar2.60k
;
++j2.01k
)
2428
2.01k
      isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2429
584
  }
2430
406
2431
406
  return isl_stat_ok;
2432
406
}
2433
2434
/* Construct an ILP problem for finding schedule coefficients
2435
 * that result in non-negative, but small dependence distances
2436
 * over all dependences.
2437
 * In particular, the dependence distances over proximity edges
2438
 * are bounded by m_0 + m_n n and we compute schedule coefficients
2439
 * with small values (preferably zero) of m_n and m_0.
2440
 *
2441
 * All variables of the ILP are non-negative.  The actual coefficients
2442
 * may be negative, so each coefficient is represented as the difference
2443
 * of two non-negative variables.  The negative part always appears
2444
 * immediately before the positive part.
2445
 * Other than that, the variables have the following order
2446
 *
2447
 *  - sum of positive and negative parts of m_n coefficients
2448
 *  - m_0
2449
 *  - sum of all c_n coefficients
2450
 *    (unconstrained when computing non-parametric schedules)
2451
 *  - sum of positive and negative parts of all c_x coefficients
2452
 *  - positive and negative parts of m_n coefficients
2453
 *  - for each node
2454
 *    - c_i_0
2455
 *    - c_i_n (if parametric)
2456
 *    - positive and negative parts of c_i_x
2457
 *
2458
 * The c_i_x are not represented directly, but through the columns of
2459
 * node->cmap.  That is, the computed values are for variable t_i_x
2460
 * such that c_i_x = Q t_i_x with Q equal to node->cmap.
2461
 *
2462
 * The constraints are those from the edges plus two or three equalities
2463
 * to express the sums.
2464
 *
2465
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2466
 * Otherwise, we ignore them.
2467
 */
2468
static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
2469
  int use_coincidence)
2470
377
{
2471
377
  int i;
2472
377
  unsigned nparam;
2473
377
  unsigned total;
2474
377
  isl_space *space;
2475
377
  int parametric;
2476
377
  int param_pos;
2477
377
  int n_eq, n_ineq;
2478
377
2479
377
  parametric = ctx->opt->schedule_parametric;
2480
377
  nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
2481
377
  param_pos = 4;
2482
377
  total = param_pos + 2 * nparam;
2483
924
  for (i = 0; 
i < graph->n924
;
++i547
)
{547
2484
547
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2485
547
    if (node_update_cmap(node) < 0)
2486
0
      return isl_stat_error;
2487
547
    node->start = total;
2488
547
    total += 1 + node->nparam + 2 * node->nvar;
2489
547
  }
2490
377
2491
377
  
if (377
count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0377
)
2492
0
    return isl_stat_error;
2493
377
  
if (377
count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0377
)
2494
0
    return isl_stat_error;
2495
377
  
if (377
count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0377
)
2496
0
    return isl_stat_error;
2497
377
2498
377
  space = isl_space_set_alloc(ctx, 0, total);
2499
377
  isl_basic_set_free(graph->lp);
2500
377
  n_eq += 2 + parametric;
2501
377
2502
377
  graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
2503
377
2504
377
  if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
2505
0
    return isl_stat_error;
2506
377
  
if (377
parametric && 377
add_param_sum_constraint(graph, 2) < 0345
)
2507
0
    return isl_stat_error;
2508
377
  
if (377
add_var_sum_constraint(graph, 3) < 0377
)
2509
0
    return isl_stat_error;
2510
377
  
if (377
add_bound_constant_constraints(ctx, graph) < 0377
)
2511
0
    return isl_stat_error;
2512
377
  
if (377
add_bound_coefficient_constraints(ctx, graph) < 0377
)
2513
0
    return isl_stat_error;
2514
377
  
if (377
add_all_validity_constraints(graph, use_coincidence) < 0377
)
2515
0
    return isl_stat_error;
2516
377
  
if (377
add_all_proximity_constraints(graph, use_coincidence) < 0377
)
2517
0
    return isl_stat_error;
2518
377
2519
377
  return isl_stat_ok;
2520
377
}
2521
2522
/* Analyze the conflicting constraint found by
2523
 * isl_tab_basic_set_non_trivial_lexmin.  If it corresponds to the validity
2524
 * constraint of one of the edges between distinct nodes, living, moreover
2525
 * in distinct SCCs, then record the source and sink SCC as this may
2526
 * be a good place to cut between SCCs.
2527
 */
2528
static int check_conflict(int con, void *user)
2529
1.14k
{
2530
1.14k
  int i;
2531
1.14k
  struct isl_sched_graph *graph = user;
2532
1.14k
2533
1.14k
  if (graph->src_scc >= 0)
2534
35
    return 0;
2535
1.14k
2536
1.11k
  con -= graph->lp->n_eq;
2537
1.11k
2538
1.11k
  if (con >= graph->lp->n_ineq)
2539
536
    return 0;
2540
1.11k
2541
1.95k
  
for (i = 0; 575
i < graph->n_edge1.95k
;
++i1.38k
)
{1.38k
2542
1.38k
    if (!is_validity(&graph->edge[i]))
2543
135
      continue;
2544
1.24k
    
if (1.24k
graph->edge[i].src == graph->edge[i].dst1.24k
)
2545
663
      continue;
2546
584
    
if (584
graph->edge[i].src->scc == graph->edge[i].dst->scc584
)
2547
513
      continue;
2548
71
    
if (71
graph->edge[i].start > con71
)
2549
42
      continue;
2550
29
    
if (29
graph->edge[i].end <= con29
)
2551
21
      continue;
2552
8
    graph->src_scc = graph->edge[i].src->scc;
2553
8
    graph->dst_scc = graph->edge[i].dst->scc;
2554
8
  }
2555
575
2556
575
  return 0;
2557
1.11k
}
2558
2559
/* Check whether the next schedule row of the given node needs to be
2560
 * non-trivial.  Lower-dimensional domains may have some trivial rows,
2561
 * but as soon as the number of remaining required non-trivial rows
2562
 * is as large as the number or remaining rows to be computed,
2563
 * all remaining rows need to be non-trivial.
2564
 */
2565
static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
2566
580
{
2567
580
  return node->nvar - node->rank >= graph->maxvar - graph->n_row;
2568
580
}
2569
2570
/* Solve the ILP problem constructed in setup_lp.
2571
 * For each node such that all the remaining rows of its schedule
2572
 * need to be non-trivial, we construct a non-triviality region.
2573
 * This region imposes that the next row is independent of previous rows.
2574
 * In particular the coefficients c_i_x are represented by t_i_x
2575
 * variables with c_i_x = Q t_i_x and Q a unimodular matrix such that
2576
 * its first columns span the rows of the previously computed part
2577
 * of the schedule.  The non-triviality region enforces that at least
2578
 * one of the remaining components of t_i_x is non-zero, i.e.,
2579
 * that the new schedule row depends on at least one of the remaining
2580
 * columns of Q.
2581
 */
2582
static __isl_give isl_vec *solve_lp(struct isl_sched_graph *graph)
2583
377
{
2584
377
  int i;
2585
377
  isl_vec *sol;
2586
377
  isl_basic_set *lp;
2587
377
2588
924
  for (i = 0; 
i < graph->n924
;
++i547
)
{547
2589
547
    struct isl_sched_node *node = &graph->node[i];
2590
547
    int skip = node->rank;
2591
547
    graph->region[i].pos = node_var_coef_offset(node) + 2 * skip;
2592
547
    if (needs_row(graph, node))
2593
443
      graph->region[i].len = 2 * (node->nvar - skip);
2594
547
    else
2595
104
      graph->region[i].len = 0;
2596
547
  }
2597
377
  lp = isl_basic_set_copy(graph->lp);
2598
377
  sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
2599
377
               graph->region, &check_conflict, graph);
2600
377
  return sol;
2601
377
}
2602
2603
/* Extract the coefficients for the variables of "node" from "sol".
2604
 *
2605
 * Within each node, the coefficients have the following order:
2606
 *  - c_i_0
2607
 *  - c_i_n (if parametric)
2608
 *  - positive and negative parts of c_i_x
2609
 *
2610
 * The c_i_x^- appear before their c_i_x^+ counterpart.
2611
 *
2612
 * Return c_i_x = c_i_x^+ - c_i_x^-
2613
 */
2614
static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
2615
  __isl_keep isl_vec *sol)
2616
477
{
2617
477
  int i;
2618
477
  int pos;
2619
477
  isl_vec *csol;
2620
477
2621
477
  if (!sol)
2622
0
    return NULL;
2623
477
  csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
2624
477
  if (!csol)
2625
0
    return NULL;
2626
477
2627
477
  pos = 1 + node_var_coef_offset(node);
2628
1.34k
  for (i = 0; 
i < node->nvar1.34k
;
++i872
)
2629
872
    isl_int_sub(csol->el[i],
2630
477
          sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
2631
477
2632
477
  return csol;
2633
477
}
2634
2635
/* Update the schedules of all nodes based on the given solution
2636
 * of the LP problem.
2637
 * The new row is added to the current band.
2638
 * All possibly negative coefficients are encoded as a difference
2639
 * of two non-negative variables, so we need to perform the subtraction
2640
 * here.  Moreover, if use_cmap is set, then the solution does
2641
 * not refer to the actual coefficients c_i_x, but instead to variables
2642
 * t_i_x such that c_i_x = Q t_i_x and Q is equal to node->cmap.
2643
 * In this case, we then also need to perform this multiplication
2644
 * to obtain the values of c_i_x.
2645
 *
2646
 * If coincident is set, then the caller guarantees that the new
2647
 * row satisfies the coincidence constraints.
2648
 */
2649
static int update_schedule(struct isl_sched_graph *graph,
2650
  __isl_take isl_vec *sol, int use_cmap, int coincident)
2651
302
{
2652
302
  int i, j;
2653
302
  isl_vec *csol = NULL;
2654
302
2655
302
  if (!sol)
2656
0
    goto error;
2657
302
  
if (302
sol->size == 0302
)
2658
0
    isl_die(sol->ctx, isl_error_internal,
2659
302
      "no solution found", goto error);
2660
302
  
if (302
graph->n_total_row >= graph->max_row302
)
2661
0
    isl_die(sol->ctx, isl_error_internal,
2662
302
      "too many schedule rows", goto error);
2663
302
2664
719
  
for (i = 0; 302
i < graph->n719
;
++i417
)
{417
2665
417
    struct isl_sched_node *node = &graph->node[i];
2666
417
    int pos = node->start;
2667
417
    int row = isl_mat_rows(node->sched);
2668
417
2669
417
    isl_vec_free(csol);
2670
417
    csol = extract_var_coef(node, sol);
2671
417
    if (!csol)
2672
0
      goto error;
2673
417
2674
417
    isl_map_free(node->sched_map);
2675
417
    node->sched_map = NULL;
2676
417
    node->sched = isl_mat_add_rows(node->sched, 1);
2677
417
    if (!node->sched)
2678
0
      goto error;
2679
1.05k
    
for (j = 0; 417
j < 1 + node->nparam1.05k
;
++j633
)
2680
633
      node->sched = isl_mat_set_element(node->sched,
2681
633
            row, j, sol->el[1 + pos + j]);
2682
417
    if (use_cmap)
2683
380
      csol = isl_mat_vec_product(isl_mat_copy(node->cmap),
2684
380
               csol);
2685
417
    if (!csol)
2686
0
      goto error;
2687
1.17k
    
for (j = 0; 417
j < node->nvar1.17k
;
++j762
)
2688
762
      node->sched = isl_mat_set_element(node->sched,
2689
762
          row, 1 + node->nparam + j, csol->el[j]);
2690
417
    node->coincident[graph->n_total_row] = coincident;
2691
417
  }
2692
302
  isl_vec_free(sol);
2693
302
  isl_vec_free(csol);
2694
302
2695
302
  graph->n_row++;
2696
302
  graph->n_total_row++;
2697
302
2698
302
  return 0;
2699
0
error:
2700
0
  isl_vec_free(sol);
2701
0
  isl_vec_free(csol);
2702
0
  return -1;
2703
302
}
2704
2705
/* Convert row "row" of node->sched into an isl_aff living in "ls"
2706
 * and return this isl_aff.
2707
 */
2708
static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
2709
  struct isl_sched_node *node, int row)
2710
897
{
2711
897
  int j;
2712
897
  isl_int v;
2713
897
  isl_aff *aff;
2714
897
2715
897
  isl_int_init(v);
2716
897
2717
897
  aff = isl_aff_zero_on_domain(ls);
2718
897
  isl_mat_get_element(node->sched, row, 0, &v);
2719
897
  aff = isl_aff_set_constant(aff, v);
2720
1.35k
  for (j = 0; 
j < node->nparam1.35k
;
++j460
)
{460
2721
460
    isl_mat_get_element(node->sched, row, 1 + j, &v);
2722
460
    aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
2723
460
  }
2724
2.41k
  for (j = 0; 
j < node->nvar2.41k
;
++j1.51k
)
{1.51k
2725
1.51k
    isl_mat_get_element(node->sched, row, 1 + node->nparam + j, &v);
2726
1.51k
    aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
2727
1.51k
  }
2728
897
2729
897
  isl_int_clear(v);
2730
897
2731
897
  return aff;
2732
897
}
2733
2734
/* Convert the "n" rows starting at "first" of node->sched into a multi_aff
2735
 * and return this multi_aff.
2736
 *
2737
 * The result is defined over the uncompressed node domain.
2738
 */
2739
static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff(
2740
  struct isl_sched_node *node, int first, int n)
2741
671
{
2742
671
  int i;
2743
671
  isl_space *space;
2744
671
  isl_local_space *ls;
2745
671
  isl_aff *aff;
2746
671
  isl_multi_aff *ma;
2747
671
  int nrow;
2748
671
2749
671
  if (!node)
2750
0
    return NULL;
2751
671
  nrow = isl_mat_rows(node->sched);
2752
671
  if (node->compressed)
2753
65
    space = isl_multi_aff_get_domain_space(node->decompress);
2754
671
  else
2755
606
    space = isl_space_copy(node->space);
2756
671
  ls = isl_local_space_from_space(isl_space_copy(space));
2757
671
  space = isl_space_from_domain(space);
2758
671
  space = isl_space_add_dims(space, isl_dim_out, n);
2759
671
  ma = isl_multi_aff_zero(space);
2760
671
2761
1.56k
  for (i = first; 
i < first + n1.56k
;
++i897
)
{897
2762
897
    aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
2763
897
    ma = isl_multi_aff_set_aff(ma, i - first, aff);
2764
897
  }
2765
671
2766
671
  isl_local_space_free(ls);
2767
671
2768
671
  if (node->compressed)
2769
65
    ma = isl_multi_aff_pullback_multi_aff(ma,
2770
65
          isl_multi_aff_copy(node->compress));
2771
671
2772
671
  return ma;
2773
671
}
2774
2775
/* Convert node->sched into a multi_aff and return this multi_aff.
2776
 *
2777
 * The result is defined over the uncompressed node domain.
2778
 */
2779
static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
2780
  struct isl_sched_node *node)
2781
171
{
2782
171
  int nrow;
2783
171
2784
171
  nrow = isl_mat_rows(node->sched);
2785
171
  return node_extract_partial_schedule_multi_aff(node, 0, nrow);
2786
171
}
2787
2788
/* Convert node->sched into a map and return this map.
2789
 *
2790
 * The result is cached in node->sched_map, which needs to be released
2791
 * whenever node->sched is updated.
2792
 * It is defined over the uncompressed node domain.
2793
 */
2794
static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
2795
454
{
2796
454
  if (
!node->sched_map454
)
{171
2797
171
    isl_multi_aff *ma;
2798
171
2799
171
    ma = node_extract_schedule_multi_aff(node);
2800
171
    node->sched_map = isl_map_from_multi_aff(ma);
2801
171
  }
2802
454
2803
454
  return isl_map_copy(node->sched_map);
2804
454
}
2805
2806
/* Construct a map that can be used to update a dependence relation
2807
 * based on the current schedule.
2808
 * That is, construct a map expressing that source and sink
2809
 * are executed within the same iteration of the current schedule.
2810
 * This map can then be intersected with the dependence relation.
2811
 * This is not the most efficient way, but this shouldn't be a critical
2812
 * operation.
2813
 */
2814
static __isl_give isl_map *specializer(struct isl_sched_node *src,
2815
  struct isl_sched_node *dst)
2816
198
{
2817
198
  isl_map *src_sched, *dst_sched;
2818
198
2819
198
  src_sched = node_extract_schedule(src);
2820
198
  dst_sched = node_extract_schedule(dst);
2821
198
  return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
2822
198
}
2823
2824
/* Intersect the domains of the nested relations in domain and range
2825
 * of "umap" with "map".
2826
 */
2827
static __isl_give isl_union_map *intersect_domains(
2828
  __isl_take isl_union_map *umap, __isl_keep isl_map *map)
2829
69
{
2830
69
  isl_union_set *uset;
2831
69
2832
69
  umap = isl_union_map_zip(umap);
2833
69
  uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
2834
69
  umap = isl_union_map_intersect_domain(umap, uset);
2835
69
  umap = isl_union_map_zip(umap);
2836
69
  return umap;
2837
69
}
2838
2839
/* Update the dependence relation of the given edge based
2840
 * on the current schedule.
2841
 * If the dependence is carried completely by the current schedule, then
2842
 * it is removed from the edge_tables.  It is kept in the list of edges
2843
 * as otherwise all edge_tables would have to be recomputed.
2844
 */
2845
static int update_edge(struct isl_sched_graph *graph,
2846
  struct isl_sched_edge *edge)
2847
198
{
2848
198
  int empty;
2849
198
  isl_map *id;
2850
198
2851
198
  id = specializer(edge->src, edge->dst);
2852
198
  edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
2853
198
  if (!edge->map)
2854
0
    goto error;
2855
198
2856
198
  
if (198
edge->tagged_condition198
)
{31
2857
31
    edge->tagged_condition =
2858
31
      intersect_domains(edge->tagged_condition, id);
2859
31
    if (!edge->tagged_condition)
2860
0
      goto error;
2861
31
  }
2862
198
  
if (198
edge->tagged_validity198
)
{38
2863
38
    edge->tagged_validity =
2864
38
      intersect_domains(edge->tagged_validity, id);
2865
38
    if (!edge->tagged_validity)
2866
0
      goto error;
2867
38
  }
2868
198
2869
198
  empty = isl_map_plain_is_empty(edge->map);
2870
198
  if (empty < 0)
2871
0
    goto error;
2872
198
  
if (198
empty198
)
2873
108
    graph_remove_edge(graph, edge);
2874
198
2875
198
  isl_map_free(id);
2876
198
  return 0;
2877
0
error:
2878
0
  isl_map_free(id);
2879
0
  return -1;
2880
198
}
2881
2882
/* Does the domain of "umap" intersect "uset"?
2883
 */
2884
static int domain_intersects(__isl_keep isl_union_map *umap,
2885
  __isl_keep isl_union_set *uset)
2886
39
{
2887
39
  int empty;
2888
39
2889
39
  umap = isl_union_map_copy(umap);
2890
39
  umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
2891
39
  empty = isl_union_map_is_empty(umap);
2892
39
  isl_union_map_free(umap);
2893
39
2894
39
  return empty < 0 ? 
-10
:
!empty39
;
2895
39
}
2896
2897
/* Does the range of "umap" intersect "uset"?
2898
 */
2899
static int range_intersects(__isl_keep isl_union_map *umap,
2900
  __isl_keep isl_union_set *uset)
2901
26
{
2902
26
  int empty;
2903
26
2904
26
  umap = isl_union_map_copy(umap);
2905
26
  umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
2906
26
  empty = isl_union_map_is_empty(umap);
2907
26
  isl_union_map_free(umap);
2908
26
2909
26
  return empty < 0 ? 
-10
:
!empty26
;
2910
26
}
2911
2912
/* Are the condition dependences of "edge" local with respect to
2913
 * the current schedule?
2914
 *
2915
 * That is, are domain and range of the condition dependences mapped
2916
 * to the same point?
2917
 *
2918
 * In other words, is the condition false?
2919
 */
2920
static int is_condition_false(struct isl_sched_edge *edge)
2921
29
{
2922
29
  isl_union_map *umap;
2923
29
  isl_map *map, *sched, *test;
2924
29
  int empty, local;
2925
29
2926
29
  empty = isl_union_map_is_empty(edge->tagged_condition);
2927
29
  if (
empty < 0 || 29
empty29
)
2928
0
    return empty;
2929
29
2930
29
  umap = isl_union_map_copy(edge->tagged_condition);
2931
29
  umap = isl_union_map_zip(umap);
2932
29
  umap = isl_union_set_unwrap(isl_union_map_domain(umap));
2933
29
  map = isl_map_from_union_map(umap);
2934
29
2935
29
  sched = node_extract_schedule(edge->src);
2936
29
  map = isl_map_apply_domain(map, sched);
2937
29
  sched = node_extract_schedule(edge->dst);
2938
29
  map = isl_map_apply_range(map, sched);
2939
29
2940
29
  test = isl_map_identity(isl_map_get_space(map));
2941
29
  local = isl_map_is_subset(map, test);
2942
29
  isl_map_free(map);
2943
29
  isl_map_free(test);
2944
29
2945
29
  return local;
2946
29
}
2947
2948
/* For each conditional validity constraint that is adjacent
2949
 * to a condition with domain in condition_source or range in condition_sink,
2950
 * turn it into an unconditional validity constraint.
2951
 */
2952
static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
2953
  __isl_take isl_union_set *condition_source,
2954
  __isl_take isl_union_set *condition_sink)
2955
9
{
2956
9
  int i;
2957
9
2958
9
  condition_source = isl_union_set_coalesce(condition_source);
2959
9
  condition_sink = isl_union_set_coalesce(condition_sink);
2960
9
2961
49
  for (i = 0; 
i < graph->n_edge49
;
++i40
)
{40
2962
40
    int adjacent;
2963
40
    isl_union_map *validity;
2964
40
2965
40
    if (!is_conditional_validity(&graph->edge[i]))
2966
17
      continue;
2967
23
    
if (23
is_validity(&graph->edge[i])23
)
2968
4
      continue;
2969
23
2970
19
    validity = graph->edge[i].tagged_validity;
2971
19
    adjacent = domain_intersects(validity, condition_sink);
2972
19
    if (
adjacent >= 0 && 19
!adjacent19
)
2973
18
      adjacent = range_intersects(validity, condition_source);
2974
19
    if (adjacent < 0)
2975
0
      goto error;
2976
19
    
if (19
!adjacent19
)
2977
17
      continue;
2978
19
2979
2
    set_validity(&graph->edge[i]);
2980
2
  }
2981
9
2982
9
  isl_union_set_free(condition_source);
2983
9
  isl_union_set_free(condition_sink);
2984
9
  return 0;
2985
0
error:
2986
0
  isl_union_set_free(condition_source);
2987
0
  isl_union_set_free(condition_sink);
2988
0
  return -1;
2989
9
}
2990
2991
/* Update the dependence relations of all edges based on the current schedule
2992
 * and enforce conditional validity constraints that are adjacent
2993
 * to satisfied condition constraints.
2994
 *
2995
 * First check if any of the condition constraints are satisfied
2996
 * (i.e., not local to the outer schedule) and keep track of
2997
 * their domain and range.
2998
 * Then update all dependence relations (which removes the non-local
2999
 * constraints).
3000
 * Finally, if any condition constraints turned out to be satisfied,
3001
 * then turn all adjacent conditional validity constraints into
3002
 * unconditional validity constraints.
3003
 */
3004
static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
3005
81
{
3006
81
  int i;
3007
81
  int any = 0;
3008
81
  isl_union_set *source, *sink;
3009
81
3010
81
  source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3011
81
  sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3012
279
  for (i = 0; 
i < graph->n_edge279
;
++i198
)
{198
3013
198
    int local;
3014
198
    isl_union_set *uset;
3015
198
    isl_union_map *umap;
3016
198
3017
198
    if (!is_condition(&graph->edge[i]))
3018
167
      continue;
3019
31
    
if (31
is_local(&graph->edge[i])31
)
3020
17
      continue;
3021
14
    local = is_condition_false(&graph->edge[i]);
3022
14
    if (local < 0)
3023
0
      goto error;
3024
14
    
if (14
local14
)
3025
4
      continue;
3026
14
3027
10
    any = 1;
3028
10
3029
10
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3030
10
    uset = isl_union_map_domain(umap);
3031
10
    source = isl_union_set_union(source, uset);
3032
10
3033
10
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3034
10
    uset = isl_union_map_range(umap);
3035
10
    sink = isl_union_set_union(sink, uset);
3036
10
  }
3037
81
3038
279
  
for (i = graph->n_edge - 1; 81
i >= 0279
;
--i198
)
{198
3039
198
    if (update_edge(graph, &graph->edge[i]) < 0)
3040
0
      goto error;
3041
198
  }
3042
81
3043
81
  
if (81
any81
)
3044
9
    return unconditionalize_adjacent_validity(graph, source, sink);
3045
81
3046
72
  isl_union_set_free(source);
3047
72
  isl_union_set_free(sink);
3048
72
  return 0;
3049
0
error:
3050
0
  isl_union_set_free(source);
3051
0
  isl_union_set_free(sink);
3052
0
  return -1;
3053
81
}
3054
3055
static void next_band(struct isl_sched_graph *graph)
3056
83
{
3057
83
  graph->band_start = graph->n_total_row;
3058
83
}
3059
3060
/* Return the union of the universe domains of the nodes in "graph"
3061
 * that satisfy "pred".
3062
 */
3063
static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
3064
  struct isl_sched_graph *graph,
3065
  int (*pred)(struct isl_sched_node *node, int data), int data)
3066
157
{
3067
157
  int i;
3068
157
  isl_set *set;
3069
157
  isl_union_set *dom;
3070
157
3071
263
  for (i = 0; 
i < graph->n263
;
++i106
)
3072
263
    
if (263
pred(&graph->node[i], data)263
)
3073
157
      break;
3074
157
3075
157
  if (i >= graph->n)
3076
0
    isl_die(ctx, isl_error_internal,
3077
157
      "empty component", return NULL);
3078
157
3079
157
  set = isl_set_universe(isl_space_copy(graph->node[i].space));
3080
157
  dom = isl_union_set_from_set(set);
3081
157
3082
272
  for (i = i + 1; 
i < graph->n272
;
++i115
)
{115
3083
115
    if (!pred(&graph->node[i], data))
3084
108
      continue;
3085
7
    set = isl_set_universe(isl_space_copy(graph->node[i].space));
3086
7
    dom = isl_union_set_union(dom, isl_union_set_from_set(set));
3087
7
  }
3088
157
3089
157
  return dom;
3090
157
}
3091
3092
/* Return a list of unions of universe domains, where each element
3093
 * in the list corresponds to an SCC (or WCC) indexed by node->scc.
3094
 */
3095
static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx,
3096
  struct isl_sched_graph *graph)
3097
69
{
3098
69
  int i;
3099
69
  isl_union_set_list *filters;
3100
69
3101
69
  filters = isl_union_set_list_alloc(ctx, graph->scc);
3102
222
  for (i = 0; 
i < graph->scc222
;
++i153
)
{153
3103
153
    isl_union_set *dom;
3104
153
3105
153
    dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i);
3106
153
    filters = isl_union_set_list_add(filters, dom);
3107
153
  }
3108
69
3109
69
  return filters;
3110
69
}
3111
3112
/* Return a list of two unions of universe domains, one for the SCCs up
3113
 * to and including graph->src_scc and another for the other SCCs.
3114
 */
3115
static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
3116
  struct isl_sched_graph *graph)
3117
2
{
3118
2
  isl_union_set *dom;
3119
2
  isl_union_set_list *filters;
3120
2
3121
2
  filters = isl_union_set_list_alloc(ctx, 2);
3122
2
  dom = isl_sched_graph_domain(ctx, graph,
3123
2
          &node_scc_at_most, graph->src_scc);
3124
2
  filters = isl_union_set_list_add(filters, dom);
3125
2
  dom = isl_sched_graph_domain(ctx, graph,
3126
2
          &node_scc_at_least, graph->src_scc + 1);
3127
2
  filters = isl_union_set_list_add(filters, dom);
3128
2
3129
2
  return filters;
3130
2
}
3131
3132
/* Copy nodes that satisfy node_pred from the src dependence graph
3133
 * to the dst dependence graph.
3134
 */
3135
static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
3136
  int (*node_pred)(struct isl_sched_node *node, int data), int data)
3137
122
{
3138
122
  int i;
3139
122
3140
122
  dst->n = 0;
3141
428
  for (i = 0; 
i < src->n428
;
++i306
)
{306
3142
306
    int j;
3143
306
3144
306
    if (!node_pred(&src->node[i], data))
3145
160
      continue;
3146
306
3147
146
    j = dst->n;
3148
146
    dst->node[j].space = isl_space_copy(src->node[i].space);
3149
146
    dst->node[j].compressed = src->node[i].compressed;
3150
146
    dst->node[j].hull = isl_set_copy(src->node[i].hull);
3151
146
    dst->node[j].compress =
3152
146
      isl_multi_aff_copy(src->node[i].compress);
3153
146
    dst->node[j].decompress =
3154
146
      isl_multi_aff_copy(src->node[i].decompress);
3155
146
    dst->node[j].nvar = src->node[i].nvar;
3156
146
    dst->node[j].nparam = src->node[i].nparam;
3157
146
    dst->node[j].sched = isl_mat_copy(src->node[i].sched);
3158
146
    dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
3159
146
    dst->node[j].coincident = src->node[i].coincident;
3160
146
    dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
3161
146
    dst->node[j].max = isl_vec_copy(src->node[i].max);
3162
146
    dst->n++;
3163
146
3164
146
    if (
!dst->node[j].space || 146
!dst->node[j].sched146
)
3165
0
      return -1;
3166
146
    
if (146
dst->node[j].compressed &&146
3167
8
        
(!dst->node[j].hull || 8
!dst->node[j].compress8
||
3168
8
         !dst->node[j].decompress))
3169
0
      return -1;
3170
146
  }
3171
122
3172
122
  return 0;
3173
122
}
3174
3175
/* Copy non-empty edges that satisfy edge_pred from the src dependence graph
3176
 * to the dst dependence graph.
3177
 * If the source or destination node of the edge is not in the destination
3178
 * graph, then it must be a backward proximity edge and it should simply
3179
 * be ignored.
3180
 */
3181
static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
3182
  struct isl_sched_graph *src,
3183
  int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
3184
122
{
3185
122
  int i;
3186
122
  enum isl_edge_type t;
3187
122
3188
122
  dst->n_edge = 0;
3189
462
  for (i = 0; 
i < src->n_edge462
;
++i340
)
{340
3190
340
    struct isl_sched_edge *edge = &src->edge[i];
3191
340
    isl_map *map;
3192
340
    isl_union_map *tagged_condition;
3193
340
    isl_union_map *tagged_validity;
3194
340
    struct isl_sched_node *dst_src, *dst_dst;
3195
340
3196
340
    if (!edge_pred(edge, data))
3197
260
      continue;
3198
340
3199
80
    
if (80
isl_map_plain_is_empty(edge->map)80
)
3200
9
      continue;
3201
80
3202
71
    dst_src = graph_find_node(ctx, dst, edge->src->space);
3203
71
    dst_dst = graph_find_node(ctx, dst, edge->dst->space);
3204
71
    if (
!dst_src || 71
!dst_dst71
)
{0
3205
0
      if (
is_validity(edge) || 0
is_conditional_validity(edge)0
)
3206
0
        isl_die(ctx, isl_error_internal,
3207
0
          "backward (conditional) validity edge",
3208
0
          return -1);
3209
0
      continue;
3210
0
    }
3211
71
3212
71
    map = isl_map_copy(edge->map);
3213
71
    tagged_condition = isl_union_map_copy(edge->tagged_condition);
3214
71
    tagged_validity = isl_union_map_copy(edge->tagged_validity);
3215
71
3216
71
    dst->edge[dst->n_edge].src = dst_src;
3217
71
    dst->edge[dst->n_edge].dst = dst_dst;
3218
71
    dst->edge[dst->n_edge].map = map;
3219
71
    dst->edge[dst->n_edge].tagged_condition = tagged_condition;
3220
71
    dst->edge[dst->n_edge].tagged_validity = tagged_validity;
3221
71
    dst->edge[dst->n_edge].types = edge->types;
3222
71
    dst->n_edge++;
3223
71
3224
71
    if (
edge->tagged_condition && 71
!tagged_condition9
)
3225
0
      return -1;
3226
71
    
if (71
edge->tagged_validity && 71
!tagged_validity6
)
3227
0
      return -1;
3228
71
3229
426
    
for (t = isl_edge_first; 71
t <= isl_edge_last426
;
++t355
)
{355
3230
355
      if (edge !=
3231
355
          graph_find_edge(src, t, edge->src, edge->dst))
3232
174
        continue;
3233
181
      
if (181
graph_edge_table_add(ctx, dst, t,181
3234
181
              &dst->edge[dst->n_edge - 1]) < 0)
3235
0
        return -1;
3236
181
    }
3237
71
  }
3238
122
3239
122
  return 0;
3240
122
}
3241
3242
/* Compute the maximal number of variables over all nodes.
3243
 * This is the maximal number of linearly independent schedule
3244
 * rows that we need to compute.
3245
 * Just in case we end up in a part of the dependence graph
3246
 * with only lower-dimensional domains, we make sure we will
3247
 * compute the required amount of extra linearly independent rows.
3248
 */
3249
static int compute_maxvar(struct isl_sched_graph *graph)
3250
247
{
3251
247
  int i;
3252
247
3253
247
  graph->maxvar = 0;
3254
602
  for (i = 0; 
i < graph->n602
;
++i355
)
{355
3255
355
    struct isl_sched_node *node = &graph->node[i];
3256
355
    int nvar;
3257
355
3258
355
    if (node_update_cmap(node) < 0)
3259
0
      return -1;
3260
355
    nvar = node->nvar + graph->n_row - node->rank;
3261
355
    if (nvar > graph->maxvar)
3262
255
      graph->maxvar = nvar;
3263
355
  }
3264
247
3265
247
  return 0;
3266
247
}
3267
3268
/* Extract the subgraph of "graph" that consists of the node satisfying
3269
 * "node_pred" and the edges satisfying "edge_pred" and store
3270
 * the result in "sub".
3271
 */
3272
static int extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
3273
  int (*node_pred)(struct isl_sched_node *node, int data),
3274
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3275
  int data, struct isl_sched_graph *sub)
3276
122
{
3277
122
  int i, n = 0, n_edge = 0;
3278
122
  int t;
3279
122
3280
428
  for (i = 0; 
i < graph->n428
;
++i306
)
3281
306
    
if (306
node_pred(&graph->node[i], data)306
)
3282
146
      ++n;
3283
462
  for (i = 0; 
i < graph->n_edge462
;
++i340
)
3284
340
    
if (340
edge_pred(&graph->edge[i], data)340
)
3285
80
      ++n_edge;
3286
122
  if (graph_alloc(ctx, sub, n, n_edge) < 0)
3287
0
    return -1;
3288
122
  
if (122
copy_nodes(sub, graph, node_pred, data) < 0122
)
3289
0
    return -1;
3290
122
  
if (122
graph_init_table(ctx, sub) < 0122
)
3291
0
    return -1;
3292
732
  
for (t = 0; 122
t <= isl_edge_last732
;
++t610
)
3293
610
    sub->max_edge[t] = graph->max_edge[t];
3294
122
  if (graph_init_edge_tables(ctx, sub) < 0)
3295
0
    return -1;
3296
122
  
if (122
copy_edges(ctx, sub, graph, edge_pred, data) < 0122
)
3297
0
    return -1;
3298
122
  sub->n_row = graph->n_row;
3299
122
  sub->max_row = graph->max_row;
3300
122
  sub->n_total_row = graph->n_total_row;
3301
122
  sub->band_start = graph->band_start;
3302
122
3303
122
  return 0;
3304
122
}
3305
3306
static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
3307
  struct isl_sched_graph *graph);
3308
static __isl_give isl_schedule_node *compute_schedule_wcc(
3309
  isl_schedule_node *node, struct isl_sched_graph *graph);
3310
3311
/* Compute a schedule for a subgraph of "graph".  In particular, for
3312
 * the graph composed of nodes that satisfy node_pred and edges that
3313
 * that satisfy edge_pred.
3314
 * If the subgraph is known to consist of a single component, then wcc should
3315
 * be set and then we call compute_schedule_wcc on the constructed subgraph.
3316
 * Otherwise, we call compute_schedule, which will check whether the subgraph
3317
 * is connected.
3318
 *
3319
 * The schedule is inserted at "node" and the updated schedule node
3320
 * is returned.
3321
 */
3322
static __isl_give isl_schedule_node *compute_sub_schedule(
3323
  __isl_take isl_schedule_node *node, isl_ctx *ctx,
3324
  struct isl_sched_graph *graph,
3325
  int (*node_pred)(struct isl_sched_node *node, int data),
3326
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3327
  int data, int wcc)
3328
58
{
3329
58
  struct isl_sched_graph split = { 0 };
3330
58
3331
58
  if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data,
3332
58
        &split) < 0)
3333
0
    goto error;
3334
58
3335
58
  
if (58
wcc58
)
3336
44
    node = compute_schedule_wcc(node, &split);
3337
58
  else
3338
14
    node = compute_schedule(node, &split);
3339
58
3340
58
  graph_free(ctx, &split);
3341
58
  return node;
3342
0
error:
3343
0
  graph_free(ctx, &split);
3344
0
  return isl_schedule_node_free(node);
3345
58
}
3346
3347
static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
3348
498
{
3349
204
  return edge->src->scc == scc && edge->dst->scc == scc;
3350
498
}
3351
3352
static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
3353
14
{
3354
14
  return edge->dst->scc <= scc;
3355
14
}
3356
3357
static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
3358
14
{
3359
14
  return edge->src->scc >= scc;
3360
14
}
3361
3362
/* Reset the current band by dropping all its schedule rows.
3363
 */
3364
static int reset_band(struct isl_sched_graph *graph)
3365
8
{
3366
8
  int i;
3367
8
  int drop;
3368
8
3369
8
  drop = graph->n_total_row - graph->band_start;
3370
8
  graph->n_total_row -= drop;
3371
8
  graph->n_row -= drop;
3372
8
3373
28
  for (i = 0; 
i < graph->n28
;
++i20
)
{20
3374
20
    struct isl_sched_node *node = &graph->node[i];
3375
20
3376
20
    isl_map_free(node->sched_map);
3377
20
    node->sched_map = NULL;
3378
20
3379
20
    node->sched = isl_mat_drop_rows(node->sched,
3380
20
            graph->band_start, drop);
3381
20
3382
20
    if (!node->sched)
3383
0
      return -1;
3384
20
  }
3385
8
3386
8
  return 0;
3387
8
}
3388
3389
/* Split the current graph into two parts and compute a schedule for each
3390
 * part individually.  In particular, one part consists of all SCCs up
3391
 * to and including graph->src_scc, while the other part contains the other
3392
 * SCCs.  The split is enforced by a sequence node inserted at position "node"
3393
 * in the schedule tree.  Return the updated schedule node.
3394
 * If either of these two parts consists of a sequence, then it is spliced
3395
 * into the sequence containing the two parts.
3396
 *
3397
 * The current band is reset. It would be possible to reuse
3398
 * the previously computed rows as the first rows in the next
3399
 * band, but recomputing them may result in better rows as we are looking
3400
 * at a smaller part of the dependence graph.
3401
 */
3402
static __isl_give isl_schedule_node *compute_split_schedule(
3403
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3404
2
{
3405
2
  int is_seq;
3406
2
  isl_ctx *ctx;
3407
2
  isl_union_set_list *filters;
3408
2
3409
2
  if (!node)
3410
0
    return NULL;
3411
2
3412
2
  
if (2
reset_band(graph) < 02
)
3413
0
    return isl_schedule_node_free(node);
3414
2
3415
2
  next_band(graph);
3416
2
3417
2
  ctx = isl_schedule_node_get_ctx(node);
3418
2
  filters = extract_split(ctx, graph);
3419
2
  node = isl_schedule_node_insert_sequence(node, filters);
3420
2
  node = isl_schedule_node_child(node, 1);
3421
2
  node = isl_schedule_node_child(node, 0);
3422
2
3423
2
  node = compute_sub_schedule(node, ctx, graph,
3424
2
        &node_scc_at_least, &edge_src_scc_at_least,
3425
2
        graph->src_scc + 1, 0);
3426
2
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3427
2
  node = isl_schedule_node_parent(node);
3428
2
  node = isl_schedule_node_parent(node);
3429
2
  if (is_seq)
3430
0
    node = isl_schedule_node_sequence_splice_child(node, 1);
3431
2
  node = isl_schedule_node_child(node, 0);
3432
2
  node = isl_schedule_node_child(node, 0);
3433
2
  node = compute_sub_schedule(node, ctx, graph,
3434
2
        &node_scc_at_most, &edge_dst_scc_at_most,
3435
2
        graph->src_scc, 0);
3436
2
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3437
2
  node = isl_schedule_node_parent(node);
3438
2
  node = isl_schedule_node_parent(node);
3439
2
  if (is_seq)
3440
0
    node = isl_schedule_node_sequence_splice_child(node, 0);
3441
2
3442
2
  return node;
3443
2
}
3444
3445
/* Insert a band node at position "node" in the schedule tree corresponding
3446
 * to the current band in "graph".  Mark the band node permutable
3447
 * if "permutable" is set.
3448
 * The partial schedules and the coincidence property are extracted
3449
 * from the graph nodes.
3450
 * Return the updated schedule node.
3451
 */
3452
static __isl_give isl_schedule_node *insert_current_band(
3453
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3454
  int permutable)
3455
161
{
3456
161
  int i;
3457
161
  int start, end, n;
3458
161
  isl_multi_aff *ma;
3459
161
  isl_multi_pw_aff *mpa;
3460
161
  isl_multi_union_pw_aff *mupa;
3461
161
3462
161
  if (!node)
3463
0
    return NULL;
3464
161
3465
161
  
if (161
graph->n < 1161
)
3466
0
    isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
3467
161
      "graph should have at least one node",
3468
161
      return isl_schedule_node_free(node));
3469
161
3470
161
  start = graph->band_start;
3471
161
  end = graph->n_total_row;
3472
161
  n = end - start;
3473
161
3474
161
  ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n);
3475
161
  mpa = isl_multi_pw_aff_from_multi_aff(ma);
3476
161
  mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3477
161
3478
237
  for (i = 1; 
i < graph->n237
;
++i76
)
{76
3479
76
    isl_multi_union_pw_aff *mupa_i;
3480
76
3481
76
    ma = node_extract_partial_schedule_multi_aff(&graph->node[i],
3482
76
                start, n);
3483
76
    mpa = isl_multi_pw_aff_from_multi_aff(ma);
3484
76
    mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3485
76
    mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
3486
76
  }
3487
161
  node = isl_schedule_node_insert_partial_schedule(node, mupa);
3488
161
3489
414
  for (i = 0; 
i < n414
;
++i253
)
3490
253
    node = isl_schedule_node_band_member_set_coincident(node, i,
3491
253
          graph->node[0].coincident[start + i]);
3492
161
  node = isl_schedule_node_band_set_permutable(node, permutable);
3493
161
3494
161
  return node;
3495
161
}
3496
3497
/* Update the dependence relations based on the current schedule,
3498
 * add the current band to "node" and then continue with the computation
3499
 * of the next band.
3500
 * Return the updated schedule node.
3501
 */
3502
static __isl_give isl_schedule_node *compute_next_band(
3503
  __isl_take isl_schedule_node *node,
3504
  struct isl_sched_graph *graph, int permutable)
3505
38
{
3506
38
  isl_ctx *ctx;
3507
38
3508
38
  if (!node)
3509
0
    return NULL;
3510
38
3511
38
  ctx = isl_schedule_node_get_ctx(node);
3512
38
  if (update_edges(ctx, graph) < 0)
3513
0
    return isl_schedule_node_free(node);
3514
38
  node = insert_current_band(node, graph, permutable);
3515
38
  next_band(graph);
3516
38
3517
38
  node = isl_schedule_node_child(node, 0);
3518
38
  node = compute_schedule(node, graph);
3519
38
  node = isl_schedule_node_parent(node);
3520
38
3521
38
  return node;
3522
38
}
3523
3524
/* Add the constraints "coef" derived from an edge from "node" to itself
3525
 * to graph->lp in order to respect the dependences and to try and carry them.
3526
 * "pos" is the sequence number of the edge that needs to be carried.
3527
 * "coef" represents general constraints on coefficients (c_0, c_n, c_x)
3528
 * of valid constraints for (y - x) with x and y instances of the node.
3529
 *
3530
 * The constraints added to graph->lp need to enforce
3531
 *
3532
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_j_0 + c_j_n n + c_j_x x)
3533
 *  = c_j_x (y - x) >= e_i
3534
 *
3535
 * for each (x,y) in the dependence relation of the edge.
3536
 * That is, (-e_i, 0, c_j_x) needs to be plugged in for (c_0, c_n, c_x),
3537
 * taking into account that each coefficient in c_j_x is represented
3538
 * as a pair of non-negative coefficients.
3539
 */
3540
static isl_stat add_intra_constraints(struct isl_sched_graph *graph,
3541
  struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos)
3542
45
{
3543
45
  int offset;
3544
45
  isl_ctx *ctx;
3545
45
  isl_dim_map *dim_map;
3546
45
3547
45
  if (!coef)
3548
0
    return isl_stat_error;
3549
45
3550
45
  ctx = isl_basic_set_get_ctx(coef);
3551
45
  offset = coef_var_offset(coef);
3552
45
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
3553
45
  isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3554
45
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3555
45
3556
45
  return isl_stat_ok;
3557
45
}
3558
3559
/* Add the constraints "coef" derived from an edge from "src" to "dst"
3560
 * to graph->lp in order to respect the dependences and to try and carry them.
3561
 * "pos" is the sequence number of the edge that needs to be carried.
3562
 * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
3563
 * of valid constraints for (x, y) with x and y instances of "src" and "dst".
3564
 *
3565
 * The constraints added to graph->lp need to enforce
3566
 *
3567
 *  (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
3568
 *
3569
 * for each (x,y) in the dependence relation of the edge.
3570
 * That is,
3571
 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3572
 * needs to be plugged in for (c_0, c_n, c_x, c_y),
3573
 * taking into account that each coefficient in c_j_x and c_k_x is represented
3574
 * as a pair of non-negative coefficients.
3575
 */
3576
static isl_stat add_inter_constraints(struct isl_sched_graph *graph,
3577
  struct isl_sched_node *src, struct isl_sched_node *dst,
3578
  __isl_take isl_basic_set *coef, int pos)
3579
14
{
3580
14
  int offset;
3581
14
  isl_ctx *ctx;
3582
14
  isl_dim_map *dim_map;
3583
14
3584
14
  if (!coef)
3585
0
    return isl_stat_error;
3586
14
3587
14
  ctx = isl_basic_set_get_ctx(coef);
3588
14
  offset = coef_var_offset(coef);
3589
14
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
3590
14
  isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3591
14
  graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3592
14
3593
14
  return isl_stat_ok;
3594
14
}
3595
3596
/* Data structure collecting information used during the construction
3597
 * of an LP for carrying dependences.
3598
 *
3599
 * "intra" is a sequence of coefficient constraints for intra-node edges.
3600
 * "inter" is a sequence of coefficient constraints for inter-node edges.
3601
 */
3602
struct isl_carry {
3603
  isl_basic_set_list *intra;
3604
  isl_basic_set_list *inter;
3605
};
3606
3607
/* Free all the data stored in "carry".
3608
 */
3609
static void isl_carry_clear(struct isl_carry *carry)
3610
30
{
3611
30
  isl_basic_set_list_free(carry->intra);
3612
30
  isl_basic_set_list_free(carry->inter);
3613
30
}
3614
3615
/* Return a pointer to the node in "graph" that lives in "space".
3616
 * If the requested node has been compressed, then "space"
3617
 * corresponds to the compressed space.
3618
 *
3619
 * First try and see if "space" is the space of an uncompressed node.
3620
 * If so, return that node.
3621
 * Otherwise, "space" was constructed by construct_compressed_id and
3622
 * contains a user pointer pointing to the node in the tuple id.
3623
 */
3624
static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx,
3625
  struct isl_sched_graph *graph, __isl_keep isl_space *space)
3626
73
{
3627
73
  isl_id *id;
3628
73
  struct isl_sched_node *node;
3629
73
3630
73
  if (!space)
3631
0
    return NULL;
3632
73
3633
73
  node = graph_find_node(ctx, graph, space);
3634
73
  if (node)
3635
73
    return node;
3636
73
3637
0
  id = isl_space_get_tuple_id(space, isl_dim_set);
3638
0
  node = isl_id_get_user(id);
3639
0
  isl_id_free(id);
3640
0
3641
0
  if (!node)
3642
0
    return NULL;
3643
0
3644
0
  
if (0
!(node >= &graph->node[0] && 0
node < &graph->node[graph->n]0
))
3645
0
    isl_die(ctx, isl_error_internal,
3646
0
      "space points to invalid node", return NULL);
3647
0
3648
0
  return node;
3649
0
}
3650
3651
/* Internal data structure for add_all_constraints.
3652
 *
3653
 * "graph" is the schedule constraint graph for which an LP problem
3654
 * is being constructed.
3655
 * "pos" is the position of the next edge that needs to be carried.
3656
 */
3657
struct isl_add_all_constraints_data {
3658
  isl_ctx *ctx;
3659
  struct isl_sched_graph *graph;
3660
  int pos;
3661
};
3662
3663
/* Add the constraints "coef" derived from an edge from a node to itself
3664
 * to data->graph->lp in order to respect the dependences and
3665
 * to try and carry them.
3666
 *
3667
 * The space of "coef" is of the form
3668
 *
3669
 *  coefficients[[c_cst, c_n] -> S[c_x]]
3670
 *
3671
 * with S[c_x] the (compressed) space of the node.
3672
 * Extract the node from the space and call add_intra_constraints.
3673
 */
3674
static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user)
3675
45
{
3676
45
  struct isl_add_all_constraints_data *data = user;
3677
45
  isl_space *space;
3678
45
  struct isl_sched_node *node;
3679
45
3680
45
  space = isl_basic_set_get_space(coef);
3681
45
  space = isl_space_range(isl_space_unwrap(space));
3682
45
  node = graph_find_compressed_node(data->ctx, data->graph, space);
3683
45
  isl_space_free(space);
3684
45
  return add_intra_constraints(data->graph, node, coef, data->pos++);
3685
45
}
3686
3687
/* Add the constraints "coef" derived from an edge from a node j
3688
 * to a node k to data->graph->lp in order to respect the dependences and
3689
 * to try and carry them.
3690
 *
3691
 * The space of "coef" is of the form
3692
 *
3693
 *  coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
3694
 *
3695
 * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
3696
 * Extract the nodes from the space and call add_inter_constraints.
3697
 */
3698
static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user)
3699
14
{
3700
14
  struct isl_add_all_constraints_data *data = user;
3701
14
  isl_space *space, *dom;
3702
14
  struct isl_sched_node *src, *dst;
3703
14
3704
14
  space = isl_basic_set_get_space(coef);
3705
14
  space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space)));
3706
14
  dom = isl_space_domain(isl_space_copy(space));
3707
14
  src = graph_find_compressed_node(data->ctx, data->graph, dom);
3708
14
  isl_space_free(dom);
3709
14
  space = isl_space_range(space);
3710
14
  dst = graph_find_compressed_node(data->ctx, data->graph, space);
3711
14
  isl_space_free(space);
3712
14
3713
14
  return add_inter_constraints(data->graph, src, dst, coef, data->pos++);
3714
14
}
3715
3716
/* Add constraints to graph->lp that force all (conditional) validity
3717
 * dependences to be respected and attempt to carry them.
3718
 * "intra" is the sequence of coefficient constraints for intra-node edges.
3719
 * "inter" is the sequence of coefficient constraints for inter-node edges.
3720
 */
3721
static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph,
3722
  __isl_keep isl_basic_set_list *intra,
3723
  __isl_keep isl_basic_set_list *inter)
3724
29
{
3725
29
  struct isl_add_all_constraints_data data = { ctx, graph };
3726
29
3727
29
  data.pos = 0;
3728
29
  if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0)
3729
0
    return isl_stat_error;
3730
29
  
if (29
isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 029
)
3731
0
    return isl_stat_error;
3732
29
  return isl_stat_ok;
3733
29
}
3734
3735
/* Internal data structure for count_all_constraints
3736
 * for keeping track of the number of equality and inequality constraints.
3737
 */
3738
struct isl_sched_count {
3739
  int n_eq;
3740
  int n_ineq;
3741
};
3742
3743
/* Add the number of equality and inequality constraints of "bset"
3744
 * to data->n_eq and data->n_ineq.
3745
 */
3746
static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user)
3747
59
{
3748
59
  struct isl_sched_count *data = user;
3749
59
3750
59
  data->n_eq += isl_basic_set_n_equality(bset);
3751
59
  data->n_ineq += isl_basic_set_n_inequality(bset);
3752
59
  isl_basic_set_free(bset);
3753
59
3754
59
  return isl_stat_ok;
3755
59
}
3756
3757
/* Count the number of equality and inequality constraints
3758
 * that will be added to the carry_lp problem.
3759
 * We count each edge exactly once.
3760
 * "intra" is the sequence of coefficient constraints for intra-node edges.
3761
 * "inter" is the sequence of coefficient constraints for inter-node edges.
3762
 */
3763
static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra,
3764
  __isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq)
3765
29
{
3766
29
  struct isl_sched_count data;
3767
29
3768
29
  data.n_eq = data.n_ineq = 0;
3769
29
  if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0)
3770
0
    return isl_stat_error;
3771
29
  
if (29
isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 029
)
3772
0
    return isl_stat_error;
3773
29
3774
29
  *n_eq = data.n_eq;
3775
29
  *n_ineq = data.n_ineq;
3776
29
3777
29
  return isl_stat_ok;
3778
29
}
3779
3780
/* Construct an LP problem for finding schedule coefficients
3781
 * such that the schedule carries as many validity dependences as possible.
3782
 * In particular, for each dependence i, we bound the dependence distance
3783
 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
3784
 * of all e_i's.  Dependences with e_i = 0 in the solution are simply
3785
 * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
3786
 * "intra" is the sequence of coefficient constraints for intra-node edges.
3787
 * "inter" is the sequence of coefficient constraints for inter-node edges.
3788
 * "n_edge" is the total number of edges.
3789
 *
3790
 * All variables of the LP are non-negative.  The actual coefficients
3791
 * may be negative, so each coefficient is represented as the difference
3792
 * of two non-negative variables.  The negative part always appears
3793
 * immediately before the positive part.
3794
 * Other than that, the variables have the following order
3795
 *
3796
 *  - sum of (1 - e_i) over all edges
3797
 *  - sum of all c_n coefficients
3798
 *    (unconstrained when computing non-parametric schedules)
3799
 *  - sum of positive and negative parts of all c_x coefficients
3800
 *  - for each edge
3801
 *    - e_i
3802
 *  - for each node
3803
 *    - c_i_0
3804
 *    - c_i_n (if parametric)
3805
 *    - positive and negative parts of c_i_x
3806
 *
3807
 * The constraints are those from the (validity) edges plus three equalities
3808
 * to express the sums and n_edge inequalities to express e_i <= 1.
3809
 */
3810
static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
3811
  int n_edge, __isl_keep isl_basic_set_list *intra,
3812
  __isl_keep isl_basic_set_list *inter)
3813
29
{
3814
29
  int i;
3815
29
  int k;
3816
29
  isl_space *dim;
3817
29
  unsigned total;
3818
29
  int n_eq, n_ineq;
3819
29
3820
29
  total = 3 + n_edge;
3821
66
  for (i = 0; 
i < graph->n66
;
++i37
)
{37
3822
37
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
3823
37
    node->start = total;
3824
37
    total += 1 + node->nparam + 2 * node->nvar;
3825
37
  }
3826
29
3827
29
  if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0)
3828
0
    return isl_stat_error;
3829
29
3830
29
  dim = isl_space_set_alloc(ctx, 0, total);
3831
29
  isl_basic_set_free(graph->lp);
3832
29
  n_eq += 3;
3833
29
  n_ineq += n_edge;
3834
29
  graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
3835
29
  graph->lp = isl_basic_set_set_rational(graph->lp);
3836
29
3837
29
  k = isl_basic_set_alloc_equality(graph->lp);
3838
29
  if (k < 0)
3839
0
    return isl_stat_error;
3840
29
  isl_seq_clr(graph->lp->eq[k], 1 + total);
3841
29
  isl_int_set_si(graph->lp->eq[k][0], -n_edge);
3842
29
  isl_int_set_si(graph->lp->eq[k][1], 1);
3843
88
  for (i = 0; 
i < n_edge88
;
++i59
)
3844
59
    isl_int_set_si(graph->lp->eq[k][4 + i], 1);
3845
29
3846
29
  if (add_param_sum_constraint(graph, 1) < 0)
3847
0
    return isl_stat_error;
3848
29
  
if (29
add_var_sum_constraint(graph, 2) < 029
)
3849
0
    return isl_stat_error;
3850
29
3851
88
  
for (i = 0; 29
i < n_edge88
;
++i59
)
{59
3852
59
    k = isl_basic_set_alloc_inequality(graph->lp);
3853
59
    if (k < 0)
3854
0
      return isl_stat_error;
3855
59
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
3856
59
    isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
3857
59
    isl_int_set_si(graph->lp->ineq[k][0], 1);
3858
59
  }
3859
29
3860
29
  
if (29
add_all_constraints(ctx, graph, intra, inter) < 029
)
3861
0
    return isl_stat_error;
3862
29
3863
29
  return isl_stat_ok;
3864
29
}
3865
3866
static __isl_give isl_schedule_node *compute_component_schedule(
3867
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3868
  int wcc);
3869
3870
/* Comparison function for sorting the statements based on
3871
 * the corresponding value in "r".
3872
 */
3873
static int smaller_value(const void *a, const void *b, void *data)
3874
8
{
3875
8
  isl_vec *r = data;
3876
8
  const int *i1 = a;
3877
8
  const int *i2 = b;
3878
8
3879
8
  return isl_int_cmp(r->el[*i1], r->el[*i2]);
3880
8
}
3881
3882
/* If the schedule_split_scaled option is set and if the linear
3883
 * parts of the scheduling rows for all nodes in the graphs have
3884
 * a non-trivial common divisor, then split off the remainder of the
3885
 * constant term modulo this common divisor from the linear part.
3886
 * Otherwise, insert a band node directly and continue with
3887
 * the construction of the schedule.
3888
 *
3889
 * If a non-trivial common divisor is found, then
3890
 * the linear part is reduced and the remainder is enforced
3891
 * by a sequence node with the children placed in the order
3892
 * of this remainder.
3893
 * In particular, we assign an scc index based on the remainder and
3894
 * then rely on compute_component_schedule to insert the sequence and
3895
 * to continue the schedule construction on each part.
3896
 */
3897
static __isl_give isl_schedule_node *split_scaled(
3898
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3899
29
{
3900
29
  int i;
3901
29
  int row;
3902
29
  int scc;
3903
29
  isl_ctx *ctx;
3904
29
  isl_int gcd, gcd_i;
3905
29
  isl_vec *r;
3906
29
  int *order;
3907
29
3908
29
  if (!node)
3909
0
    return NULL;
3910
29
3911
29
  ctx = isl_schedule_node_get_ctx(node);
3912
29
  if (!ctx->opt->schedule_split_scaled)
3913
0
    return compute_next_band(node, graph, 0);
3914
29
  
if (29
graph->n <= 129
)
3915
23
    return compute_next_band(node, graph, 0);
3916
29
3917
6
  
isl_int_init6
(gcd);6
3918
6
  isl_int_init(gcd_i);
3919
6
3920
6
  isl_int_set_si(gcd, 0);
3921
6
3922
6
  row = isl_mat_rows(graph->node[0].sched) - 1;
3923
6
3924
20
  for (i = 0; 
i < graph->n20
;
++i14
)
{14
3925
14
    struct isl_sched_node *node = &graph->node[i];
3926
14
    int cols = isl_mat_cols(node->sched);
3927
14
3928
14
    isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
3929
14
    isl_int_gcd(gcd, gcd, gcd_i);
3930
14
  }
3931
6
3932
6
  isl_int_clear(gcd_i);
3933
6
3934
6
  if (
isl_int_cmp_si6
(gcd, 1) <= 06
)
{2
3935
2
    isl_int_clear(gcd);
3936
2
    return compute_next_band(node, graph, 0);
3937
2
  }
3938
6
3939
4
  r = isl_vec_alloc(ctx, graph->n);
3940
4
  order = isl_calloc_array(ctx, int, graph->n);
3941
4
  if (
!r || 4
!order4
)
3942
0
    goto error;
3943
4
3944
14
  
for (i = 0; 4
i < graph->n14
;
++i10
)
{10
3945
10
    struct isl_sched_node *node = &graph->node[i];
3946
10
3947
10
    order[i] = i;
3948
10
    isl_int_fdiv_r(r->el[i], node->sched->row[row][0], gcd);
3949
10
    isl_int_fdiv_q(node->sched->row[row][0],
3950
10
             node->sched->row[row][0], gcd);
3951
10
    isl_int_mul(node->sched->row[row][0],
3952
10
          node->sched->row[row][0], gcd);
3953
10
    node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
3954
10
    if (!node->sched)
3955
0
      goto error;
3956
10
  }
3957
4
3958
4
  
if (4
isl_sort(order, graph->n, sizeof(order[0]), &smaller_value, r) < 04
)
3959
0
    goto error;
3960
4
3961
4
  scc = 0;
3962
14
  for (i = 0; 
i < graph->n14
;
++i10
)
{10
3963
10
    if (
i > 0 && 10
isl_int_ne6
(r->el[order[i - 1]], r->el[order[i]]))
3964
6
      ++scc;
3965
10
    graph->node[order[i]].scc = scc;
3966
10
  }
3967
4
  graph->scc = ++scc;
3968
4
  graph->weak = 0;
3969
4
3970
4
  isl_int_clear(gcd);
3971
4
  isl_vec_free(r);
3972
4
  free(order);
3973
4
3974
4
  if (update_edges(ctx, graph) < 0)
3975
0
    return isl_schedule_node_free(node);
3976
4
  node = insert_current_band(node, graph, 0);
3977
4
  next_band(graph);
3978
4
3979
4
  node = isl_schedule_node_child(node, 0);
3980
4
  node = compute_component_schedule(node, graph, 0);
3981
4
  node = isl_schedule_node_parent(node);
3982
4
3983
4
  return node;
3984
0
error:
3985
0
  isl_vec_free(r);
3986
0
  free(order);
3987
0
  isl_int_clear(gcd);
3988
0
  return isl_schedule_node_free(node);
3989
4
}
3990
3991
/* Is the schedule row "sol" trivial on node "node"?
3992
 * That is, is the solution zero on the dimensions linearly independent of
3993
 * the previously found solutions?
3994
 * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
3995
 *
3996
 * Each coefficient is represented as the difference between
3997
 * two non-negative values in "sol".  "sol" has been computed
3998
 * in terms of the original iterators (i.e., without use of cmap).
3999
 * We construct the schedule row s and write it as a linear
4000
 * combination of (linear combinations of) previously computed schedule rows.
4001
 * s = Q c or c = U s.
4002
 * If the final entries of c are all zero, then the solution is trivial.
4003
 */
4004
static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
4005
33
{
4006
33
  int trivial;
4007
33
  isl_vec *node_sol;
4008
33
4009
33
  if (!sol)
4010
0
    return -1;
4011
33
  
if (33
node->nvar == node->rank33
)
4012
0
    return 0;
4013
33
4014
33
  node_sol = extract_var_coef(node, sol);
4015
33
  node_sol = isl_mat_vec_product(isl_mat_copy(node->cinv), node_sol);
4016
33
  if (!node_sol)
4017
0
    return -1;
4018
33
4019
33
  trivial = isl_seq_first_non_zero(node_sol->el + node->rank,
4020
33
          node->nvar - node->rank) == -1;
4021
33
4022
33
  isl_vec_free(node_sol);
4023
33
4024
33
  return trivial;
4025
33
}
4026
4027
/* Is the schedule row "sol" trivial on any node where it should
4028
 * not be trivial?
4029
 * "sol" has been computed in terms of the original iterators
4030
 * (i.e., without use of cmap).
4031
 * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
4032
 */
4033
static int is_any_trivial(struct isl_sched_graph *graph,
4034
  __isl_keep isl_vec *sol)
4035
29
{
4036
29
  int i;
4037
29
4038
58
  for (i = 0; 
i < graph->n58
;
++i29
)
{33
4039
33
    struct isl_sched_node *node = &graph->node[i];
4040
33
    int trivial;
4041
33
4042
33
    if (!needs_row(graph, node))
4043
0
      continue;
4044
33
    trivial = is_trivial(node, sol);
4045
33
    if (
trivial < 0 || 33
trivial33
)
4046
4
      return trivial;
4047
33
  }
4048
29
4049
25
  return 0;
4050
29
}
4051
4052
/* Does the schedule represented by "sol" perform loop coalescing on "node"?
4053
 * If so, return the position of the coalesced dimension.
4054
 * Otherwise, return node->nvar or -1 on error.
4055
 *
4056
 * In particular, look for pairs of coefficients c_i and c_j such that
4057
 * |c_j/c_i| >= size_i, i.e., |c_j| >= |c_i * size_i|.
4058
 * If any such pair is found, then return i.
4059
 * If size_i is infinity, then no check on c_i needs to be performed.
4060
 */
4061
static int find_node_coalescing(struct isl_sched_node *node,
4062
  __isl_keep isl_vec *sol)
4063
43
{
4064
43
  int i, j;
4065
43
  isl_int max;
4066
43
  isl_vec *csol;
4067
43
4068
43
  if (node->nvar <= 1)
4069
16
    return node->nvar;
4070
43
4071
27
  csol = extract_var_coef(node, sol);
4072
27
  if (!csol)
4073
0
    return -1;
4074
27
  
isl_int_init27
(max);27
4075
77
  for (i = 0; 
i < node->nvar77
;
++i50
)
{54
4076
54
    isl_val *v;
4077
54
4078
54
    if (isl_int_is_zero(csol->el[i]))
4079
20
      continue;
4080
34
    v = isl_multi_val_get_val(node->sizes, i);
4081
34
    if (!v)
4082
0
      goto error;
4083
34
    
if (34
!isl_val_is_int(v)34
)
{14
4084
14
      isl_val_free(v);
4085
14
      continue;
4086
14
    }
4087
20
    
isl_int_mul20
(max, v->n, csol->el[i]);20
4088
20
    isl_val_free(v);
4089
20
4090
52
    for (j = 0; 
j < node->nvar52
;
++j32
)
{36
4091
36
      if (j == i)
4092
16
        continue;
4093
20
      
if (20
isl_int_abs_ge20
(csol->el[j], max))
4094
4
        break;
4095
20
    }
4096
20
    if (j < node->nvar)
4097
4
      break;
4098
20
  }
4099
27
4100
27
  
isl_int_clear27
(max);27
4101
27
  isl_vec_free(csol);
4102
27
  return i;
4103
0
error:
4104
0
  isl_int_clear(max);
4105
0
  isl_vec_free(csol);
4106
0
  return -1;
4107
27
}
4108
4109
/* Force the schedule coefficient at position "pos" of "node" to be zero
4110
 * in "tl".
4111
 * The coefficient is encoded as the difference between two non-negative
4112
 * variables.  Force these two variables to have the same value.
4113
 */
4114
static __isl_give isl_tab_lexmin *zero_out_node_coef(
4115
  __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
4116
4
{
4117
4
  int dim;
4118
4
  isl_ctx *ctx;
4119
4
  isl_vec *eq;
4120
4
4121
4
  ctx = isl_space_get_ctx(node->space);
4122
4
  dim = isl_tab_lexmin_dim(tl);
4123
4
  if (dim < 0)
4124
0
    return isl_tab_lexmin_free(tl);
4125
4
  eq = isl_vec_alloc(ctx, 1 + dim);
4126
4
  eq = isl_vec_clr(eq);
4127
4
  if (!eq)
4128
0
    return isl_tab_lexmin_free(tl);
4129
4
4130
4
  pos = 1 + node_var_coef_offset(node) + 2 * pos;
4131
4
  isl_int_set_si(eq->el[pos], 1);
4132
4
  isl_int_set_si(eq->el[pos + 1], -1);
4133
4
  tl = isl_tab_lexmin_add_eq(tl, eq->el);
4134
4
  isl_vec_free(eq);
4135
4
4136
4
  return tl;
4137
4
}
4138
4139
/* Return the lexicographically smallest rational point in the basic set
4140
 * from which "tl" was constructed, double checking that this input set
4141
 * was not empty.
4142
 */
4143
static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
4144
35
{
4145
35
  isl_vec *sol;
4146
35
4147
35
  sol = isl_tab_lexmin_get_solution(tl);
4148
35
  if (!sol)
4149
0
    return NULL;
4150
35
  
if (35
sol->size == 035
)
4151
0
    isl_die(isl_vec_get_ctx(sol), isl_error_internal,
4152
35
      "error in schedule construction",
4153
35
      return isl_vec_free(sol));
4154
35
  return sol;
4155
35
}
4156
4157
/* Does the solution "sol" of the LP problem constructed by setup_carry_lp
4158
 * carry any of the "n_edge" groups of dependences?
4159
 * The value in the first position is the sum of (1 - e_i) over all "n_edge"
4160
 * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
4161
 * by the edge are carried by the solution.
4162
 * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
4163
 * one of those is carried.
4164
 *
4165
 * Note that despite the fact that the problem is solved using a rational
4166
 * solver, the solution is guaranteed to be integral.
4167
 * Specifically, the dependence distance lower bounds e_i (and therefore
4168
 * also their sum) are integers.  See Lemma 5 of [1].
4169
 *
4170
 * Any potential denominator of the sum is cleared by this function.
4171
 * The denominator is not relevant for any of the other elements
4172
 * in the solution.
4173
 *
4174
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4175
 *     Problem, Part II: Multi-Dimensional Time.
4176
 *     In Intl. Journal of Parallel Programming, 1992.
4177
 */
4178
static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
4179
35
{
4180
35
  isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
4181
35
  isl_int_set_si(sol->el[0], 1);
4182
35
  return isl_int_cmp_si(sol->el[1], n_edge) < 0;
4183
35
}
4184
4185
/* Return the lexicographically smallest rational point in "lp",
4186
 * assuming that all variables are non-negative and performing some
4187
 * additional sanity checks.
4188
 * If "want_integral" is set, then compute the lexicographically smallest
4189
 * integer point instead.
4190
 * In particular, "lp" should not be empty by construction.
4191
 * Double check that this is the case.
4192
 * If dependences are not carried for any of the "n_edge" edges,
4193
 * then return an empty vector.
4194
 *
4195
 * If the schedule_treat_coalescing option is set and
4196
 * if the computed schedule performs loop coalescing on a given node,
4197
 * i.e., if it is of the form
4198
 *
4199
 *  c_i i + c_j j + ...
4200
 *
4201
 * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
4202
 * to cut out this solution.  Repeat this process until no more loop
4203
 * coalescing occurs or until no more dependences can be carried.
4204
 * In the latter case, revert to the previously computed solution.
4205
 *
4206
 * If the caller requests an integral solution and if coalescing should
4207
 * be treated, then perform the coalescing treatment first as
4208
 * an integral solution computed before coalescing treatment
4209
 * would carry the same number of edges and would therefore probably
4210
 * also be coalescing.
4211
 *
4212
 * To allow the coalescing treatment to be performed first,
4213
 * the initial solution is allowed to be rational and it is only
4214
 * cut out (if needed) in the next iteration, if no coalescing measures
4215
 * were taken.
4216
 */
4217
static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
4218
  __isl_take isl_basic_set *lp, int n_edge, int want_integral)
4219
29
{
4220
29
  int i, pos, cut;
4221
29
  isl_ctx *ctx;
4222
29
  isl_tab_lexmin *tl;
4223
29
  isl_vec *sol, *prev = NULL;
4224
29
  int treat_coalescing;
4225
29
4226
29
  if (!lp)
4227
0
    return NULL;
4228
29
  ctx = isl_basic_set_get_ctx(lp);
4229
29
  treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
4230
29
  tl = isl_tab_lexmin_from_basic_set(lp);
4231
29
4232
29
  cut = 0;
4233
35
  do {
4234
35
    int integral;
4235
35
4236
35
    if (cut)
4237
2
      tl = isl_tab_lexmin_cut_to_integer(tl);
4238
35
    sol = non_empty_solution(tl);
4239
35
    if (!sol)
4240
0
      goto error;
4241
35
4242
35
    
integral = 35
isl_int_is_one35
(sol->el[0]);
4243
35
    if (
!carries_dependences(sol, n_edge)35
)
{2
4244
2
      if (!prev)
4245
0
        prev = isl_vec_alloc(ctx, 0);
4246
2
      isl_vec_free(sol);
4247
2
      sol = prev;
4248
2
      break;
4249
2
    }
4250
33
    prev = isl_vec_free(prev);
4251
21
    cut = want_integral && !integral;
4252
33
    if (cut)
4253
2
      prev = sol;
4254
33
    if (!treat_coalescing)
4255
0
      continue;
4256
72
    
for (i = 0; 33
i < graph->n72
;
++i39
)
{43
4257
43
      struct isl_sched_node *node = &graph->node[i];
4258
43
4259
43
      pos = find_node_coalescing(node, sol);
4260
43
      if (pos < 0)
4261
0
        goto error;
4262
43
      
if (43
pos < node->nvar43
)
4263
4
        break;
4264
43
    }
4265
33
    
if (33
i < graph->n33
)
{4
4266
4
      prev = sol;
4267
4
      tl = zero_out_node_coef(tl, &graph->node[i], pos);
4268
4
      cut = 0;
4269
4
    }
4270
33
  } while (prev);
4271
29
4272
29
  isl_tab_lexmin_free(tl);
4273
29
4274
29
  return sol;
4275
0
error:
4276
0
  isl_tab_lexmin_free(tl);
4277
0
  isl_vec_free(prev);
4278
0
  isl_vec_free(sol);
4279
0
  return NULL;
4280
29
}
4281
4282
/* If "edge" is an edge from a node to itself, then add the corresponding
4283
 * dependence relation to "umap".
4284
 * If "node" has been compressed, then the dependence relation
4285
 * is also compressed first.
4286
 */
4287
static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap,
4288
  struct isl_sched_edge *edge)
4289
39
{
4290
39
  isl_map *map;
4291
39
  struct isl_sched_node *node = edge->src;
4292
39
4293
39
  if (edge->src != edge->dst)
4294
14
    return umap;
4295
39
4296
25
  map = isl_map_copy(edge->map);
4297
25
  if (
node->compressed25
)
{0
4298
0
    map = isl_map_preimage_domain_multi_aff(map,
4299
0
            isl_multi_aff_copy(node->decompress));
4300
0
    map = isl_map_preimage_range_multi_aff(map,
4301
0
            isl_multi_aff_copy(node->decompress));
4302
0
  }
4303
25
  umap = isl_union_map_add_map(umap, map);
4304
25
  return umap;
4305
39
}
4306
4307
/* If "edge" is an edge from a node to another node, then add the corresponding
4308
 * dependence relation to "umap".
4309
 * If the source or destination nodes of "edge" have been compressed,
4310
 * then the dependence relation is also compressed first.
4311
 */
4312
static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap,
4313
  struct isl_sched_edge *edge)
4314
39
{
4315
39
  isl_map *map;
4316
39
4317
39
  if (edge->src == edge->dst)
4318
25
    return umap;
4319
39
4320
14
  map = isl_map_copy(edge->map);
4321
14
  if (edge->src->compressed)
4322
0
    map = isl_map_preimage_domain_multi_aff(map,
4323
0
            isl_multi_aff_copy(edge->src->decompress));
4324
14
  if (edge->dst->compressed)
4325
0
    map = isl_map_preimage_range_multi_aff(map,
4326
0
            isl_multi_aff_copy(edge->dst->decompress));
4327
14
  umap = isl_union_map_add_map(umap, map);
4328
14
  return umap;
4329
39
}
4330
4331
/* For each (conditional) validity edge in "graph",
4332
 * add the corresponding dependence relation using "add"
4333
 * to a collection of dependence relations and return the result.
4334
 * If "coincidence" is set, then coincidence edges are considered as well.
4335
 */
4336
static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph,
4337
  __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap,
4338
    struct isl_sched_edge *edge), int coincidence)
4339
60
{
4340
60
  int i;
4341
60
  isl_space *space;
4342
60
  isl_union_map *umap;
4343
60
4344
60
  space = isl_space_copy(graph->node[0].space);
4345
60
  umap = isl_union_map_empty(space);
4346
60
4347
140
  for (i = 0; 
i < graph->n_edge140
;
++i80
)
{80
4348
80
    struct isl_sched_edge *edge = &graph->edge[i];
4349
80
4350
80
    if (!is_any_validity(edge) &&
4351
2
        
(!coincidence || 2
!is_coincidence(edge)0
))
4352
2
      continue;
4353
80
4354
78
    umap = add(umap, edge);
4355
78
  }
4356
60
4357
60
  return umap;
4358
60
}
4359
4360
/* For each dependence relation on a (conditional) validity edge
4361
 * from a node to itself,
4362
 * construct the set of coefficients of valid constraints for elements
4363
 * in that dependence relation and collect the results.
4364
 * If "coincidence" is set, then coincidence edges are considered as well.
4365
 *
4366
 * In particular, for each dependence relation R, constraints
4367
 * on coefficients (c_0, c_n, c_x) are constructed such that
4368
 *
4369
 *  c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
4370
 *
4371
 * This computation is essentially the same as that performed
4372
 * by intra_coefficients, except that it operates on multiple
4373
 * edges together.
4374
 *
4375
 * Note that if a dependence relation is a union of basic maps,
4376
 * then each basic map needs to be treated individually as it may only
4377
 * be possible to carry the dependences expressed by some of those
4378
 * basic maps and not all of them.
4379
 * The collected validity constraints are therefore not coalesced and
4380
 * it is assumed that they are not coalesced automatically.
4381
 * Duplicate basic maps can be removed, however.
4382
 * In particular, if the same basic map appears as a disjunct
4383
 * in multiple edges, then it only needs to be carried once.
4384
 */
4385
static __isl_give isl_basic_set_list *collect_intra_validity(
4386
  struct isl_sched_graph *graph, int coincidence)
4387
30
{
4388
30
  isl_union_map *intra;
4389
30
  isl_union_set *delta;
4390
30
  isl_basic_set_list *list;
4391
30
4392
30
  intra = collect_validity(graph, &add_intra, coincidence);
4393
30
  delta = isl_union_map_deltas(intra);
4394
30
  delta = isl_union_set_remove_divs(delta);
4395
30
  list = isl_union_set_get_basic_set_list(delta);
4396
30
  isl_union_set_free(delta);
4397
30
4398
30
  return isl_basic_set_list_coefficients(list);
4399
30
}
4400
4401
/* For each dependence relation on a (conditional) validity edge
4402
 * from a node to some other node,
4403
 * construct the set of coefficients of valid constraints for elements
4404
 * in that dependence relation and collect the results.
4405
 * If "coincidence" is set, then coincidence edges are considered as well.
4406
 *
4407
 * In particular, for each dependence relation R, constraints
4408
 * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
4409
 *
4410
 *  c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
4411
 *
4412
 * This computation is essentially the same as that performed
4413
 * by inter_coefficients, except that it operates on multiple
4414
 * edges together.
4415
 *
4416
 * Note that if a dependence relation is a union of basic maps,
4417
 * then each basic map needs to be treated individually as it may only
4418
 * be possible to carry the dependences expressed by some of those
4419
 * basic maps and not all of them.
4420
 * The collected validity constraints are therefore not coalesced and
4421
 * it is assumed that they are not coalesced automatically.
4422
 * Duplicate basic maps can be removed, however.
4423
 * In particular, if the same basic map appears as a disjunct
4424
 * in multiple edges, then it only needs to be carried once.
4425
 */
4426
static __isl_give isl_basic_set_list *collect_inter_validity(
4427
  struct isl_sched_graph *graph, int coincidence)
4428
30
{
4429
30
  isl_union_map *inter;
4430
30
  isl_union_set *wrap;
4431
30
  isl_basic_set_list *list;
4432
30
4433
30
  inter = collect_validity(graph, &add_inter, coincidence);
4434
30
  inter = isl_union_map_remove_divs(inter);
4435
30
  wrap = isl_union_map_wrap(inter);
4436
30
  list = isl_union_set_get_basic_set_list(wrap);
4437
30
  isl_union_set_free(wrap);
4438
30
  return isl_basic_set_list_coefficients(list);
4439
30
}
4440
4441
/* Construct an LP problem for finding schedule coefficients
4442
 * such that the schedule carries as many of the validity dependences
4443
 * as possible and
4444
 * return the lexicographically smallest non-trivial solution.
4445
 * If "fallback" is set, then the carrying is performed as a fallback
4446
 * for the Pluto-like scheduler.
4447
 * If "coincidence" is set, then try and carry coincidence edges as well.
4448
 *
4449
 * The variable "n_edge" stores the number of groups that should be carried.
4450
 * If none of the "n_edge" groups can be carried
4451
 * then return an empty vector.
4452
 * If, moreover, "n_edge" is zero, then the LP problem does not even
4453
 * need to be constructed.
4454
 *
4455
 * If a fallback solution is being computed, then compute an integral solution
4456
 * for the coefficients rather than using the numerators
4457
 * of a rational solution.
4458
 */
4459
static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx,
4460
  struct isl_sched_graph *graph, int fallback, int coincidence)
4461
30
{
4462
30
  int n_intra, n_inter;
4463
30
  int n_edge;
4464
30
  isl_basic_set *lp;
4465
30
  struct isl_carry carry = { 0 };
4466
30
4467
30
  carry.intra = collect_intra_validity(graph, coincidence);
4468
30
  carry.inter = collect_inter_validity(graph, coincidence);
4469
30
  if (
!carry.intra || 30
!carry.inter30
)
4470
0
    goto error;
4471
30
  n_intra = isl_basic_set_list_n_basic_set(carry.intra);
4472
30
  n_inter = isl_basic_set_list_n_basic_set(carry.inter);
4473
30
  n_edge = n_intra + n_inter;
4474
30
  if (
n_edge == 030
)
{1
4475
1
    isl_carry_clear(&carry);
4476
1
    return isl_vec_alloc(ctx, 0);
4477
1
  }
4478
30
4479
29
  
if (29
setup_carry_lp(ctx, graph, n_edge, carry.intra, carry.inter) < 029
)
4480
0
    goto error;
4481
29
4482
29
  isl_carry_clear(&carry);
4483
29
  lp = isl_basic_set_copy(graph->lp);
4484
29
  return non_neg_lexmin(graph, lp, n_edge, fallback);
4485
0
error:
4486
0
  isl_carry_clear(&carry);
4487
0
  return NULL;
4488
29
}
4489
4490
/* Construct a schedule row for each node such that as many validity dependences
4491
 * as possible are carried and then continue with the next band.
4492
 * If "fallback" is set, then the carrying is performed as a fallback
4493
 * for the Pluto-like scheduler.
4494
 * If "coincidence" is set, then try and carry coincidence edges as well.
4495
 *
4496
 * If there are no validity dependences, then no dependence can be carried and
4497
 * the procedure is guaranteed to fail.  If there is more than one component,
4498
 * then try computing a schedule on each component separately
4499
 * to prevent or at least postpone this failure.
4500
 *
4501
 * If a schedule row is computed, then check that dependences are carried
4502
 * for at least one of the edges.
4503
 *
4504
 * If the computed schedule row turns out to be trivial on one or
4505
 * more nodes where it should not be trivial, then we throw it away
4506
 * and try again on each component separately.
4507
 *
4508
 * If there is only one component, then we accept the schedule row anyway,
4509
 * but we do not consider it as a complete row and therefore do not
4510
 * increment graph->n_row.  Note that the ranks of the nodes that
4511
 * do get a non-trivial schedule part will get updated regardless and
4512
 * graph->maxvar is computed based on these ranks.  The test for
4513
 * whether more schedule rows are required in compute_schedule_wcc
4514
 * is therefore not affected.
4515
 *
4516
 * Insert a band corresponding to the schedule row at position "node"
4517
 * of the schedule tree and continue with the construction of the schedule.
4518
 * This insertion and the continued construction is performed by split_scaled
4519
 * after optionally checking for non-trivial common divisors.
4520
 */
4521
static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node,
4522
  struct isl_sched_graph *graph, int fallback, int coincidence)
4523
30
{
4524
30
  int trivial;
4525
30
  isl_ctx *ctx;
4526
30
  isl_vec *sol;
4527
30
4528
30
  if (!node)
4529
0
    return NULL;
4530
30
4531
30
  ctx = isl_schedule_node_get_ctx(node);
4532
30
  sol = compute_carrying_sol(ctx, graph, fallback, coincidence);
4533
30
  if (!sol)
4534
0
    return isl_schedule_node_free(node);
4535
30
  
if (30
sol->size == 030
)
{1
4536
1
    isl_vec_free(sol);
4537
1
    if (graph->scc > 1)
4538
1
      return compute_component_schedule(node, graph, 1);
4539
0
    
isl_die0
(ctx, isl_error_unknown, "unable to carry dependences",0
4540
0
      return isl_schedule_node_free(node));
4541
0
  }
4542
30
4543
29
  trivial = is_any_trivial(graph, sol);
4544
29
  if (
trivial < 029
)
{0
4545
0
    sol = isl_vec_free(sol);
4546
29
  } else 
if (29
trivial && 29
graph->scc > 14
)
{0
4547
0
    isl_vec_free(sol);
4548
0
    return compute_component_schedule(node, graph, 1);
4549
0
  }
4550
29
4551
29
  
if (29
update_schedule(graph, sol, 0, 0) < 029
)
4552
0
    return isl_schedule_node_free(node);
4553
29
  
if (29
trivial29
)
4554
4
    graph->n_row--;
4555
29
4556
29
  return split_scaled(node, graph);
4557
29
}
4558
4559
/* Construct a schedule row for each node such that as many validity dependences
4560
 * as possible are carried and then continue with the next band.
4561
 * Do so as a fallback for the Pluto-like scheduler.
4562
 * If "coincidence" is set, then try and carry coincidence edges as well.
4563
 */
4564
static __isl_give isl_schedule_node *carry_fallback(
4565
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4566
  int coincidence)
4567
18
{
4568
18
  return carry(node, graph, 1, coincidence);
4569
18
}
4570
4571
/* Construct a schedule row for each node such that as many validity dependences
4572
 * as possible are carried and then continue with the next band.
4573
 * Do so for the case where the Feautrier scheduler was selected
4574
 * by the user.
4575
 */
4576
static __isl_give isl_schedule_node *carry_feautrier(
4577
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4578
12
{
4579
12
  return carry(node, graph, 0, 0);
4580
12
}
4581
4582
/* Construct a schedule row for each node such that as many validity dependences
4583
 * as possible are carried and then continue with the next band.
4584
 * Do so as a fallback for the Pluto-like scheduler.
4585
 */
4586
static __isl_give isl_schedule_node *carry_dependences(
4587
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4588
9
{
4589
9
  return carry_fallback(node, graph, 0);
4590
9
}
4591
4592
/* Construct a schedule row for each node such that as many validity or
4593
 * coincidence dependences as possible are carried and
4594
 * then continue with the next band.
4595
 * Do so as a fallback for the Pluto-like scheduler.
4596
 */
4597
static __isl_give isl_schedule_node *carry_coincidence(
4598
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4599
9
{
4600
9
  return carry_fallback(node, graph, 1);
4601
9
}
4602
4603
/* Topologically sort statements mapped to the same schedule iteration
4604
 * and add insert a sequence node in front of "node"
4605
 * corresponding to this order.
4606
 * If "initialized" is set, then it may be assumed that compute_maxvar
4607
 * has been called on the current band.  Otherwise, call
4608
 * compute_maxvar if and before carry_dependences gets called.
4609
 *
4610
 * If it turns out to be impossible to sort the statements apart,
4611
 * because different dependences impose different orderings
4612
 * on the statements, then we extend the schedule such that
4613
 * it carries at least one more dependence.
4614
 */
4615
static __isl_give isl_schedule_node *sort_statements(
4616
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4617
  int initialized)
4618
149
{
4619
149
  isl_ctx *ctx;
4620
149
  isl_union_set_list *filters;
4621
149
4622
149
  if (!node)
4623
0
    return NULL;
4624
149
4625
149
  ctx = isl_schedule_node_get_ctx(node);
4626
149
  if (graph->n < 1)
4627
0
    isl_die(ctx, isl_error_internal,
4628
149
      "graph should have at least one node",
4629
149
      return isl_schedule_node_free(node));
4630
149
4631
149
  
if (149
graph->n == 1149
)
4632
110
    return node;
4633
149
4634
39
  
if (39
update_edges(ctx, graph) < 039
)
4635
0
    return isl_schedule_node_free(node);
4636
39
4637
39
  
if (39
graph->n_edge == 039
)
4638
0
    return node;
4639
39
4640
39
  
if (39
detect_sccs(ctx, graph) < 039
)
4641
0
    return isl_schedule_node_free(node);
4642
39
4643
39
  next_band(graph);
4644
39
  if (
graph->scc < graph->n39
)
{2
4645
2
    if (
!initialized && 2
compute_maxvar(graph) < 00
)
4646
0
      return isl_schedule_node_free(node);
4647
2
    return carry_dependences(node, graph);
4648
2
  }
4649
39
4650
37
  filters = extract_sccs(ctx, graph);
4651
37
  node = isl_schedule_node_insert_sequence(node, filters);
4652
37
4653
37
  return node;
4654
39
}
4655
4656
/* Are there any (non-empty) (conditional) validity edges in the graph?
4657
 */
4658
static int has_validity_edges(struct isl_sched_graph *graph)
4659
26
{
4660
26
  int i;
4661
26
4662
34
  for (i = 0; 
i < graph->n_edge34
;
++i8
)
{20
4663
20
    int empty;
4664
20
4665
20
    empty = isl_map_plain_is_empty(graph->edge[i].map);
4666
20
    if (empty < 0)
4667
0
      return -1;
4668
20
    
if (20
empty20
)
4669
6
      continue;
4670
14
    
if (14
is_any_validity(&graph->edge[i])14
)
4671
12
      return 1;
4672
14
  }
4673
26
4674
14
  return 0;
4675
26
}
4676
4677
/* Should we apply a Feautrier step?
4678
 * That is, did the user request the Feautrier algorithm and are
4679
 * there any validity dependences (left)?
4680
 */
4681
static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
4682
185
{
4683
185
  if (
ctx->opt->schedule_algorithm != 185
ISL_SCHEDULE_ALGORITHM_FEAUTRIER185
)
4684
159
    return 0;
4685
185
4686
26
  return has_validity_edges(graph);
4687
185
}
4688
4689
/* Compute a schedule for a connected dependence graph using Feautrier's
4690
 * multi-dimensional scheduling algorithm and return the updated schedule node.
4691
 *
4692
 * The original algorithm is described in [1].
4693
 * The main idea is to minimize the number of scheduling dimensions, by
4694
 * trying to satisfy as many dependences as possible per scheduling dimension.
4695
 *
4696
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4697
 *     Problem, Part II: Multi-Dimensional Time.
4698
 *     In Intl. Journal of Parallel Programming, 1992.
4699
 */
4700
static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier(
4701
  isl_schedule_node *node, struct isl_sched_graph *graph)
4702
12
{
4703
12
  return carry_feautrier(node, graph);
4704
12
}
4705
4706
/* Turn off the "local" bit on all (condition) edges.
4707
 */
4708
static void clear_local_edges(struct isl_sched_graph *graph)
4709
219
{
4710
219
  int i;
4711
219
4712
471
  for (i = 0; 
i < graph->n_edge471
;
++i252
)
4713
252
    
if (252
is_condition(&graph->edge[i])252
)
4714
34
      clear_local(&graph->edge[i]);
4715
219
}
4716
4717
/* Does "graph" have both condition and conditional validity edges?
4718
 */
4719
static int need_condition_check(struct isl_sched_graph *graph)
4720
219
{
4721
219
  int i;
4722
219
  int any_condition = 0;
4723
219
  int any_conditional_validity = 0;
4724
219
4725
471
  for (i = 0; 
i < graph->n_edge471
;
++i252
)
{252
4726
252
    if (is_condition(&graph->edge[i]))
4727
34
      any_condition = 1;
4728
252
    if (is_conditional_validity(&graph->edge[i]))
4729
40
      any_conditional_validity = 1;
4730
252
  }
4731
219
4732
19
  return any_condition && any_conditional_validity;
4733
219
}
4734
4735
/* Does "graph" contain any coincidence edge?
4736
 */
4737
static int has_any_coincidence(struct isl_sched_graph *graph)
4738
219
{
4739
219
  int i;
4740
219
4741
331
  for (i = 0; 
i < graph->n_edge331
;
++i112
)
4742
207
    
if (207
is_coincidence(&graph->edge[i])207
)
4743
95
      return 1;
4744
219
4745
124
  return 0;
4746
219
}
4747
4748
/* Extract the final schedule row as a map with the iteration domain
4749
 * of "node" as domain.
4750
 */
4751
static __isl_give isl_map *final_row(struct isl_sched_node *node)
4752
142
{
4753
142
  isl_multi_aff *ma;
4754
142
  int row;
4755
142
4756
142
  row = isl_mat_rows(node->sched) - 1;
4757
142
  ma = node_extract_partial_schedule_multi_aff(node, row, 1);
4758
142
  return isl_map_from_multi_aff(ma);
4759
142
}
4760
4761
/* Is the conditional validity dependence in the edge with index "edge_index"
4762
 * violated by the latest (i.e., final) row of the schedule?
4763
 * That is, is i scheduled after j
4764
 * for any conditional validity dependence i -> j?
4765
 */
4766
static int is_violated(struct isl_sched_graph *graph, int edge_index)
4767
71
{
4768
71
  isl_map *src_sched, *dst_sched, *map;
4769
71
  struct isl_sched_edge *edge = &graph->edge[edge_index];
4770
71
  int empty;
4771
71
4772
71
  src_sched = final_row(edge->src);
4773
71
  dst_sched = final_row(edge->dst);
4774
71
  map = isl_map_copy(edge->map);
4775
71
  map = isl_map_apply_domain(map, src_sched);
4776
71
  map = isl_map_apply_range(map, dst_sched);
4777
71
  map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0);
4778
71
  empty = isl_map_is_empty(map);
4779
71
  isl_map_free(map);
4780
71
4781
71
  if (empty < 0)
4782
0
    return -1;
4783
71
4784
71
  return !empty;
4785
71
}
4786
4787
/* Does "graph" have any satisfied condition edges that
4788
 * are adjacent to the conditional validity constraint with
4789
 * domain "conditional_source" and range "conditional_sink"?
4790
 *
4791
 * A satisfied condition is one that is not local.
4792
 * If a condition was forced to be local already (i.e., marked as local)
4793
 * then there is no need to check if it is in fact local.
4794
 *
4795
 * Additionally, mark all adjacent condition edges found as local.
4796
 */
4797
static int has_adjacent_true_conditions(struct isl_sched_graph *graph,
4798
  __isl_keep isl_union_set *conditional_source,
4799
  __isl_keep isl_union_set *conditional_sink)
4800
11
{
4801
11
  int i;
4802
11
  int any = 0;
4803
11
4804
60
  for (i = 0; 
i < graph->n_edge60
;
++i49
)
{49
4805
49
    int adjacent, local;
4806
49
    isl_union_map *condition;
4807
49
4808
49
    if (!is_condition(&graph->edge[i]))
4809
29
      continue;
4810
20
    
if (20
is_local(&graph->edge[i])20
)
4811
0
      continue;
4812
20
4813
20
    condition = graph->edge[i].tagged_condition;
4814
20
    adjacent = domain_intersects(condition, conditional_sink);
4815
20
    if (
adjacent >= 0 && 20
!adjacent20
)
4816
8
      adjacent = range_intersects(condition,
4817
8
              conditional_source);
4818
20
    if (adjacent < 0)
4819
0
      return -1;
4820
20
    
if (20
!adjacent20
)
4821
5
      continue;
4822
20
4823
15
    set_local(&graph->edge[i]);
4824
15
4825
15
    local = is_condition_false(&graph->edge[i]);
4826
15
    if (local < 0)
4827
0
      return -1;
4828
15
    
if (15
!local15
)
4829
8
      any = 1;
4830
15
  }
4831
11
4832
11
  return any;
4833
11
}
4834
4835
/* Are there any violated conditional validity dependences with
4836
 * adjacent condition dependences that are not local with respect
4837
 * to the current schedule?
4838
 * That is, is the conditional validity constraint violated?
4839
 *
4840
 * Additionally, mark all those adjacent condition dependences as local.
4841
 * We also mark those adjacent condition dependences that were not marked
4842
 * as local before, but just happened to be local already.  This ensures
4843
 * that they remain local if the schedule is recomputed.
4844
 *
4845
 * We first collect domain and range of all violated conditional validity
4846
 * dependences and then check if there are any adjacent non-local
4847
 * condition dependences.
4848
 */
4849
static int has_violated_conditional_constraint(isl_ctx *ctx,
4850
  struct isl_sched_graph *graph)
4851
31
{
4852
31
  int i;
4853
31
  int any = 0;
4854
31
  isl_union_set *source, *sink;
4855
31
4856
31
  source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4857
31
  sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4858
168
  for (i = 0; 
i < graph->n_edge168
;
++i137
)
{137
4859
137
    isl_union_set *uset;
4860
137
    isl_union_map *umap;
4861
137
    int violated;
4862
137
4863
137
    if (!is_conditional_validity(&graph->edge[i]))
4864
66
      continue;
4865
137
4866
71
    violated = is_violated(graph, i);
4867
71
    if (violated < 0)
4868
0
      goto error;
4869
71
    
if (71
!violated71
)
4870
52
      continue;
4871
71
4872
19
    any = 1;
4873
19
4874
19
    umap = isl_union_map_copy(graph->edge[i].tagged_validity);
4875
19
    uset = isl_union_map_domain(umap);
4876
19
    source = isl_union_set_union(source, uset);
4877
19
    source = isl_union_set_coalesce(source);
4878
19
4879
19
    umap = isl_union_map_copy(graph->edge[i].tagged_validity);
4880
19
    uset = isl_union_map_range(umap);
4881
19
    sink = isl_union_set_union(sink, uset);
4882
19
    sink = isl_union_set_coalesce(sink);
4883
19
  }
4884
31
4885
31
  
if (31
any31
)
4886
11
    any = has_adjacent_true_conditions(graph, source, sink);
4887
31
4888
31
  isl_union_set_free(source);
4889
31
  isl_union_set_free(sink);
4890
31
  return any;
4891
0
error:
4892
0
  isl_union_set_free(source);
4893
0
  isl_union_set_free(sink);
4894
0
  return -1;
4895
31
}
4896
4897
/* Examine the current band (the rows between graph->band_start and
4898
 * graph->n_total_row), deciding whether to drop it or add it to "node"
4899
 * and then continue with the computation of the next band, if any.
4900
 * If "initialized" is set, then it may be assumed that compute_maxvar
4901
 * has been called on the current band.  Otherwise, call
4902
 * compute_maxvar if and before carry_dependences gets called.
4903
 *
4904
 * The caller keeps looking for a new row as long as
4905
 * graph->n_row < graph->maxvar.  If the latest attempt to find
4906
 * such a row failed (i.e., we still have graph->n_row < graph->maxvar),
4907
 * then we either
4908
 * - split between SCCs and start over (assuming we found an interesting
4909
 *  pair of SCCs between which to split)
4910
 * - continue with the next band (assuming the current band has at least
4911
 *  one row)
4912
 * - if outer coincidence needs to be enforced, then try to carry as many
4913
 *  validity or coincidence dependences as possible and
4914
 *  continue with the next band
4915
 * - try to carry as many validity dependences as possible and
4916
 *  continue with the next band
4917
 * In each case, we first insert a band node in the schedule tree
4918
 * if any rows have been computed.
4919
 *
4920
 * If the caller managed to complete the schedule, we insert a band node
4921
 * (if any schedule rows were computed) and we finish off by topologically
4922
 * sorting the statements based on the remaining dependences.
4923
 */
4924
static __isl_give isl_schedule_node *compute_schedule_finish_band(
4925
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4926
  int initialized)
4927
180
{
4928
180
  int insert;
4929
180
4930
180
  if (!node)
4931
0
    return NULL;
4932
180
4933
180
  
if (180
graph->n_row < graph->maxvar180
)
{31
4934
31
    isl_ctx *ctx;
4935
31
    int empty = graph->n_total_row == graph->band_start;
4936
31
4937
31
    ctx = isl_schedule_node_get_ctx(node);
4938
31
    if (
!ctx->opt->schedule_maximize_band_depth && 31
!empty29
)
4939
12
      return compute_next_band(node, graph, 1);
4940
19
    
if (19
graph->src_scc >= 019
)
4941
2
      return compute_split_schedule(node, graph);
4942
17
    
if (17
!empty17
)
4943
1
      return compute_next_band(node, graph, 1);
4944
16
    
if (16
!initialized && 16
compute_maxvar(graph) < 00
)
4945
0
      return isl_schedule_node_free(node);
4946
16
    
if (16
isl_options_get_schedule_outer_coincidence(ctx)16
)
4947
9
      return carry_coincidence(node, graph);
4948
7
    return carry_dependences(node, graph);
4949
16
  }
4950
180
4951
149
  insert = graph->n_total_row > graph->band_start;
4952
149
  if (
insert149
)
{119
4953
119
    node = insert_current_band(node, graph, 1);
4954
119
    node = isl_schedule_node_child(node, 0);
4955
119
  }
4956
149
  node = sort_statements(node, graph, initialized);
4957
149
  if (insert)
4958
119
    node = isl_schedule_node_parent(node);
4959
149
4960
149
  return node;
4961
180
}
4962
4963
/* Construct a band of schedule rows for a connected dependence graph.
4964
 * The caller is responsible for determining the strongly connected
4965
 * components and calling compute_maxvar first.
4966
 *
4967
 * We try to find a sequence of as many schedule rows as possible that result
4968
 * in non-negative dependence distances (independent of the previous rows
4969
 * in the sequence, i.e., such that the sequence is tilable), with as
4970
 * many of the initial rows as possible satisfying the coincidence constraints.
4971
 * The computation stops if we can't find any more rows or if we have found
4972
 * all the rows we wanted to find.
4973
 *
4974
 * If ctx->opt->schedule_outer_coincidence is set, then we force the
4975
 * outermost dimension to satisfy the coincidence constraints.  If this
4976
 * turns out to be impossible, we fall back on the general scheme above
4977
 * and try to carry as many dependences as possible.
4978
 *
4979
 * If "graph" contains both condition and conditional validity dependences,
4980
 * then we need to check that that the conditional schedule constraint
4981
 * is satisfied, i.e., there are no violated conditional validity dependences
4982
 * that are adjacent to any non-local condition dependences.
4983
 * If there are, then we mark all those adjacent condition dependences
4984
 * as local and recompute the current band.  Those dependences that
4985
 * are marked local will then be forced to be local.
4986
 * The initial computation is performed with no dependences marked as local.
4987
 * If we are lucky, then there will be no violated conditional validity
4988
 * dependences adjacent to any non-local condition dependences.
4989
 * Otherwise, we mark some additional condition dependences as local and
4990
 * recompute.  We continue this process until there are no violations left or
4991
 * until we are no longer able to compute a schedule.
4992
 * Since there are only a finite number of dependences,
4993
 * there will only be a finite number of iterations.
4994
 */
4995
static isl_stat compute_schedule_wcc_band(isl_ctx *ctx,
4996
  struct isl_sched_graph *graph)
4997
219
{
4998
219
  int has_coincidence;
4999
219
  int use_coincidence;
5000
219
  int force_coincidence = 0;
5001
219
  int check_conditional;
5002
219
5003
219
  if (sort_sccs(graph) < 0)
5004
0
    return isl_stat_error;
5005
219
5006
219
  clear_local_edges(graph);
5007
219
  check_conditional = need_condition_check(graph);
5008
219
  has_coincidence = has_any_coincidence(graph);
5009
219
5010
219
  if (ctx->opt->schedule_outer_coincidence)
5011
14
    force_coincidence = 1;
5012
219
5013
219
  use_coincidence = has_coincidence;
5014
562
  while (
graph->n_row < graph->maxvar562
)
{377
5015
377
    isl_vec *sol;
5016
377
    int violated;
5017
377
    int coincident;
5018
377
5019
377
    graph->src_scc = -1;
5020
377
    graph->dst_scc = -1;
5021
377
5022
377
    if (setup_lp(ctx, graph, use_coincidence) < 0)
5023
0
      return isl_stat_error;
5024
377
    sol = solve_lp(graph);
5025
377
    if (!sol)
5026
0
      return isl_stat_error;
5027
377
    
if (377
sol->size == 0377
)
{104
5028
104
      int empty = graph->n_total_row == graph->band_start;
5029
104
5030
104
      isl_vec_free(sol);
5031
104
      if (
use_coincidence && 104
(!force_coincidence || 79
!empty9
))
{70
5032
70
        use_coincidence = 0;
5033
70
        continue;
5034
70
      }
5035
34
      return isl_stat_ok;
5036
104
    }
5037
273
    
coincident = !has_coincidence || 273
use_coincidence129
;
5038
273
    if (update_schedule(graph, sol, 1, coincident) < 0)
5039
0
      return isl_stat_error;
5040
273
5041
273
    
if (273
!check_conditional273
)
5042
242
      continue;
5043
31
    violated = has_violated_conditional_constraint(ctx, graph);
5044
31
    if (violated < 0)
5045
0
      return isl_stat_error;
5046
31
    
if (31
!violated31
)
5047
25
      continue;
5048
6
    
if (6
reset_band(graph) < 06
)
5049
0
      return isl_stat_error;
5050
6
    use_coincidence = has_coincidence;
5051
6
  }
5052
219
5053
185
  return isl_stat_ok;
5054
219
}
5055
5056
/* Compute a schedule for a connected dependence graph by considering
5057
 * the graph as a whole and return the updated schedule node.
5058
 *
5059
 * The actual schedule rows of the current band are computed by
5060
 * compute_schedule_wcc_band.  compute_schedule_finish_band takes
5061
 * care of integrating the band into "node" and continuing
5062
 * the computation.
5063
 */
5064
static __isl_give isl_schedule_node *compute_schedule_wcc_whole(
5065
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5066
157
{
5067
157
  isl_ctx *ctx;
5068
157
5069
157
  if (!node)
5070
0
    return NULL;
5071
157
5072
157
  ctx = isl_schedule_node_get_ctx(node);
5073
157
  if (compute_schedule_wcc_band(ctx, graph) < 0)
5074
0
    return isl_schedule_node_free(node);
5075
157
5076
157
  return compute_schedule_finish_band(node, graph, 1);
5077
157
}
5078
5079
/* Clustering information used by compute_schedule_wcc_clustering.
5080