Coverage Report

Created: 2017-03-28 09:59

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/External/isl/isl_scheduler.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2011      INRIA Saclay
3
 * Copyright 2012-2014 Ecole Normale Superieure
4
 * Copyright 2015-2016 Sven Verdoolaege
5
 * Copyright 2016      INRIA Paris
6
 *
7
 * Use of this software is governed by the MIT license
8
 *
9
 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
10
 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
11
 * 91893 Orsay, France
12
 * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
13
 * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
14
 * CS 42112, 75589 Paris Cedex 12, France
15
 */
16
17
#include <isl_ctx_private.h>
18
#include <isl_map_private.h>
19
#include <isl_space_private.h>
20
#include <isl_aff_private.h>
21
#include <isl/hash.h>
22
#include <isl/constraint.h>
23
#include <isl/schedule.h>
24
#include <isl_schedule_constraints.h>
25
#include <isl/schedule_node.h>
26
#include <isl_mat_private.h>
27
#include <isl_vec_private.h>
28
#include <isl/set.h>
29
#include <isl/union_set.h>
30
#include <isl_seq.h>
31
#include <isl_tab.h>
32
#include <isl_dim_map.h>
33
#include <isl/map_to_basic_set.h>
34
#include <isl_sort.h>
35
#include <isl_options_private.h>
36
#include <isl_tarjan.h>
37
#include <isl_morph.h>
38
#include <isl/ilp.h>
39
#include <isl_val_private.h>
40
41
/*
42
 * The scheduling algorithm implemented in this file was inspired by
43
 * Bondhugula et al., "Automatic Transformations for Communication-Minimized
44
 * Parallelization and Locality Optimization in the Polyhedral Model".
45
 */
46
47
48
/* Internal information about a node that is used during the construction
49
 * of a schedule.
50
 * space represents the space in which the domain lives
51
 * sched is a matrix representation of the schedule being constructed
52
 *  for this node; if compressed is set, then this schedule is
53
 *  defined over the compressed domain space
54
 * sched_map is an isl_map representation of the same (partial) schedule
55
 *  sched_map may be NULL; if compressed is set, then this map
56
 *  is defined over the uncompressed domain space
57
 * rank is the number of linearly independent rows in the linear part
58
 *  of sched
59
 * the columns of cmap represent a change of basis for the schedule
60
 *  coefficients; the first rank columns span the linear part of
61
 *  the schedule rows
62
 * cinv is the inverse of cmap.
63
 * ctrans is the transpose of cmap.
64
 * start is the first variable in the LP problem in the sequences that
65
 *  represents the schedule coefficients of this node
66
 * nvar is the dimension of the domain
67
 * nparam is the number of parameters or 0 if we are not constructing
68
 *  a parametric schedule
69
 *
70
 * If compressed is set, then hull represents the constraints
71
 * that were used to derive the compression, while compress and
72
 * decompress map the original space to the compressed space and
73
 * vice versa.
74
 *
75
 * scc is the index of SCC (or WCC) this node belongs to
76
 *
77
 * "cluster" is only used inside extract_clusters and identifies
78
 * the cluster of SCCs that the node belongs to.
79
 *
80
 * coincident contains a boolean for each of the rows of the schedule,
81
 * indicating whether the corresponding scheduling dimension satisfies
82
 * the coincidence constraints in the sense that the corresponding
83
 * dependence distances are zero.
84
 *
85
 * If the schedule_treat_coalescing option is set, then
86
 * "sizes" contains the sizes of the (compressed) instance set
87
 * in each direction.  If there is no fixed size in a given direction,
88
 * then the corresponding size value is set to infinity.
89
 * If the schedule_treat_coalescing option or the schedule_max_coefficient
90
 * option is set, then "max" contains the maximal values for
91
 * schedule coefficients of the (compressed) variables.  If no bound
92
 * needs to be imposed on a particular variable, then the corresponding
93
 * value is negative.
94
 */
95
struct isl_sched_node {
96
  isl_space *space;
97
  int compressed;
98
  isl_set *hull;
99
  isl_multi_aff *compress;
100
  isl_multi_aff *decompress;
101
  isl_mat *sched;
102
  isl_map *sched_map;
103
  int  rank;
104
  isl_mat *cmap;
105
  isl_mat *cinv;
106
  isl_mat *ctrans;
107
  int  start;
108
  int  nvar;
109
  int  nparam;
110
111
  int  scc;
112
  int  cluster;
113
114
  int *coincident;
115
116
  isl_multi_val *sizes;
117
  isl_vec *max;
118
};
119
120
static int node_has_space(const void *entry, const void *val)
121
1.44k
{
122
1.44k
  struct isl_sched_node *node = (struct isl_sched_node *)entry;
123
1.44k
  isl_space *dim = (isl_space *)val;
124
1.44k
125
1.44k
  return isl_space_is_equal(node->space, dim);
126
1.44k
}
127
128
static int node_scc_exactly(struct isl_sched_node *node, int scc)
129
798
{
130
798
  return node->scc == scc;
131
798
}
132
133
static int node_scc_at_most(struct isl_sched_node *node, int scc)
134
18
{
135
18
  return node->scc <= scc;
136
18
}
137
138
static int node_scc_at_least(struct isl_sched_node *node, int scc)
139
18
{
140
18
  return node->scc >= scc;
141
18
}
142
143
/* An edge in the dependence graph.  An edge may be used to
144
 * ensure validity of the generated schedule, to minimize the dependence
145
 * distance or both
146
 *
147
 * map is the dependence relation, with i -> j in the map if j depends on i
148
 * tagged_condition and tagged_validity contain the union of all tagged
149
 *  condition or conditional validity dependence relations that
150
 *  specialize the dependence relation "map"; that is,
151
 *  if (i -> a) -> (j -> b) is an element of "tagged_condition"
152
 *  or "tagged_validity", then i -> j is an element of "map".
153
 *  If these fields are NULL, then they represent the empty relation.
154
 * src is the source node
155
 * dst is the sink node
156
 *
157
 * types is a bit vector containing the types of this edge.
158
 * validity is set if the edge is used to ensure correctness
159
 * coincidence is used to enforce zero dependence distances
160
 * proximity is set if the edge is used to minimize dependence distances
161
 * condition is set if the edge represents a condition
162
 *  for a conditional validity schedule constraint
163
 * local can only be set for condition edges and indicates that
164
 *  the dependence distance over the edge should be zero
165
 * conditional_validity is set if the edge is used to conditionally
166
 *  ensure correctness
167
 *
168
 * For validity edges, start and end mark the sequence of inequality
169
 * constraints in the LP problem that encode the validity constraint
170
 * corresponding to this edge.
171
 *
172
 * During clustering, an edge may be marked "no_merge" if it should
173
 * not be used to merge clusters.
174
 * The weight is also only used during clustering and it is
175
 * an indication of how many schedule dimensions on either side
176
 * of the schedule constraints can be aligned.
177
 * If the weight is negative, then this means that this edge was postponed
178
 * by has_bounded_distances or any_no_merge.  The original weight can
179
 * be retrieved by adding 1 + graph->max_weight, with "graph"
180
 * the graph containing this edge.
181
 */
182
struct isl_sched_edge {
183
  isl_map *map;
184
  isl_union_map *tagged_condition;
185
  isl_union_map *tagged_validity;
186
187
  struct isl_sched_node *src;
188
  struct isl_sched_node *dst;
189
190
  unsigned types;
191
192
  int start;
193
  int end;
194
195
  int no_merge;
196
  int weight;
197
};
198
199
/* Is "edge" marked as being of type "type"?
200
 */
201
static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type)
202
11.1k
{
203
11.1k
  return ISL_FL_ISSET(edge->types, 1 << type);
204
11.1k
}
205
206
/* Mark "edge" as being of type "type".
207
 */
208
static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
209
595
{
210
595
  ISL_FL_SET(edge->types, 1 << type);
211
595
}
212
213
/* No longer mark "edge" as being of type "type"?
214
 */
215
static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
216
34
{
217
34
  ISL_FL_CLR(edge->types, 1 << type);
218
34
}
219
220
/* Is "edge" marked as a validity edge?
221
 */
222
static int is_validity(struct isl_sched_edge *edge)
223
3.28k
{
224
3.28k
  return is_type(edge, isl_edge_validity);
225
3.28k
}
226
227
/* Mark "edge" as a validity edge.
228
 */
229
static void set_validity(struct isl_sched_edge *edge)
230
2
{
231
2
  set_type(edge, isl_edge_validity);
232
2
}
233
234
/* Is "edge" marked as a proximity edge?
235
 */
236
static int is_proximity(struct isl_sched_edge *edge)
237
1.32k
{
238
1.32k
  return is_type(edge, isl_edge_proximity);
239
1.32k
}
240
241
/* Is "edge" marked as a local edge?
242
 */
243
static int is_local(struct isl_sched_edge *edge)
244
1.83k
{
245
1.83k
  return is_type(edge, isl_edge_local);
246
1.83k
}
247
248
/* Mark "edge" as a local edge.
249
 */
250
static void set_local(struct isl_sched_edge *edge)
251
14
{
252
14
  set_type(edge, isl_edge_local);
253
14
}
254
255
/* No longer mark "edge" as a local edge.
256
 */
257
static void clear_local(struct isl_sched_edge *edge)
258
34
{
259
34
  clear_type(edge, isl_edge_local);
260
34
}
261
262
/* Is "edge" marked as a coincidence edge?
263
 */
264
static int is_coincidence(struct isl_sched_edge *edge)
265
1.81k
{
266
1.81k
  return is_type(edge, isl_edge_coincidence);
267
1.81k
}
268
269
/* Is "edge" marked as a condition edge?
270
 */
271
static int is_condition(struct isl_sched_edge *edge)
272
1.48k
{
273
1.48k
  return is_type(edge, isl_edge_condition);
274
1.48k
}
275
276
/* Is "edge" marked as a conditional validity edge?
277
 */
278
static int is_conditional_validity(struct isl_sched_edge *edge)
279
1.16k
{
280
1.16k
  return is_type(edge, isl_edge_conditional_validity);
281
1.16k
}
282
283
/* Internal information about the dependence graph used during
284
 * the construction of the schedule.
285
 *
286
 * intra_hmap is a cache, mapping dependence relations to their dual,
287
 *  for dependences from a node to itself
288
 * inter_hmap is a cache, mapping dependence relations to their dual,
289
 *  for dependences between distinct nodes
290
 * if compression is involved then the key for these maps
291
 * is the original, uncompressed dependence relation, while
292
 * the value is the dual of the compressed dependence relation.
293
 *
294
 * n is the number of nodes
295
 * node is the list of nodes
296
 * maxvar is the maximal number of variables over all nodes
297
 * max_row is the allocated number of rows in the schedule
298
 * n_row is the current (maximal) number of linearly independent
299
 *  rows in the node schedules
300
 * n_total_row is the current number of rows in the node schedules
301
 * band_start is the starting row in the node schedules of the current band
302
 * root is set if this graph is the original dependence graph,
303
 *  without any splitting
304
 *
305
 * sorted contains a list of node indices sorted according to the
306
 *  SCC to which a node belongs
307
 *
308
 * n_edge is the number of edges
309
 * edge is the list of edges
310
 * max_edge contains the maximal number of edges of each type;
311
 *  in particular, it contains the number of edges in the inital graph.
312
 * edge_table contains pointers into the edge array, hashed on the source
313
 *  and sink spaces; there is one such table for each type;
314
 *  a given edge may be referenced from more than one table
315
 *  if the corresponding relation appears in more than one of the
316
 *  sets of dependences; however, for each type there is only
317
 *  a single edge between a given pair of source and sink space
318
 *  in the entire graph
319
 *
320
 * node_table contains pointers into the node array, hashed on the space
321
 *
322
 * region contains a list of variable sequences that should be non-trivial
323
 *
324
 * lp contains the (I)LP problem used to obtain new schedule rows
325
 *
326
 * src_scc and dst_scc are the source and sink SCCs of an edge with
327
 *  conflicting constraints
328
 *
329
 * scc represents the number of components
330
 * weak is set if the components are weakly connected
331
 *
332
 * max_weight is used during clustering and represents the maximal
333
 * weight of the relevant proximity edges.
334
 */
335
struct isl_sched_graph {
336
  isl_map_to_basic_set *intra_hmap;
337
  isl_map_to_basic_set *inter_hmap;
338
339
  struct isl_sched_node *node;
340
  int n;
341
  int maxvar;
342
  int max_row;
343
  int n_row;
344
345
  int *sorted;
346
347
  int n_total_row;
348
  int band_start;
349
350
  int root;
351
352
  struct isl_sched_edge *edge;
353
  int n_edge;
354
  int max_edge[isl_edge_last + 1];
355
  struct isl_hash_table *edge_table[isl_edge_last + 1];
356
357
  struct isl_hash_table *node_table;
358
  struct isl_region *region;
359
360
  isl_basic_set *lp;
361
362
  int src_scc;
363
  int dst_scc;
364
365
  int scc;
366
  int weak;
367
368
  int max_weight;
369
};
370
371
/* Initialize node_table based on the list of nodes.
372
 */
373
static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
374
245
{
375
245
  int i;
376
245
377
245
  graph->node_table = isl_hash_table_alloc(ctx, graph->n);
378
245
  if (!graph->node_table)
379
0
    return -1;
380
245
381
617
  
for (i = 0; 245
i < graph->n617
;
++i372
)
{372
382
372
    struct isl_hash_table_entry *entry;
383
372
    uint32_t hash;
384
372
385
372
    hash = isl_space_get_hash(graph->node[i].space);
386
372
    entry = isl_hash_table_find(ctx, graph->node_table, hash,
387
372
              &node_has_space,
388
372
              graph->node[i].space, 1);
389
372
    if (!entry)
390
0
      return -1;
391
372
    entry->data = &graph->node[i];
392
372
  }
393
245
394
245
  return 0;
395
245
}
396
397
/* Return a pointer to the node that lives within the given space,
398
 * or NULL if there is no such node.
399
 */
400
static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
401
  struct isl_sched_graph *graph, __isl_keep isl_space *dim)
402
1.47k
{
403
1.47k
  struct isl_hash_table_entry *entry;
404
1.47k
  uint32_t hash;
405
1.47k
406
1.47k
  hash = isl_space_get_hash(dim);
407
1.47k
  entry = isl_hash_table_find(ctx, graph->node_table, hash,
408
1.47k
            &node_has_space, dim, 0);
409
1.47k
410
1.44k
  return entry ? entry->data : NULL;
411
1.47k
}
412
413
static int edge_has_src_and_dst(const void *entry, const void *val)
414
1.10k
{
415
1.10k
  const struct isl_sched_edge *edge = entry;
416
1.10k
  const struct isl_sched_edge *temp = val;
417
1.10k
418
1.10k
  return edge->src == temp->src && edge->dst == temp->dst;
419
1.10k
}
420
421
/* Add the given edge to graph->edge_table[type].
422
 */
423
static isl_stat graph_edge_table_add(isl_ctx *ctx,
424
  struct isl_sched_graph *graph, enum isl_edge_type type,
425
  struct isl_sched_edge *edge)
426
762
{
427
762
  struct isl_hash_table_entry *entry;
428
762
  uint32_t hash;
429
762
430
762
  hash = isl_hash_init();
431
762
  hash = isl_hash_builtin(hash, edge->src);
432
762
  hash = isl_hash_builtin(hash, edge->dst);
433
762
  entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
434
762
            &edge_has_src_and_dst, edge, 1);
435
762
  if (!entry)
436
0
    return isl_stat_error;
437
762
  entry->data = edge;
438
762
439
762
  return isl_stat_ok;
440
762
}
441
442
/* Allocate the edge_tables based on the maximal number of edges of
443
 * each type.
444
 */
445
static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
446
245
{
447
245
  int i;
448
245
449
1.47k
  for (i = 0; 
i <= isl_edge_last1.47k
;
++i1.22k
)
{1.22k
450
1.22k
    graph->edge_table[i] = isl_hash_table_alloc(ctx,
451
1.22k
                  graph->max_edge[i]);
452
1.22k
    if (!graph->edge_table[i])
453
0
      return -1;
454
1.22k
  }
455
245
456
245
  return 0;
457
245
}
458
459
/* If graph->edge_table[type] contains an edge from the given source
460
 * to the given destination, then return the hash table entry of this edge.
461
 * Otherwise, return NULL.
462
 */
463
static struct isl_hash_table_entry *graph_find_edge_entry(
464
  struct isl_sched_graph *graph,
465
  enum isl_edge_type type,
466
  struct isl_sched_node *src, struct isl_sched_node *dst)
467
4.00k
{
468
4.00k
  isl_ctx *ctx = isl_space_get_ctx(src->space);
469
4.00k
  uint32_t hash;
470
4.00k
  struct isl_sched_edge temp = { .src = src, .dst = dst };
471
4.00k
472
4.00k
  hash = isl_hash_init();
473
4.00k
  hash = isl_hash_builtin(hash, temp.src);
474
4.00k
  hash = isl_hash_builtin(hash, temp.dst);
475
4.00k
  return isl_hash_table_find(ctx, graph->edge_table[type], hash,
476
4.00k
            &edge_has_src_and_dst, &temp, 0);
477
4.00k
}
478
479
480
/* If graph->edge_table[type] contains an edge from the given source
481
 * to the given destination, then return this edge.
482
 * Otherwise, return NULL.
483
 */
484
static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
485
  enum isl_edge_type type,
486
  struct isl_sched_node *src, struct isl_sched_node *dst)
487
3.46k
{
488
3.46k
  struct isl_hash_table_entry *entry;
489
3.46k
490
3.46k
  entry = graph_find_edge_entry(graph, type, src, dst);
491
3.46k
  if (!entry)
492
2.59k
    return NULL;
493
3.46k
494
870
  return entry->data;
495
3.46k
}
496
497
/* Check whether the dependence graph has an edge of the given type
498
 * between the given two nodes.
499
 */
500
static isl_bool graph_has_edge(struct isl_sched_graph *graph,
501
  enum isl_edge_type type,
502
  struct isl_sched_node *src, struct isl_sched_node *dst)
503
1.57k
{
504
1.57k
  struct isl_sched_edge *edge;
505
1.57k
  isl_bool empty;
506
1.57k
507
1.57k
  edge = graph_find_edge(graph, type, src, dst);
508
1.57k
  if (!edge)
509
1.27k
    return 0;
510
1.57k
511
305
  empty = isl_map_plain_is_empty(edge->map);
512
305
  if (empty < 0)
513
0
    return isl_bool_error;
514
305
515
305
  return !empty;
516
305
}
517
518
/* Look for any edge with the same src, dst and map fields as "model".
519
 *
520
 * Return the matching edge if one can be found.
521
 * Return "model" if no matching edge is found.
522
 * Return NULL on error.
523
 */
524
static struct isl_sched_edge *graph_find_matching_edge(
525
  struct isl_sched_graph *graph, struct isl_sched_edge *model)
526
579
{
527
579
  enum isl_edge_type i;
528
579
  struct isl_sched_edge *edge;
529
579
530
1.75k
  for (i = isl_edge_first; 
i <= isl_edge_last1.75k
;
++i1.18k
)
{1.52k
531
1.52k
    int is_equal;
532
1.52k
533
1.52k
    edge = graph_find_edge(graph, i, model->src, model->dst);
534
1.52k
    if (!edge)
535
1.16k
      continue;
536
362
    is_equal = isl_map_plain_is_equal(model->map, edge->map);
537
362
    if (is_equal < 0)
538
0
      return NULL;
539
362
    
if (362
is_equal362
)
540
343
      return edge;
541
362
  }
542
579
543
236
  return model;
544
579
}
545
546
/* Remove the given edge from all the edge_tables that refer to it.
547
 */
548
static void graph_remove_edge(struct isl_sched_graph *graph,
549
  struct isl_sched_edge *edge)
550
108
{
551
108
  isl_ctx *ctx = isl_map_get_ctx(edge->map);
552
108
  enum isl_edge_type i;
553
108
554
648
  for (i = isl_edge_first; 
i <= isl_edge_last648
;
++i540
)
{540
555
540
    struct isl_hash_table_entry *entry;
556
540
557
540
    entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
558
540
    if (!entry)
559
303
      continue;
560
237
    
if (237
entry->data != edge237
)
561
22
      continue;
562
215
    isl_hash_table_remove(ctx, graph->edge_table[i], entry);
563
215
  }
564
108
}
565
566
/* Check whether the dependence graph has any edge
567
 * between the given two nodes.
568
 */
569
static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
570
  struct isl_sched_node *src, struct isl_sched_node *dst)
571
306
{
572
306
  enum isl_edge_type i;
573
306
  isl_bool r;
574
306
575
1.12k
  for (i = isl_edge_first; 
i <= isl_edge_last1.12k
;
++i822
)
{986
576
986
    r = graph_has_edge(graph, i, src, dst);
577
986
    if (
r < 0 || 986
r986
)
578
164
      return r;
579
986
  }
580
306
581
142
  return r;
582
306
}
583
584
/* Check whether the dependence graph has a validity edge
585
 * between the given two nodes.
586
 *
587
 * Conditional validity edges are essentially validity edges that
588
 * can be ignored if the corresponding condition edges are iteration private.
589
 * Here, we are only checking for the presence of validity
590
 * edges, so we need to consider the conditional validity edges too.
591
 * In particular, this function is used during the detection
592
 * of strongly connected components and we cannot ignore
593
 * conditional validity edges during this detection.
594
 */
595
static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph,
596
  struct isl_sched_node *src, struct isl_sched_node *dst)
597
357
{
598
357
  isl_bool r;
599
357
600
357
  r = graph_has_edge(graph, isl_edge_validity, src, dst);
601
357
  if (
r < 0 || 357
r357
)
602
121
    return r;
603
357
604
236
  return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
605
357
}
606
607
static int graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
608
  int n_node, int n_edge)
609
245
{
610
245
  int i;
611
245
612
245
  graph->n = n_node;
613
245
  graph->n_edge = n_edge;
614
245
  graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
615
245
  graph->sorted = isl_calloc_array(ctx, int, graph->n);
616
245
  graph->region = isl_alloc_array(ctx, struct isl_region, graph->n);
617
245
  graph->edge = isl_calloc_array(ctx,
618
245
          struct isl_sched_edge, graph->n_edge);
619
245
620
245
  graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
621
245
  graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
622
245
623
245
  if (
!graph->node || 245
!graph->region245
||
(graph->n_edge && 245
!graph->edge159
) ||
624
245
      !graph->sorted)
625
0
    return -1;
626
245
627
619
  
for(i = 0; 245
i < graph->n619
;
++i374
)
628
374
    graph->sorted[i] = i;
629
245
630
245
  return 0;
631
245
}
632
633
static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
634
264
{
635
264
  int i;
636
264
637
264
  isl_map_to_basic_set_free(graph->intra_hmap);
638
264
  isl_map_to_basic_set_free(graph->inter_hmap);
639
264
640
264
  if (graph->node)
641
617
    
for (i = 0; 245
i < graph->n617
;
++i372
)
{372
642
372
      isl_space_free(graph->node[i].space);
643
372
      isl_set_free(graph->node[i].hull);
644
372
      isl_multi_aff_free(graph->node[i].compress);
645
372
      isl_multi_aff_free(graph->node[i].decompress);
646
372
      isl_mat_free(graph->node[i].sched);
647
372
      isl_map_free(graph->node[i].sched_map);
648
372
      isl_mat_free(graph->node[i].cmap);
649
372
      isl_mat_free(graph->node[i].cinv);
650
372
      isl_mat_free(graph->node[i].ctrans);
651
372
      if (graph->root)
652
232
        free(graph->node[i].coincident);
653
372
      isl_multi_val_free(graph->node[i].sizes);
654
372
      isl_vec_free(graph->node[i].max);
655
372
    }
656
264
  free(graph->node);
657
264
  free(graph->sorted);
658
264
  if (graph->edge)
659
554
    
for (i = 0; 245
i < graph->n_edge554
;
++i309
)
{309
660
309
      isl_map_free(graph->edge[i].map);
661
309
      isl_union_map_free(graph->edge[i].tagged_condition);
662
309
      isl_union_map_free(graph->edge[i].tagged_validity);
663
309
    }
664
264
  free(graph->edge);
665
264
  free(graph->region);
666
1.58k
  for (i = 0; 
i <= isl_edge_last1.58k
;
++i1.32k
)
667
1.32k
    isl_hash_table_free(ctx, graph->edge_table[i]);
668
264
  isl_hash_table_free(ctx, graph->node_table);
669
264
  isl_basic_set_free(graph->lp);
670
264
}
671
672
/* For each "set" on which this function is called, increment
673
 * graph->n by one and update graph->maxvar.
674
 */
675
static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
676
234
{
677
234
  struct isl_sched_graph *graph = user;
678
234
  int nvar = isl_set_dim(set, isl_dim_set);
679
234
680
234
  graph->n++;
681
234
  if (nvar > graph->maxvar)
682
142
    graph->maxvar = nvar;
683
234
684
234
  isl_set_free(set);
685
234
686
234
  return isl_stat_ok;
687
234
}
688
689
/* Compute the number of rows that should be allocated for the schedule.
690
 * In particular, we need one row for each variable or one row
691
 * for each basic map in the dependences.
692
 * Note that it is practically impossible to exhaust both
693
 * the number of dependences and the number of variables.
694
 */
695
static isl_stat compute_max_row(struct isl_sched_graph *graph,
696
  __isl_keep isl_schedule_constraints *sc)
697
130
{
698
130
  int n_edge;
699
130
  isl_stat r;
700
130
  isl_union_set *domain;
701
130
702
130
  graph->n = 0;
703
130
  graph->maxvar = 0;
704
130
  domain = isl_schedule_constraints_get_domain(sc);
705
130
  r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
706
130
  isl_union_set_free(domain);
707
130
  if (r < 0)
708
0
    return isl_stat_error;
709
130
  n_edge = isl_schedule_constraints_n_basic_map(sc);
710
130
  if (n_edge < 0)
711
0
    return isl_stat_error;
712
130
  graph->max_row = n_edge + graph->maxvar;
713
130
714
130
  return isl_stat_ok;
715
130
}
716
717
/* Does "bset" have any defining equalities for its set variables?
718
 */
719
static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
720
232
{
721
232
  int i, n;
722
232
723
232
  if (!bset)
724
0
    return isl_bool_error;
725
232
726
232
  n = isl_basic_set_dim(bset, isl_dim_set);
727
578
  for (i = 0; 
i < n578
;
++i346
)
{350
728
350
    isl_bool has;
729
350
730
350
    has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
731
350
              NULL);
732
350
    if (
has < 0 || 350
has350
)
733
4
      return has;
734
350
  }
735
232
736
228
  return isl_bool_false;
737
232
}
738
739
/* Set the entries of node->max to the value of the schedule_max_coefficient
740
 * option, if set.
741
 */
742
static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
743
4
{
744
4
  int max;
745
4
746
4
  max = isl_options_get_schedule_max_coefficient(ctx);
747
4
  if (max == -1)
748
4
    return isl_stat_ok;
749
4
750
0
  node->max = isl_vec_alloc(ctx, node->nvar);
751
0
  node->max = isl_vec_set_si(node->max, max);
752
0
  if (!node->max)
753
0
    return isl_stat_error;
754
0
755
0
  return isl_stat_ok;
756
0
}
757
758
/* Set the entries of node->max to the minimum of the schedule_max_coefficient
759
 * option (if set) and half of the minimum of the sizes in the other
760
 * dimensions.  If the minimum of the sizes is one, half of the size
761
 * is zero and this value is reset to one.
762
 * If the global minimum is unbounded (i.e., if both
763
 * the schedule_max_coefficient is not set and the sizes in the other
764
 * dimensions are unbounded), then store a negative value.
765
 * If the schedule coefficient is close to the size of the instance set
766
 * in another dimension, then the schedule may represent a loop
767
 * coalescing transformation (especially if the coefficient
768
 * in that other dimension is one).  Forcing the coefficient to be
769
 * smaller than or equal to half the minimal size should avoid this
770
 * situation.
771
 */
772
static isl_stat compute_max_coefficient(isl_ctx *ctx,
773
  struct isl_sched_node *node)
774
228
{
775
228
  int max;
776
228
  int i, j;
777
228
  isl_vec *v;
778
228
779
228
  max = isl_options_get_schedule_max_coefficient(ctx);
780
228
  v = isl_vec_alloc(ctx, node->nvar);
781
228
  if (!v)
782
0
    return isl_stat_error;
783
228
784
560
  
for (i = 0; 228
i < node->nvar560
;
++i332
)
{332
785
332
    isl_int_set_si(v->el[i], max);
786
332
    isl_int_mul_si(v->el[i], v->el[i], 2);
787
332
  }
788
228
789
560
  for (i = 0; 
i < node->nvar560
;
++i332
)
{332
790
332
    isl_val *size;
791
332
792
332
    size = isl_multi_val_get_val(node->sizes, i);
793
332
    if (!size)
794
0
      goto error;
795
332
    
if (332
!isl_val_is_int(size)332
)
{195
796
195
      isl_val_free(size);
797
195
      continue;
798
195
    }
799
434
    
for (j = 0; 137
j < node->nvar434
;
++j297
)
{297
800
297
      if (j == i)
801
137
        continue;
802
160
      
if (160
isl_int_is_neg160
(v->el[j]) ||160
803
139
          isl_int_gt(v->el[j], size->n))
804
23
        isl_int_set(v->el[j], size->n);
805
160
    }
806
137
    isl_val_free(size);
807
137
  }
808
228
809
560
  
for (i = 0; 228
i < node->nvar560
;
++i332
)
{332
810
332
    isl_int_fdiv_q_ui(v->el[i], v->el[i], 2);
811
332
    if (isl_int_is_zero(v->el[i]))
812
10
      isl_int_set_si(v->el[i], 1);
813
332
  }
814
228
815
228
  node->max = v;
816
228
  return isl_stat_ok;
817
0
error:
818
0
  isl_vec_free(v);
819
0
  return isl_stat_error;
820
228
}
821
822
/* Compute and return the size of "set" in dimension "dim".
823
 * The size is taken to be the difference in values for that variable
824
 * for fixed values of the other variables.
825
 * In particular, the variable is first isolated from the other variables
826
 * in the range of a map
827
 *
828
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
829
 *
830
 * and then duplicated
831
 *
832
 *  [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
833
 *
834
 * The shared variables are then projected out and the maximal value
835
 * of i_dim' - i_dim is computed.
836
 */
837
static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
838
332
{
839
332
  isl_map *map;
840
332
  isl_local_space *ls;
841
332
  isl_aff *obj;
842
332
  isl_val *v;
843
332
844
332
  map = isl_set_project_onto_map(set, isl_dim_set, dim, 1);
845
332
  map = isl_map_project_out(map, isl_dim_in, dim, 1);
846
332
  map = isl_map_range_product(map, isl_map_copy(map));
847
332
  map = isl_set_unwrap(isl_map_range(map));
848
332
  set = isl_map_deltas(map);
849
332
  ls = isl_local_space_from_space(isl_set_get_space(set));
850
332
  obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
851
332
  v = isl_set_max_val(set, obj);
852
332
  isl_aff_free(obj);
853
332
  isl_set_free(set);
854
332
855
332
  return v;
856
332
}
857
858
/* Compute the size of the instance set "set" of "node", after compression,
859
 * as well as bounds on the corresponding coefficients, if needed.
860
 *
861
 * The sizes are needed when the schedule_treat_coalescing option is set.
862
 * The bounds are needed when the schedule_treat_coalescing option or
863
 * the schedule_max_coefficient option is set.
864
 *
865
 * If the schedule_treat_coalescing option is not set, then at most
866
 * the bounds need to be set and this is done in set_max_coefficient.
867
 * Otherwise, compress the domain if needed, compute the size
868
 * in each direction and store the results in node->size.
869
 * Finally, set the bounds on the coefficients based on the sizes
870
 * and the schedule_max_coefficient option in compute_max_coefficient.
871
 */
872
static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
873
  __isl_take isl_set *set)
874
232
{
875
232
  int j, n;
876
232
  isl_multi_val *mv;
877
232
878
232
  if (
!isl_options_get_schedule_treat_coalescing(ctx)232
)
{4
879
4
    isl_set_free(set);
880
4
    return set_max_coefficient(ctx, node);
881
4
  }
882
232
883
228
  
if (228
node->compressed228
)
884
4
    set = isl_set_preimage_multi_aff(set,
885
4
          isl_multi_aff_copy(node->decompress));
886
228
  mv = isl_multi_val_zero(isl_set_get_space(set));
887
228
  n = isl_set_dim(set, isl_dim_set);
888
560
  for (j = 0; 
j < n560
;
++j332
)
{332
889
332
    isl_val *v;
890
332
891
332
    v = compute_size(isl_set_copy(set), j);
892
332
    mv = isl_multi_val_set_val(mv, j, v);
893
332
  }
894
228
  node->sizes = mv;
895
228
  isl_set_free(set);
896
228
  if (!node->sizes)
897
0
    return isl_stat_error;
898
228
  return compute_max_coefficient(ctx, node);
899
228
}
900
901
/* Add a new node to the graph representing the given instance set.
902
 * "nvar" is the (possibly compressed) number of variables and
903
 * may be smaller than then number of set variables in "set"
904
 * if "compressed" is set.
905
 * If "compressed" is set, then "hull" represents the constraints
906
 * that were used to derive the compression, while "compress" and
907
 * "decompress" map the original space to the compressed space and
908
 * vice versa.
909
 * If "compressed" is not set, then "hull", "compress" and "decompress"
910
 * should be NULL.
911
 *
912
 * Compute the size of the instance set and bounds on the coefficients,
913
 * if needed.
914
 */
915
static isl_stat add_node(struct isl_sched_graph *graph,
916
  __isl_take isl_set *set, int nvar, int compressed,
917
  __isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
918
  __isl_take isl_multi_aff *decompress)
919
232
{
920
232
  int nparam;
921
232
  isl_ctx *ctx;
922
232
  isl_mat *sched;
923
232
  isl_space *space;
924
232
  int *coincident;
925
232
  struct isl_sched_node *node;
926
232
927
232
  if (!set)
928
0
    return isl_stat_error;
929
232
930
232
  ctx = isl_set_get_ctx(set);
931
232
  nparam = isl_set_dim(set, isl_dim_param);
932
232
  if (!ctx->opt->schedule_parametric)
933
24
    nparam = 0;
934
232
  sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
935
232
  node = &graph->node[graph->n];
936
232
  graph->n++;
937
232
  space = isl_set_get_space(set);
938
232
  node->space = space;
939
232
  node->nvar = nvar;
940
232
  node->nparam = nparam;
941
232
  node->sched = sched;
942
232
  node->sched_map = NULL;
943
232
  coincident = isl_calloc_array(ctx, int, graph->max_row);
944
232
  node->coincident = coincident;
945
232
  node->compressed = compressed;
946
232
  node->hull = hull;
947
232
  node->compress = compress;
948
232
  node->decompress = decompress;
949
232
  if (compute_sizes_and_max(ctx, node, set) < 0)
950
0
    return isl_stat_error;
951
232
952
232
  
if (232
!space || 232
!sched232
||
(graph->max_row && 232
!coincident230
))
953
0
    return isl_stat_error;
954
232
  
if (232
compressed && 232
(!hull || 4
!compress4
||
!decompress4
))
955
0
    return isl_stat_error;
956
232
957
232
  return isl_stat_ok;
958
232
}
959
960
/* Add a new node to the graph representing the given set.
961
 *
962
 * If any of the set variables is defined by an equality, then
963
 * we perform variable compression such that we can perform
964
 * the scheduling on the compressed domain.
965
 */
966
static isl_stat extract_node(__isl_take isl_set *set, void *user)
967
232
{
968
232
  int nvar;
969
232
  isl_bool has_equality;
970
232
  isl_basic_set *hull;
971
232
  isl_set *hull_set;
972
232
  isl_morph *morph;
973
232
  isl_multi_aff *compress, *decompress;
974
232
  struct isl_sched_graph *graph = user;
975
232
976
232
  hull = isl_set_affine_hull(isl_set_copy(set));
977
232
  hull = isl_basic_set_remove_divs(hull);
978
232
  nvar = isl_set_dim(set, isl_dim_set);
979
232
  has_equality = has_any_defining_equality(hull);
980
232
981
232
  if (has_equality < 0)
982
0
    goto error;
983
232
  
if (232
!has_equality232
)
{228
984
228
    isl_basic_set_free(hull);
985
228
    return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
986
228
  }
987
232
988
4
  morph = isl_basic_set_variable_compression(hull, isl_dim_set);
989
4
  nvar = isl_morph_ran_dim(morph, isl_dim_set);
990
4
  compress = isl_morph_get_var_multi_aff(morph);
991
4
  morph = isl_morph_inverse(morph);
992
4
  decompress = isl_morph_get_var_multi_aff(morph);
993
4
  isl_morph_free(morph);
994
4
995
4
  hull_set = isl_set_from_basic_set(hull);
996
4
  return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
997
0
error:
998
0
  isl_basic_set_free(hull);
999
0
  isl_set_free(set);
1000
0
  return isl_stat_error;
1001
232
}
1002
1003
struct isl_extract_edge_data {
1004
  enum isl_edge_type type;
1005
  struct isl_sched_graph *graph;
1006
};
1007
1008
/* Merge edge2 into edge1, freeing the contents of edge2.
1009
 * Return 0 on success and -1 on failure.
1010
 *
1011
 * edge1 and edge2 are assumed to have the same value for the map field.
1012
 */
1013
static int merge_edge(struct isl_sched_edge *edge1,
1014
  struct isl_sched_edge *edge2)
1015
343
{
1016
343
  edge1->types |= edge2->types;
1017
343
  isl_map_free(edge2->map);
1018
343
1019
343
  if (
is_condition(edge2)343
)
{20
1020
20
    if (!edge1->tagged_condition)
1021
20
      edge1->tagged_condition = edge2->tagged_condition;
1022
20
    else
1023
0
      edge1->tagged_condition =
1024
0
        isl_union_map_union(edge1->tagged_condition,
1025
0
                edge2->tagged_condition);
1026
20
  }
1027
343
1028
343
  if (
is_conditional_validity(edge2)343
)
{3
1029
3
    if (!edge1->tagged_validity)
1030
3
      edge1->tagged_validity = edge2->tagged_validity;
1031
3
    else
1032
0
      edge1->tagged_validity =
1033
0
        isl_union_map_union(edge1->tagged_validity,
1034
0
                edge2->tagged_validity);
1035
3
  }
1036
343
1037
343
  if (
is_condition(edge2) && 343
!edge1->tagged_condition20
)
1038
0
    return -1;
1039
343
  
if (343
is_conditional_validity(edge2) && 343
!edge1->tagged_validity3
)
1040
0
    return -1;
1041
343
1042
343
  return 0;
1043
343
}
1044
1045
/* Insert dummy tags in domain and range of "map".
1046
 *
1047
 * In particular, if "map" is of the form
1048
 *
1049
 *  A -> B
1050
 *
1051
 * then return
1052
 *
1053
 *  [A -> dummy_tag] -> [B -> dummy_tag]
1054
 *
1055
 * where the dummy_tags are identical and equal to any dummy tags
1056
 * introduced by any other call to this function.
1057
 */
1058
static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
1059
8
{
1060
8
  static char dummy;
1061
8
  isl_ctx *ctx;
1062
8
  isl_id *id;
1063
8
  isl_space *space;
1064
8
  isl_set *domain, *range;
1065
8
1066
8
  ctx = isl_map_get_ctx(map);
1067
8
1068
8
  id = isl_id_alloc(ctx, NULL, &dummy);
1069
8
  space = isl_space_params(isl_map_get_space(map));
1070
8
  space = isl_space_set_from_params(space);
1071
8
  space = isl_space_set_tuple_id(space, isl_dim_set, id);
1072
8
  space = isl_space_map_from_set(space);
1073
8
1074
8
  domain = isl_map_wrap(map);
1075
8
  range = isl_map_wrap(isl_map_universe(space));
1076
8
  map = isl_map_from_domain_and_range(domain, range);
1077
8
  map = isl_map_zip(map);
1078
8
1079
8
  return map;
1080
8
}
1081
1082
/* Given that at least one of "src" or "dst" is compressed, return
1083
 * a map between the spaces of these nodes restricted to the affine
1084
 * hull that was used in the compression.
1085
 */
1086
static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
1087
  struct isl_sched_node *dst)
1088
18
{
1089
18
  isl_set *dom, *ran;
1090
18
1091
18
  if (src->compressed)
1092
10
    dom = isl_set_copy(src->hull);
1093
18
  else
1094
8
    dom = isl_set_universe(isl_space_copy(src->space));
1095
18
  if (dst->compressed)
1096
10
    ran = isl_set_copy(dst->hull);
1097
18
  else
1098
8
    ran = isl_set_universe(isl_space_copy(dst->space));
1099
18
1100
18
  return isl_map_from_domain_and_range(dom, ran);
1101
18
}
1102
1103
/* Intersect the domains of the nested relations in domain and range
1104
 * of "tagged" with "map".
1105
 */
1106
static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
1107
  __isl_keep isl_map *map)
1108
10
{
1109
10
  isl_set *set;
1110
10
1111
10
  tagged = isl_map_zip(tagged);
1112
10
  set = isl_map_wrap(isl_map_copy(map));
1113
10
  tagged = isl_map_intersect_domain(tagged, set);
1114
10
  tagged = isl_map_zip(tagged);
1115
10
  return tagged;
1116
10
}
1117
1118
/* Return a pointer to the node that lives in the domain space of "map"
1119
 * or NULL if there is no such node.
1120
 */
1121
static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
1122
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1123
603
{
1124
603
  struct isl_sched_node *node;
1125
603
  isl_space *space;
1126
603
1127
603
  space = isl_space_domain(isl_map_get_space(map));
1128
603
  node = graph_find_node(ctx, graph, space);
1129
603
  isl_space_free(space);
1130
603
1131
603
  return node;
1132
603
}
1133
1134
/* Return a pointer to the node that lives in the range space of "map"
1135
 * or NULL if there is no such node.
1136
 */
1137
static struct isl_sched_node *find_range_node(isl_ctx *ctx,
1138
  struct isl_sched_graph *graph, __isl_keep isl_map *map)
1139
603
{
1140
603
  struct isl_sched_node *node;
1141
603
  isl_space *space;
1142
603
1143
603
  space = isl_space_range(isl_map_get_space(map));
1144
603
  node = graph_find_node(ctx, graph, space);
1145
603
  isl_space_free(space);
1146
603
1147
603
  return node;
1148
603
}
1149
1150
/* Add a new edge to the graph based on the given map
1151
 * and add it to data->graph->edge_table[data->type].
1152
 * If a dependence relation of a given type happens to be identical
1153
 * to one of the dependence relations of a type that was added before,
1154
 * then we don't create a new edge, but instead mark the original edge
1155
 * as also representing a dependence of the current type.
1156
 *
1157
 * Edges of type isl_edge_condition or isl_edge_conditional_validity
1158
 * may be specified as "tagged" dependence relations.  That is, "map"
1159
 * may contain elements (i -> a) -> (j -> b), where i -> j denotes
1160
 * the dependence on iterations and a and b are tags.
1161
 * edge->map is set to the relation containing the elements i -> j,
1162
 * while edge->tagged_condition and edge->tagged_validity contain
1163
 * the union of all the "map" relations
1164
 * for which extract_edge is called that result in the same edge->map.
1165
 *
1166
 * If the source or the destination node is compressed, then
1167
 * intersect both "map" and "tagged" with the constraints that
1168
 * were used to construct the compression.
1169
 * This ensures that there are no schedule constraints defined
1170
 * outside of these domains, while the scheduler no longer has
1171
 * any control over those outside parts.
1172
 */
1173
static isl_stat extract_edge(__isl_take isl_map *map, void *user)
1174
603
{
1175
603
  isl_ctx *ctx = isl_map_get_ctx(map);
1176
603
  struct isl_extract_edge_data *data = user;
1177
603
  struct isl_sched_graph *graph = data->graph;
1178
603
  struct isl_sched_node *src, *dst;
1179
603
  struct isl_sched_edge *edge;
1180
603
  isl_map *tagged = NULL;
1181
603
1182
603
  if (data->type == isl_edge_condition ||
1183
578
      
data->type == isl_edge_conditional_validity578
)
{55
1184
55
    if (
isl_map_can_zip(map)55
)
{47
1185
47
      tagged = isl_map_copy(map);
1186
47
      map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
1187
8
    } else {
1188
8
      tagged = insert_dummy_tags(isl_map_copy(map));
1189
8
    }
1190
55
  }
1191
603
1192
603
  src = find_domain_node(ctx, graph, map);
1193
603
  dst = find_range_node(ctx, graph, map);
1194
603
1195
603
  if (
!src || 603
!dst603
)
{24
1196
24
    isl_map_free(map);
1197
24
    isl_map_free(tagged);
1198
24
    return isl_stat_ok;
1199
24
  }
1200
603
1201
579
  
if (579
src->compressed || 579
dst->compressed569
)
{18
1202
18
    isl_map *hull;
1203
18
    hull = extract_hull(src, dst);
1204
18
    if (tagged)
1205
10
      tagged = map_intersect_domains(tagged, hull);
1206
18
    map = isl_map_intersect(map, hull);
1207
18
  }
1208
579
1209
579
  graph->edge[graph->n_edge].src = src;
1210
579
  graph->edge[graph->n_edge].dst = dst;
1211
579
  graph->edge[graph->n_edge].map = map;
1212
579
  graph->edge[graph->n_edge].types = 0;
1213
579
  graph->edge[graph->n_edge].tagged_condition = NULL;
1214
579
  graph->edge[graph->n_edge].tagged_validity = NULL;
1215
579
  set_type(&graph->edge[graph->n_edge], data->type);
1216
579
  if (data->type == isl_edge_condition)
1217
25
    graph->edge[graph->n_edge].tagged_condition =
1218
25
          isl_union_map_from_map(tagged);
1219
579
  if (data->type == isl_edge_conditional_validity)
1220
30
    graph->edge[graph->n_edge].tagged_validity =
1221
30
          isl_union_map_from_map(tagged);
1222
579
1223
579
  edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
1224
579
  if (
!edge579
)
{0
1225
0
    graph->n_edge++;
1226
0
    return isl_stat_error;
1227
0
  }
1228
579
  
if (579
edge == &graph->edge[graph->n_edge]579
)
1229
236
    return graph_edge_table_add(ctx, graph, data->type,
1230
236
            &graph->edge[graph->n_edge++]);
1231
579
1232
343
  
if (343
merge_edge(edge, &graph->edge[graph->n_edge]) < 0343
)
1233
0
    return -1;
1234
343
1235
343
  return graph_edge_table_add(ctx, graph, data->type, edge);
1236
343
}
1237
1238
/* Initialize the schedule graph "graph" from the schedule constraints "sc".
1239
 *
1240
 * The context is included in the domain before the nodes of
1241
 * the graphs are extracted in order to be able to exploit
1242
 * any possible additional equalities.
1243
 * Note that this intersection is only performed locally here.
1244
 */
1245
static isl_stat graph_init(struct isl_sched_graph *graph,
1246
  __isl_keep isl_schedule_constraints *sc)
1247
130
{
1248
130
  isl_ctx *ctx;
1249
130
  isl_union_set *domain;
1250
130
  isl_union_map *c;
1251
130
  struct isl_extract_edge_data data;
1252
130
  enum isl_edge_type i;
1253
130
  isl_stat r;
1254
130
1255
130
  if (!sc)
1256
0
    return isl_stat_error;
1257
130
1258
130
  ctx = isl_schedule_constraints_get_ctx(sc);
1259
130
1260
130
  domain = isl_schedule_constraints_get_domain(sc);
1261
130
  graph->n = isl_union_set_n_set(domain);
1262
130
  isl_union_set_free(domain);
1263
130
1264
130
  if (graph_alloc(ctx, graph, graph->n,
1265
130
      isl_schedule_constraints_n_map(sc)) < 0)
1266
0
    return isl_stat_error;
1267
130
1268
130
  
if (130
compute_max_row(graph, sc) < 0130
)
1269
0
    return isl_stat_error;
1270
130
  graph->root = 1;
1271
130
  graph->n = 0;
1272
130
  domain = isl_schedule_constraints_get_domain(sc);
1273
130
  domain = isl_union_set_intersect_params(domain,
1274
130
            isl_schedule_constraints_get_context(sc));
1275
130
  r = isl_union_set_foreach_set(domain, &extract_node, graph);
1276
130
  isl_union_set_free(domain);
1277
130
  if (r < 0)
1278
0
    return isl_stat_error;
1279
130
  
if (130
graph_init_table(ctx, graph) < 0130
)
1280
0
    return isl_stat_error;
1281
780
  
for (i = isl_edge_first; 130
i <= isl_edge_last780
;
++i650
)
{650
1282
650
    c = isl_schedule_constraints_get(sc, i);
1283
650
    graph->max_edge[i] = isl_union_map_n_map(c);
1284
650
    isl_union_map_free(c);
1285
650
    if (!c)
1286
0
      return isl_stat_error;
1287
650
  }
1288
130
  
if (130
graph_init_edge_tables(ctx, graph) < 0130
)
1289
0
    return isl_stat_error;
1290
130
  graph->n_edge = 0;
1291
130
  data.graph = graph;
1292
780
  for (i = isl_edge_first; 
i <= isl_edge_last780
;
++i650
)
{650
1293
650
    isl_stat r;
1294
650
1295
650
    data.type = i;
1296
650
    c = isl_schedule_constraints_get(sc, i);
1297
650
    r = isl_union_map_foreach_map(c, &extract_edge, &data);
1298
650
    isl_union_map_free(c);
1299
650
    if (r < 0)
1300
0
      return isl_stat_error;
1301
650
  }
1302
130
1303
130
  return isl_stat_ok;
1304
130
}
1305
1306
/* Check whether there is any dependence from node[j] to node[i]
1307
 * or from node[i] to node[j].
1308
 */
1309
static isl_bool node_follows_weak(int i, int j, void *user)
1310
206
{
1311
206
  isl_bool f;
1312
206
  struct isl_sched_graph *graph = user;
1313
206
1314
206
  f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
1315
206
  if (
f < 0 || 206
f206
)
1316
106
    return f;
1317
100
  return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
1318
206
}
1319
1320
/* Check whether there is a (conditional) validity dependence from node[j]
1321
 * to node[i], forcing node[i] to follow node[j].
1322
 */
1323
static isl_bool node_follows_strong(int i, int j, void *user)
1324
295
{
1325
295
  struct isl_sched_graph *graph = user;
1326
295
1327
295
  return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
1328
295
}
1329
1330
/* Use Tarjan's algorithm for computing the strongly connected components
1331
 * in the dependence graph only considering those edges defined by "follows".
1332
 */
1333
static int detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph,
1334
  isl_bool (*follows)(int i, int j, void *user))
1335
393
{
1336
393
  int i, n;
1337
393
  struct isl_tarjan_graph *g = NULL;
1338
393
1339
393
  g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
1340
393
  if (!g)
1341
0
    return -1;
1342
393
1343
393
  graph->scc = 0;
1344
393
  i = 0;
1345
393
  n = graph->n;
1346
911
  while (
n911
)
{518
1347
1.17k
    while (
g->order[i] != -11.17k
)
{652
1348
652
      graph->node[g->order[i]].scc = graph->scc;
1349
652
      --n;
1350
652
      ++i;
1351
652
    }
1352
518
    ++i;
1353
518
    graph->scc++;
1354
518
  }
1355
393
1356
393
  isl_tarjan_graph_free(g);
1357
393
1358
393
  return 0;
1359
393
}
1360
1361
/* Apply Tarjan's algorithm to detect the strongly connected components
1362
 * in the dependence graph.
1363
 * Only consider the (conditional) validity dependences and clear "weak".
1364
 */
1365
static int detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1366
250
{
1367
250
  graph->weak = 0;
1368
250
  return detect_ccs(ctx, graph, &node_follows_strong);
1369
250
}
1370
1371
/* Apply Tarjan's algorithm to detect the (weakly) connected components
1372
 * in the dependence graph.
1373
 * Consider all dependences and set "weak".
1374
 */
1375
static int detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1376
128
{
1377
128
  graph->weak = 1;
1378
128
  return detect_ccs(ctx, graph, &node_follows_weak);
1379
128
}
1380
1381
static int cmp_scc(const void *a, const void *b, void *data)
1382
89
{
1383
89
  struct isl_sched_graph *graph = data;
1384
89
  const int *i1 = a;
1385
89
  const int *i2 = b;
1386
89
1387
89
  return graph->node[*i1].scc - graph->node[*i2].scc;
1388
89
}
1389
1390
/* Sort the elements of graph->sorted according to the corresponding SCCs.
1391
 */
1392
static int sort_sccs(struct isl_sched_graph *graph)
1393
214
{
1394
214
  return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
1395
214
}
1396
1397
/* Given a dependence relation R from "node" to itself,
1398
 * construct the set of coefficients of valid constraints for elements
1399
 * in that dependence relation.
1400
 * In particular, the result contains tuples of coefficients
1401
 * c_0, c_n, c_x such that
1402
 *
1403
 *  c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
1404
 *
1405
 * or, equivalently,
1406
 *
1407
 *  c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
1408
 *
1409
 * We choose here to compute the dual of delta R.
1410
 * Alternatively, we could have computed the dual of R, resulting
1411
 * in a set of tuples c_0, c_n, c_x, c_y, and then
1412
 * plugged in (c_0, c_n, c_x, -c_x).
1413
 *
1414
 * If "node" has been compressed, then the dependence relation
1415
 * is also compressed before the set of coefficients is computed.
1416
 */
1417
static __isl_give isl_basic_set *intra_coefficients(
1418
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1419
  __isl_take isl_map *map)
1420
891
{
1421
891
  isl_set *delta;
1422
891
  isl_map *key;
1423
891
  isl_basic_set *coef;
1424
891
  isl_maybe_isl_basic_set m;
1425
891
1426
891
  m = isl_map_to_basic_set_try_get(graph->intra_hmap, map);
1427
891
  if (
m.valid < 0 || 891
m.valid891
)
{763
1428
763
    isl_map_free(map);
1429
763
    return m.value;
1430
763
  }
1431
891
1432
128
  key = isl_map_copy(map);
1433
128
  if (
node->compressed128
)
{0
1434
0
    map = isl_map_preimage_domain_multi_aff(map,
1435
0
            isl_multi_aff_copy(node->decompress));
1436
0
    map = isl_map_preimage_range_multi_aff(map,
1437
0
            isl_multi_aff_copy(node->decompress));
1438
0
  }
1439
128
  delta = isl_set_remove_divs(isl_map_deltas(map));
1440
128
  coef = isl_set_coefficients(delta);
1441
128
  graph->intra_hmap = isl_map_to_basic_set_set(graph->intra_hmap, key,
1442
128
          isl_basic_set_copy(coef));
1443
128
1444
128
  return coef;
1445
891
}
1446
1447
/* Given a dependence relation R, construct the set of coefficients
1448
 * of valid constraints for elements in that dependence relation.
1449
 * In particular, the result contains tuples of coefficients
1450
 * c_0, c_n, c_x, c_y such that
1451
 *
1452
 *  c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
1453
 *
1454
 * If the source or destination nodes of "edge" have been compressed,
1455
 * then the dependence relation is also compressed before
1456
 * the set of coefficients is computed.
1457
 */
1458
static __isl_give isl_basic_set *inter_coefficients(
1459
  struct isl_sched_graph *graph, struct isl_sched_edge *edge,
1460
  __isl_take isl_map *map)
1461
619
{
1462
619
  isl_set *set;
1463
619
  isl_map *key;
1464
619
  isl_basic_set *coef;
1465
619
  isl_maybe_isl_basic_set m;
1466
619
1467
619
  m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
1468
619
  if (
m.valid < 0 || 619
m.valid619
)
{530
1469
530
    isl_map_free(map);
1470
530
    return m.value;
1471
530
  }
1472
619
1473
89
  key = isl_map_copy(map);
1474
89
  if (edge->src->compressed)
1475
2
    map = isl_map_preimage_domain_multi_aff(map,
1476
2
            isl_multi_aff_copy(edge->src->decompress));
1477
89
  if (edge->dst->compressed)
1478
2
    map = isl_map_preimage_range_multi_aff(map,
1479
2
            isl_multi_aff_copy(edge->dst->decompress));
1480
89
  set = isl_map_wrap(isl_map_remove_divs(map));
1481
89
  coef = isl_set_coefficients(set);
1482
89
  graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
1483
89
          isl_basic_set_copy(coef));
1484
89
1485
89
  return coef;
1486
619
}
1487
1488
/* Return the position of the coefficients of the variables in
1489
 * the coefficients constraints "coef".
1490
 *
1491
 * The space of "coef" is of the form
1492
 *
1493
 *  { coefficients[[cst, params] -> S] }
1494
 *
1495
 * Return the position of S.
1496
 */
1497
static int coef_var_offset(__isl_keep isl_basic_set *coef)
1498
982
{
1499
982
  int offset;
1500
982
  isl_space *space;
1501
982
1502
982
  space = isl_space_unwrap(isl_basic_set_get_space(coef));
1503
982
  offset = isl_space_dim(space, isl_dim_in);
1504
982
  isl_space_free(space);
1505
982
1506
982
  return offset;
1507
982
}
1508
1509
/* Return the offset of the coefficients of the variables of "node"
1510
 * within the (I)LP.
1511
 *
1512
 * Within each node, the coefficients have the following order:
1513
 *  - c_i_0
1514
 *  - c_i_n (if parametric)
1515
 *  - positive and negative parts of c_i_x
1516
 */
1517
static int node_var_coef_offset(struct isl_sched_node *node)
1518
3.83k
{
1519
3.83k
  return node->start + 1 + node->nparam;
1520
3.83k
}
1521
1522
/* Construct an isl_dim_map for mapping constraints on coefficients
1523
 * for "node" to the corresponding positions in graph->lp.
1524
 * "offset" is the offset of the coefficients for the variables
1525
 * in the input constraints.
1526
 * "s" is the sign of the mapping.
1527
 *
1528
 * The input constraints are given in terms of the coefficients (c_0, c_n, c_x).
1529
 * The mapping produced by this function essentially plugs in
1530
 * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
1531
 * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
1532
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1533
 *
1534
 * The caller can extend the mapping to also map the other coefficients
1535
 * (and therefore not plug in 0).
1536
 */
1537
static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
1538
  struct isl_sched_graph *graph, struct isl_sched_node *node,
1539
  int offset, int s)
1540
577
{
1541
577
  int pos;
1542
577
  unsigned total;
1543
577
  isl_dim_map *dim_map;
1544
577
1545
577
  total = isl_basic_set_total_dim(graph->lp);
1546
577
  pos = node_var_coef_offset(node);
1547
577
  dim_map = isl_dim_map_alloc(ctx, total);
1548
577
  isl_dim_map_range(dim_map, pos, 2, offset, 1, node->nvar, -s);
1549
577
  isl_dim_map_range(dim_map, pos + 1, 2, offset, 1, node->nvar, s);
1550
577
1551
577
  return dim_map;
1552
577
}
1553
1554
/* Construct an isl_dim_map for mapping constraints on coefficients
1555
 * for "src" (node i) and "dst" (node j) to the corresponding positions
1556
 * in graph->lp.
1557
 * "offset" is the offset of the coefficients for the variables of "src"
1558
 * in the input constraints.
1559
 * "s" is the sign of the mapping.
1560
 *
1561
 * The input constraints are given in terms of the coefficients
1562
 * (c_0, c_n, c_x, c_y).
1563
 * The mapping produced by this function essentially plugs in
1564
 * (c_j_0 - c_i_0, c_j_n - c_i_n,
1565
 *  c_j_x^+ - c_j_x^-, -(c_i_x^+ - c_i_x^-)) if s = 1 and
1566
 * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
1567
 *  - (c_j_x^+ - c_j_x^-), c_i_x^+ - c_i_x^-) if s = -1.
1568
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1569
 *
1570
 * The caller can further extend the mapping.
1571
 */
1572
static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
1573
  struct isl_sched_graph *graph, struct isl_sched_node *src,
1574
  struct isl_sched_node *dst, int offset, int s)
1575
405
{
1576
405
  int pos;
1577
405
  unsigned total;
1578
405
  isl_dim_map *dim_map;
1579
405
1580
405
  total = isl_basic_set_total_dim(graph->lp);
1581
405
  dim_map = isl_dim_map_alloc(ctx, total);
1582
405
1583
405
  isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, s);
1584
405
  isl_dim_map_range(dim_map, dst->start + 1, 1, 1, 1, dst->nparam, s);
1585
405
  pos = node_var_coef_offset(dst);
1586
405
  isl_dim_map_range(dim_map, pos, 2, offset + src->nvar, 1,
1587
405
        dst->nvar, -s);
1588
405
  isl_dim_map_range(dim_map, pos + 1, 2, offset + src->nvar, 1,
1589
405
        dst->nvar, s);
1590
405
1591
405
  isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -s);
1592
405
  isl_dim_map_range(dim_map, src->start + 1, 1, 1, 1, src->nparam, -s);
1593
405
  pos = node_var_coef_offset(src);
1594
405
  isl_dim_map_range(dim_map, pos, 2, offset, 1, src->nvar, s);
1595
405
  isl_dim_map_range(dim_map, pos + 1, 2, offset, 1, src->nvar, -s);
1596
405
1597
405
  return dim_map;
1598
405
}
1599
1600
/* Add constraints to graph->lp that force validity for the given
1601
 * dependence from a node i to itself.
1602
 * That is, add constraints that enforce
1603
 *
1604
 *  (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
1605
 *  = c_i_x (y - x) >= 0
1606
 *
1607
 * for each (x,y) in R.
1608
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
1609
 * of valid constraints for (y - x) and then plug in (0, 0, c_i_x^+ - c_i_x^-),
1610
 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
1611
 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1612
 *
1613
 * Actually, we do not construct constraints for the c_i_x themselves,
1614
 * but for the coefficients of c_i_x written as a linear combination
1615
 * of the columns in node->cmap.
1616
 */
1617
static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
1618
  struct isl_sched_edge *edge)
1619
261
{
1620
261
  int offset;
1621
261
  isl_map *map = isl_map_copy(edge->map);
1622
261
  isl_ctx *ctx = isl_map_get_ctx(map);
1623
261
  isl_dim_map *dim_map;
1624
261
  isl_basic_set *coef;
1625
261
  struct isl_sched_node *node = edge->src;
1626
261
1627
261
  coef = intra_coefficients(graph, node, map);
1628
261
1629
261
  offset = coef_var_offset(coef);
1630
261
1631
261
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1632
261
              offset, isl_mat_copy(node->cmap));
1633
261
  if (!coef)
1634
0
    return isl_stat_error;
1635
261
1636
261
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
1637
261
  graph->lp = isl_basic_set_extend_constraints(graph->lp,
1638
261
      coef->n_eq, coef->n_ineq);
1639
261
  graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
1640
261
                 coef, dim_map);
1641
261
1642
261
  return isl_stat_ok;
1643
261
}
1644
1645
/* Add constraints to graph->lp that force validity for the given
1646
 * dependence from node i to node j.
1647
 * That is, add constraints that enforce
1648
 *
1649
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
1650
 *
1651
 * for each (x,y) in R.
1652
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1653
 * of valid constraints for R and then plug in
1654
 * (c_j_0 - c_i_0, c_j_n - c_i_n, c_j_x^+ - c_j_x^- - (c_i_x^+ - c_i_x^-)),
1655
 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
1656
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1657
 *
1658
 * Actually, we do not construct constraints for the c_*_x themselves,
1659
 * but for the coefficients of c_*_x written as a linear combination
1660
 * of the columns in node->cmap.
1661
 */
1662
static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
1663
  struct isl_sched_edge *edge)
1664
191
{
1665
191
  int offset;
1666
191
  isl_map *map = isl_map_copy(edge->map);
1667
191
  isl_ctx *ctx = isl_map_get_ctx(map);
1668
191
  isl_dim_map *dim_map;
1669
191
  isl_basic_set *coef;
1670
191
  struct isl_sched_node *src = edge->src;
1671
191
  struct isl_sched_node *dst = edge->dst;
1672
191
1673
191
  coef = inter_coefficients(graph, edge, map);
1674
191
1675
191
  offset = coef_var_offset(coef);
1676
191
1677
191
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1678
191
        offset, isl_mat_copy(src->cmap));
1679
191
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1680
191
        offset + src->nvar, isl_mat_copy(dst->cmap));
1681
191
  if (!coef)
1682
0
    return isl_stat_error;
1683
191
1684
191
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
1685
191
1686
191
  edge->start = graph->lp->n_ineq;
1687
191
  graph->lp = isl_basic_set_extend_constraints(graph->lp,
1688
191
      coef->n_eq, coef->n_ineq);
1689
191
  graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
1690
191
                 coef, dim_map);
1691
191
  if (!graph->lp)
1692
0
    return isl_stat_error;
1693
191
  edge->end = graph->lp->n_ineq;
1694
191
1695
191
  return isl_stat_ok;
1696
191
}
1697
1698
/* Add constraints to graph->lp that bound the dependence distance for the given
1699
 * dependence from a node i to itself.
1700
 * If s = 1, we add the constraint
1701
 *
1702
 *  c_i_x (y - x) <= m_0 + m_n n
1703
 *
1704
 * or
1705
 *
1706
 *  -c_i_x (y - x) + m_0 + m_n n >= 0
1707
 *
1708
 * for each (x,y) in R.
1709
 * If s = -1, we add the constraint
1710
 *
1711
 *  -c_i_x (y - x) <= m_0 + m_n n
1712
 *
1713
 * or
1714
 *
1715
 *  c_i_x (y - x) + m_0 + m_n n >= 0
1716
 *
1717
 * for each (x,y) in R.
1718
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
1719
 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
1720
 * with each coefficient (except m_0) represented as a pair of non-negative
1721
 * coefficients.
1722
 *
1723
 * Actually, we do not construct constraints for the c_i_x themselves,
1724
 * but for the coefficients of c_i_x written as a linear combination
1725
 * of the columns in node->cmap.
1726
 *
1727
 *
1728
 * If "local" is set, then we add constraints
1729
 *
1730
 *  c_i_x (y - x) <= 0
1731
 *
1732
 * or
1733
 *
1734
 *  -c_i_x (y - x) <= 0
1735
 *
1736
 * instead, forcing the dependence distance to be (less than or) equal to 0.
1737
 * That is, we plug in (0, 0, -s * c_i_x),
1738
 * Note that dependences marked local are treated as validity constraints
1739
 * by add_all_validity_constraints and therefore also have
1740
 * their distances bounded by 0 from below.
1741
 */
1742
static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
1743
  struct isl_sched_edge *edge, int s, int local)
1744
271
{
1745
271
  int offset;
1746
271
  unsigned nparam;
1747
271
  isl_map *map = isl_map_copy(edge->map);
1748
271
  isl_ctx *ctx = isl_map_get_ctx(map);
1749
271
  isl_dim_map *dim_map;
1750
271
  isl_basic_set *coef;
1751
271
  struct isl_sched_node *node = edge->src;
1752
271
1753
271
  coef = intra_coefficients(graph, node, map);
1754
271
1755
271
  offset = coef_var_offset(coef);
1756
271
1757
271
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1758
271
              offset, isl_mat_copy(node->cmap));
1759
271
  if (!coef)
1760
0
    return isl_stat_error;
1761
271
1762
271
  nparam = isl_space_dim(node->space, isl_dim_param);
1763
271
  dim_map = intra_dim_map(ctx, graph, node, offset, -s);
1764
271
1765
271
  if (
!local271
)
{140
1766
140
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
1767
140
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
1768
140
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
1769
140
  }
1770
271
  graph->lp = isl_basic_set_extend_constraints(graph->lp,
1771
271
      coef->n_eq, coef->n_ineq);
1772
271
  graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
1773
271
                 coef, dim_map);
1774
271
1775
271
  return isl_stat_ok;
1776
271
}
1777
1778
/* Add constraints to graph->lp that bound the dependence distance for the given
1779
 * dependence from node i to node j.
1780
 * If s = 1, we add the constraint
1781
 *
1782
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
1783
 *    <= m_0 + m_n n
1784
 *
1785
 * or
1786
 *
1787
 *  -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
1788
 *    m_0 + m_n n >= 0
1789
 *
1790
 * for each (x,y) in R.
1791
 * If s = -1, we add the constraint
1792
 *
1793
 *  -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
1794
 *    <= m_0 + m_n n
1795
 *
1796
 * or
1797
 *
1798
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
1799
 *    m_0 + m_n n >= 0
1800
 *
1801
 * for each (x,y) in R.
1802
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1803
 * of valid constraints for R and then plug in
1804
 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
1805
 *  -s*c_j_x+s*c_i_x)
1806
 * with each coefficient (except m_0, c_*_0 and c_*_n)
1807
 * represented as a pair of non-negative coefficients.
1808
 *
1809
 * Actually, we do not construct constraints for the c_*_x themselves,
1810
 * but for the coefficients of c_*_x written as a linear combination
1811
 * of the columns in node->cmap.
1812
 *
1813
 *
1814
 * If "local" is set, then we add constraints
1815
 *
1816
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
1817
 *
1818
 * or
1819
 *
1820
 *  -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)) <= 0
1821
 *
1822
 * instead, forcing the dependence distance to be (less than or) equal to 0.
1823
 * That is, we plug in
1824
 * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, -s*c_j_x+s*c_i_x).
1825
 * Note that dependences marked local are treated as validity constraints
1826
 * by add_all_validity_constraints and therefore also have
1827
 * their distances bounded by 0 from below.
1828
 */
1829
static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
1830
  struct isl_sched_edge *edge, int s, int local)
1831
200
{
1832
200
  int offset;
1833
200
  unsigned nparam;
1834
200
  isl_map *map = isl_map_copy(edge->map);
1835
200
  isl_ctx *ctx = isl_map_get_ctx(map);
1836
200
  isl_dim_map *dim_map;
1837
200
  isl_basic_set *coef;
1838
200
  struct isl_sched_node *src = edge->src;
1839
200
  struct isl_sched_node *dst = edge->dst;
1840
200
1841
200
  coef = inter_coefficients(graph, edge, map);
1842
200
1843
200
  offset = coef_var_offset(coef);
1844
200
1845
200
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1846
200
        offset, isl_mat_copy(src->cmap));
1847
200
  coef = isl_basic_set_transform_dims(coef, isl_dim_set,
1848
200
        offset + src->nvar, isl_mat_copy(dst->cmap));
1849
200
  if (!coef)
1850
0
    return isl_stat_error;
1851
200
1852
200
  nparam = isl_space_dim(src->space, isl_dim_param);
1853
200
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
1854
200
1855
200
  if (
!local200
)
{119
1856
119
    isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
1857
119
    isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
1858
119
    isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
1859
119
  }
1860
200
1861
200
  graph->lp = isl_basic_set_extend_constraints(graph->lp,
1862
200
      coef->n_eq, coef->n_ineq);
1863
200
  graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
1864
200
                 coef, dim_map);
1865
200
1866
200
  return isl_stat_ok;
1867
200
}
1868
1869
/* Add all validity constraints to graph->lp.
1870
 *
1871
 * An edge that is forced to be local needs to have its dependence
1872
 * distances equal to zero.  We take care of bounding them by 0 from below
1873
 * here.  add_all_proximity_constraints takes care of bounding them by 0
1874
 * from above.
1875
 *
1876
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
1877
 * Otherwise, we ignore them.
1878
 */
1879
static int add_all_validity_constraints(struct isl_sched_graph *graph,
1880
  int use_coincidence)
1881
367
{
1882
367
  int i;
1883
367
1884
925
  for (i = 0; 
i < graph->n_edge925
;
++i558
)
{558
1885
558
    struct isl_sched_edge *edge= &graph->edge[i];
1886
558
    int local;
1887
558
1888
558
    local = is_local(edge) ||
1889
535
      
(is_coincidence(edge) && 535
use_coincidence322
);
1890
558
    if (
!is_validity(edge) && 558
!local109
)
1891
106
      continue;
1892
452
    
if (452
edge->src != edge->dst452
)
1893
191
      continue;
1894
261
    
if (261
add_intra_validity_constraints(graph, edge) < 0261
)
1895
0
      return -1;
1896
261
  }
1897
367
1898
925
  
for (i = 0; 367
i < graph->n_edge925
;
++i558
)
{558
1899
558
    struct isl_sched_edge *edge = &graph->edge[i];
1900
558
    int local;
1901
558
1902
558
    local = is_local(edge) ||
1903
535
      
(is_coincidence(edge) && 535
use_coincidence322
);
1904
558
    if (
!is_validity(edge) && 558
!local109
)
1905
106
      continue;
1906
452
    
if (452
edge->src == edge->dst452
)
1907
261
      continue;
1908
191
    
if (191
add_inter_validity_constraints(graph, edge) < 0191
)
1909
0
      return -1;
1910
191
  }
1911
367
1912
367
  return 0;
1913
367
}
1914
1915
/* Add constraints to graph->lp that bound the dependence distance
1916
 * for all dependence relations.
1917
 * If a given proximity dependence is identical to a validity
1918
 * dependence, then the dependence distance is already bounded
1919
 * from below (by zero), so we only need to bound the distance
1920
 * from above.  (This includes the case of "local" dependences
1921
 * which are treated as validity dependence by add_all_validity_constraints.)
1922
 * Otherwise, we need to bound the distance both from above and from below.
1923
 *
1924
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
1925
 * Otherwise, we ignore them.
1926
 */
1927
static int add_all_proximity_constraints(struct isl_sched_graph *graph,
1928
  int use_coincidence)
1929
367
{
1930
367
  int i;
1931
367
1932
925
  for (i = 0; 
i < graph->n_edge925
;
++i558
)
{558
1933
558
    struct isl_sched_edge *edge= &graph->edge[i];
1934
558
    int local;
1935
558
1936
558
    local = is_local(edge) ||
1937
535
      
(is_coincidence(edge) && 535
use_coincidence322
);
1938
558
    if (
!is_proximity(edge) && 558
!local111
)
1939
104
      continue;
1940
454
    
if (454
edge->src == edge->dst &&454
1941
263
        add_intra_proximity_constraints(graph, edge, 1, local) < 0)
1942
0
      return -1;
1943
454
    
if (454
edge->src != edge->dst &&454
1944
191
        add_inter_proximity_constraints(graph, edge, 1, local) < 0)
1945
0
      return -1;
1946
454
    
if (454
is_validity(edge) || 454
local20
)
1947
437
      continue;
1948
17
    
if (17
edge->src == edge->dst &&17
1949
8
        add_intra_proximity_constraints(graph, edge, -1, 0) < 0)
1950
0
      return -1;
1951
17
    
if (17
edge->src != edge->dst &&17
1952
9
        add_inter_proximity_constraints(graph, edge, -1, 0) < 0)
1953
0
      return -1;
1954
17
  }
1955
367
1956
367
  return 0;
1957
367
}
1958
1959
/* Compute a basis for the rows in the linear part of the schedule
1960
 * and extend this basis to a full basis.  The remaining rows
1961
 * can then be used to force linear independence from the rows
1962
 * in the schedule.
1963
 *
1964
 * In particular, given the schedule rows S, we compute
1965
 *
1966
 *  S   = H Q
1967
 *  S U = H
1968
 *
1969
 * with H the Hermite normal form of S.  That is, all but the
1970
 * first rank columns of H are zero and so each row in S is
1971
 * a linear combination of the first rank rows of Q.
1972
 * The matrix Q is then transposed because we will write the
1973
 * coefficients of the next schedule row as a column vector s
1974
 * and express this s as a linear combination s = Q c of the
1975
 * computed basis.
1976
 * Similarly, the matrix U is transposed such that we can
1977
 * compute the coefficients c = U s from a schedule row s.
1978
 */
1979
static int node_update_cmap(struct isl_sched_node *node)
1980
933
{
1981
933
  isl_mat *H, *U, *Q;
1982
933
  int n_row = isl_mat_rows(node->sched);
1983
933
1984
933
  H = isl_mat_sub_alloc(node->sched, 0, n_row,
1985
933
            1 + node->nparam, node->nvar);
1986
933
1987
933
  H = isl_mat_left_hermite(H, 0, &U, &Q);
1988
933
  isl_mat_free(node->cmap);
1989
933
  isl_mat_free(node->cinv);
1990
933
  isl_mat_free(node->ctrans);
1991
933
  node->ctrans = isl_mat_copy(Q);
1992
933
  node->cmap = isl_mat_transpose(Q);
1993
933
  node->cinv = isl_mat_transpose(U);
1994
933
  node->rank = isl_mat_initial_non_zero_cols(H);
1995
933
  isl_mat_free(H);
1996
933
1997
933
  if (
!node->cmap || 933
!node->cinv933
||
!node->ctrans933
||
node->rank < 0933
)
1998
0
    return -1;
1999
933
  return 0;
2000
933
}
2001
2002
/* Is "edge" marked as a validity or a conditional validity edge?
2003
 */
2004
static int is_any_validity(struct isl_sched_edge *edge)
2005
132
{
2006
3
  return is_validity(edge) || is_conditional_validity(edge);
2007
132
}
2008
2009
/* How many times should we count the constraints in "edge"?
2010
 *
2011
 * If carry is set, then we are counting the number of
2012
 * (validity or conditional validity) constraints that will be added
2013
 * in setup_carry_lp and we count each edge exactly once.
2014
 *
2015
 * Otherwise, we count as follows
2016
 * validity   -> 1 (>= 0)
2017
 * validity+proximity -> 2 (>= 0 and upper bound)
2018
 * proximity    -> 2 (lower and upper bound)
2019
 * local(+any)    -> 2 (>= 0 and <= 0)
2020
 *
2021
 * If an edge is only marked conditional_validity then it counts
2022
 * as zero since it is only checked afterwards.
2023
 *
2024
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2025
 * Otherwise, we ignore them.
2026
 */
2027
static int edge_multiplicity(struct isl_sched_edge *edge, int carry,
2028
  int use_coincidence)
2029
617
{
2030
617
  if (carry)
2031
59
    return 1;
2032
558
  
if (558
is_proximity(edge) || 558
is_local(edge)111
)
2033
450
    return 2;
2034
108
  
if (108
use_coincidence && 108
is_coincidence(edge)4
)
2035
4
    return 2;
2036
104
  
if (104
is_validity(edge)104
)
2037
15
    return 1;
2038
89
  return 0;
2039
104
}
2040
2041
/* Count the number of equality and inequality constraints
2042
 * that will be added for the given map.
2043
 *
2044
 * "use_coincidence" is set if we should take into account coincidence edges.
2045
 */
2046
static isl_stat count_map_constraints(struct isl_sched_graph *graph,
2047
  struct isl_sched_edge *edge, __isl_take isl_map *map,
2048
  int *n_eq, int *n_ineq, int carry, int use_coincidence)
2049
617
{
2050
617
  isl_basic_set *coef;
2051
617
  int f = edge_multiplicity(edge, carry, use_coincidence);
2052
617
2053
617
  if (
f == 0617
)
{89
2054
89
    isl_map_free(map);
2055
89
    return isl_stat_ok;
2056
89
  }
2057
617
2058
528
  
if (528
edge->src == edge->dst528
)
2059
314
    coef = intra_coefficients(graph, edge->src, map);
2060
528
  else
2061
214
    coef = inter_coefficients(graph, edge, map);
2062
528
  if (!coef)
2063
0
    return isl_stat_error;
2064
528
  *n_eq += f * coef->n_eq;
2065
528
  *n_ineq += f * coef->n_ineq;
2066
528
  isl_basic_set_free(coef);
2067
528
2068
528
  return isl_stat_ok;
2069
528
}
2070
2071
/* Count the number of equality and inequality constraints
2072
 * that will be added to the main lp problem.
2073
 * We count as follows
2074
 * validity   -> 1 (>= 0)
2075
 * validity+proximity -> 2 (>= 0 and upper bound)
2076
 * proximity    -> 2 (lower and upper bound)
2077
 * local(+any)    -> 2 (>= 0 and <= 0)
2078
 *
2079
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2080
 * Otherwise, we ignore them.
2081
 */
2082
static int count_constraints(struct isl_sched_graph *graph,
2083
  int *n_eq, int *n_ineq, int use_coincidence)
2084
367
{
2085
367
  int i;
2086
367
2087
367
  *n_eq = *n_ineq = 0;
2088
925
  for (i = 0; 
i < graph->n_edge925
;
++i558
)
{558
2089
558
    struct isl_sched_edge *edge= &graph->edge[i];
2090
558
    isl_map *map = isl_map_copy(edge->map);
2091
558
2092
558
    if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
2093
558
              0, use_coincidence) < 0)
2094
0
      return -1;
2095
558
  }
2096
367
2097
367
  return 0;
2098
367
}
2099
2100
/* Count the number of constraints that will be added by
2101
 * add_bound_constant_constraints to bound the values of the constant terms
2102
 * and increment *n_eq and *n_ineq accordingly.
2103
 *
2104
 * In practice, add_bound_constant_constraints only adds inequalities.
2105
 */
2106
static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
2107
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2108
367
{
2109
367
  if (isl_options_get_schedule_max_constant_term(ctx) == -1)
2110
246
    return isl_stat_ok;
2111
367
2112
121
  *n_ineq += graph->n;
2113
121
2114
121
  return isl_stat_ok;
2115
367
}
2116
2117
/* Add constraints to bound the values of the constant terms in the schedule,
2118
 * if requested by the user.
2119
 *
2120
 * The maximal value of the constant terms is defined by the option
2121
 * "schedule_max_constant_term".
2122
 *
2123
 * Within each node, the coefficients have the following order:
2124
 *  - c_i_0
2125
 *  - c_i_n (if parametric)
2126
 *  - positive and negative parts of c_i_x
2127
 */
2128
static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
2129
  struct isl_sched_graph *graph)
2130
367
{
2131
367
  int i, k;
2132
367
  int max;
2133
367
  int total;
2134
367
2135
367
  max = isl_options_get_schedule_max_constant_term(ctx);
2136
367
  if (max == -1)
2137
246
    return isl_stat_ok;
2138
367
2139
121
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2140
121
2141
250
  for (i = 0; 
i < graph->n250
;
++i129
)
{129
2142
129
    struct isl_sched_node *node = &graph->node[i];
2143
129
    k = isl_basic_set_alloc_inequality(graph->lp);
2144
129
    if (k < 0)
2145
0
      return isl_stat_error;
2146
129
    isl_seq_clr(graph->lp->ineq[k], 1 +  total);
2147
129
    isl_int_set_si(graph->lp->ineq[k][1 + node->start], -1);
2148
129
    isl_int_set_si(graph->lp->ineq[k][0], max);
2149
129
  }
2150
121
2151
121
  return isl_stat_ok;
2152
121
}
2153
2154
/* Count the number of constraints that will be added by
2155
 * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
2156
 * accordingly.
2157
 *
2158
 * In practice, add_bound_coefficient_constraints only adds inequalities.
2159
 */
2160
static int count_bound_coefficient_constraints(isl_ctx *ctx,
2161
  struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2162
367
{
2163
367
  int i;
2164
367
2165
367
  if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
2166
245
      !isl_options_get_schedule_treat_coalescing(ctx))
2167
8
    return 0;
2168
367
2169
877
  
for (i = 0; 359
i < graph->n877
;
++i518
)
2170
518
    *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
2171
359
2172
359
  return 0;
2173
367
}
2174
2175
/* Add constraints to graph->lp that bound the values of
2176
 * the parameter schedule coefficients of "node" to "max" and
2177
 * the variable schedule coefficients to the corresponding entry
2178
 * in node->max.
2179
 * In either case, a negative value means that no bound needs to be imposed.
2180
 *
2181
 * For parameter coefficients, this amounts to adding a constraint
2182
 *
2183
 *  c_n <= max
2184
 *
2185
 * i.e.,
2186
 *
2187
 *  -c_n + max >= 0
2188
 *
2189
 * The variables coefficients are, however, not represented directly.
2190
 * Instead, the variables coefficients c_x are written as a linear
2191
 * combination c_x = cmap c_z of some other coefficients c_z,
2192
 * which are in turn encoded as c_z = c_z^+ - c_z^-.
2193
 * Let a_j be the elements of row i of node->cmap, then
2194
 *
2195
 *  -max_i <= c_x_i <= max_i
2196
 *
2197
 * is encoded as
2198
 *
2199
 *  -max_i <= \sum_j a_j (c_z_j^+ - c_z_j^-) <= max_i
2200
 *
2201
 * or
2202
 *
2203
 *  -\sum_j a_j (c_z_j^+ - c_z_j^-) + max_i >= 0
2204
 *  \sum_j a_j (c_z_j^+ - c_z_j^-) + max_i >= 0
2205
 */
2206
static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
2207
  struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
2208
518
{
2209
518
  int i, j, k;
2210
518
  int total;
2211
518
  isl_vec *ineq;
2212
518
2213
518
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2214
518
2215
813
  for (j = 0; 
j < node->nparam813
;
++j295
)
{295
2216
295
    int dim;
2217
295
2218
295
    if (max < 0)
2219
240
      continue;
2220
295
2221
55
    k = isl_basic_set_alloc_inequality(graph->lp);
2222
55
    if (k < 0)
2223
0
      return isl_stat_error;
2224
55
    dim = 1 + node->start + 1 + j;
2225
55
    isl_seq_clr(graph->lp->ineq[k], 1 + total);
2226
55
    isl_int_set_si(graph->lp->ineq[k][dim], -1);
2227
55
    isl_int_set_si(graph->lp->ineq[k][0], max);
2228
55
  }
2229
518
2230
518
  ineq = isl_vec_alloc(ctx, 1 + total);
2231
518
  ineq = isl_vec_clr(ineq);
2232
518
  if (!ineq)
2233
0
    return isl_stat_error;
2234
1.39k
  
for (i = 0; 518
i < node->nvar1.39k
;
++i874
)
{874
2235
874
    int pos = 1 + node_var_coef_offset(node);
2236
874
2237
874
    if (isl_int_is_neg(node->max->el[i]))
2238
493
      continue;
2239
874
2240
1.34k
    
for (j = 0; 381
j < node->nvar1.34k
;
++j966
)
{966
2241
966
      isl_int_set(ineq->el[pos + 2 * j],
2242
966
          node->cmap->row[i][j]);
2243
966
      isl_int_neg(ineq->el[pos + 2 * j + 1],
2244
966
          node->cmap->row[i][j]);
2245
966
    }
2246
381
    isl_int_set(ineq->el[0], node->max->el[i]);
2247
381
2248
381
    k = isl_basic_set_alloc_inequality(graph->lp);
2249
381
    if (k < 0)
2250
0
      goto error;
2251
381
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2252
381
2253
381
    isl_seq_neg(ineq->el + pos, ineq->el + pos, 2 * node->nvar);
2254
381
    k = isl_basic_set_alloc_inequality(graph->lp);
2255
381
    if (k < 0)
2256
0
      goto error;
2257
381
    isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2258
381
  }
2259
518
  isl_vec_free(ineq);
2260
518
2261
518
  return isl_stat_ok;
2262
0
error:
2263
0
  isl_vec_free(ineq);
2264
0
  return isl_stat_error;
2265
518
}
2266
2267
/* Add constraints that bound the values of the variable and parameter
2268
 * coefficients of the schedule.
2269
 *
2270
 * The maximal value of the coefficients is defined by the option
2271
 * 'schedule_max_coefficient' and the entries in node->max.
2272
 * These latter entries are only set if either the schedule_max_coefficient
2273
 * option or the schedule_treat_coalescing option is set.
2274
 */
2275
static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
2276
  struct isl_sched_graph *graph)
2277
367
{
2278
367
  int i;
2279
367
  int max;
2280
367
2281
367
  max = isl_options_get_schedule_max_coefficient(ctx);
2282
367
2283
367
  if (
max == -1 && 367
!isl_options_get_schedule_treat_coalescing(ctx)245
)
2284
8
    return isl_stat_ok;
2285
367
2286
877
  
for (i = 0; 359
i < graph->n877
;
++i518
)
{518
2287
518
    struct isl_sched_node *node = &graph->node[i];
2288
518
2289
518
    if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
2290
0
      return isl_stat_error;
2291
518
  }
2292
359
2293
359
  return isl_stat_ok;
2294
359
}
2295
2296
/* Add a constraint to graph->lp that equates the value at position
2297
 * "sum_pos" to the sum of the "n" values starting at "first".
2298
 */
2299
static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
2300
  int sum_pos, int first, int n)
2301
367
{
2302
367
  int i, k;
2303
367
  int total;
2304
367
2305
367
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2306
367
2307
367
  k = isl_basic_set_alloc_equality(graph->lp);
2308
367
  if (k < 0)
2309
0
    return isl_stat_error;
2310
367
  isl_seq_clr(graph->lp->eq[k], 1 +  total);
2311
367
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2312
851
  for (i = 0; 
i < n851
;
++i484
)
2313
484
    isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
2314
367
2315
367
  return isl_stat_ok;
2316
367
}
2317
2318
/* Add a constraint to graph->lp that equates the value at position
2319
 * "sum_pos" to the sum of the parameter coefficients of all nodes.
2320
 *
2321
 * Within each node, the coefficients have the following order:
2322
 *  - c_i_0
2323
 *  - c_i_n (if parametric)
2324
 *  - positive and negative parts of c_i_x
2325
 */
2326
static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
2327
  int sum_pos)
2328
364
{
2329
364
  int i, j, k;
2330
364
  int total;
2331
364
2332
364
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2333
364
2334
364
  k = isl_basic_set_alloc_equality(graph->lp);
2335
364
  if (k < 0)
2336
0
    return isl_stat_error;
2337
364
  isl_seq_clr(graph->lp->eq[k], 1 +  total);
2338
364
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2339
873
  for (i = 0; 
i < graph->n873
;
++i509
)
{509
2340
509
    int pos = 1 + graph->node[i].start + 1;
2341
509
2342
818
    for (j = 0; 
j < graph->node[i].nparam818
;
++j309
)
2343
309
      isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2344
509
  }
2345
364
2346
364
  return isl_stat_ok;
2347
364
}
2348
2349
/* Add a constraint to graph->lp that equates the value at position
2350
 * "sum_pos" to the sum of the variable coefficients of all nodes.
2351
 *
2352
 * Within each node, the coefficients have the following order:
2353
 *  - c_i_0
2354
 *  - c_i_n (if parametric)
2355
 *  - positive and negative parts of c_i_x
2356
 */
2357
static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
2358
  int sum_pos)
2359
396
{
2360
396
  int i, j, k;
2361
396
  int total;
2362
396
2363
396
  total = isl_basic_set_dim(graph->lp, isl_dim_set);
2364
396
2365
396
  k = isl_basic_set_alloc_equality(graph->lp);
2366
396
  if (k < 0)
2367
0
    return isl_stat_error;
2368
396
  isl_seq_clr(graph->lp->eq[k], 1 +  total);
2369
396
  isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2370
967
  for (i = 0; 
i < graph->n967
;
++i571
)
{571
2371
571
    struct isl_sched_node *node = &graph->node[i];
2372
571
    int pos = 1 + node_var_coef_offset(node);
2373
571
2374
2.54k
    for (j = 0; 
j < 2 * node->nvar2.54k
;
++j1.97k
)
2375
1.97k
      isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2376
571
  }
2377
396
2378
396
  return isl_stat_ok;
2379
396
}
2380
2381
/* Construct an ILP problem for finding schedule coefficients
2382
 * that result in non-negative, but small dependence distances
2383
 * over all dependences.
2384
 * In particular, the dependence distances over proximity edges
2385
 * are bounded by m_0 + m_n n and we compute schedule coefficients
2386
 * with small values (preferably zero) of m_n and m_0.
2387
 *
2388
 * All variables of the ILP are non-negative.  The actual coefficients
2389
 * may be negative, so each coefficient is represented as the difference
2390
 * of two non-negative variables.  The negative part always appears
2391
 * immediately before the positive part.
2392
 * Other than that, the variables have the following order
2393
 *
2394
 *  - sum of positive and negative parts of m_n coefficients
2395
 *  - m_0
2396
 *  - sum of all c_n coefficients
2397
 *    (unconstrained when computing non-parametric schedules)
2398
 *  - sum of positive and negative parts of all c_x coefficients
2399
 *  - positive and negative parts of m_n coefficients
2400
 *  - for each node
2401
 *    - c_i_0
2402
 *    - c_i_n (if parametric)
2403
 *    - positive and negative parts of c_i_x
2404
 *
2405
 * The c_i_x are not represented directly, but through the columns of
2406
 * node->cmap.  That is, the computed values are for variable t_i_x
2407
 * such that c_i_x = Q t_i_x with Q equal to node->cmap.
2408
 *
2409
 * The constraints are those from the edges plus two or three equalities
2410
 * to express the sums.
2411
 *
2412
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2413
 * Otherwise, we ignore them.
2414
 */
2415
static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
2416
  int use_coincidence)
2417
367
{
2418
367
  int i;
2419
367
  unsigned nparam;
2420
367
  unsigned total;
2421
367
  isl_space *space;
2422
367
  int parametric;
2423
367
  int param_pos;
2424
367
  int n_eq, n_ineq;
2425
367
2426
367
  parametric = ctx->opt->schedule_parametric;
2427
367
  nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
2428
367
  param_pos = 4;
2429
367
  total = param_pos + 2 * nparam;
2430
901
  for (i = 0; 
i < graph->n901
;
++i534
)
{534
2431
534
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2432
534
    if (node_update_cmap(node) < 0)
2433
0
      return isl_stat_error;
2434
534
    node->start = total;
2435
534
    total += 1 + node->nparam + 2 * node->nvar;
2436
534
  }
2437
367
2438
367
  
if (367
count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0367
)
2439
0
    return isl_stat_error;
2440
367
  
if (367
count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0367
)
2441
0
    return isl_stat_error;
2442
367
  
if (367
count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0367
)
2443
0
    return isl_stat_error;
2444
367
2445
367
  space = isl_space_set_alloc(ctx, 0, total);
2446
367
  isl_basic_set_free(graph->lp);
2447
367
  n_eq += 2 + parametric;
2448
367
2449
367
  graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
2450
367
2451
367
  if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
2452
0
    return isl_stat_error;
2453
367
  
if (367
parametric && 367
add_param_sum_constraint(graph, 2) < 0335
)
2454
0
    return isl_stat_error;
2455
367
  
if (367
add_var_sum_constraint(graph, 3) < 0367
)
2456
0
    return isl_stat_error;
2457
367
  
if (367
add_bound_constant_constraints(ctx, graph) < 0367
)
2458
0
    return isl_stat_error;
2459
367
  
if (367
add_bound_coefficient_constraints(ctx, graph) < 0367
)
2460
0
    return isl_stat_error;
2461
367
  
if (367
add_all_validity_constraints(graph, use_coincidence) < 0367
)
2462
0
    return isl_stat_error;
2463
367
  
if (367
add_all_proximity_constraints(graph, use_coincidence) < 0367
)
2464
0
    return isl_stat_error;
2465
367
2466
367
  return isl_stat_ok;
2467
367
}
2468
2469
/* Analyze the conflicting constraint found by
2470
 * isl_tab_basic_set_non_trivial_lexmin.  If it corresponds to the validity
2471
 * constraint of one of the edges between distinct nodes, living, moreover
2472
 * in distinct SCCs, then record the source and sink SCC as this may
2473
 * be a good place to cut between SCCs.
2474
 */
2475
static int check_conflict(int con, void *user)
2476
1.17k
{
2477
1.17k
  int i;
2478
1.17k
  struct isl_sched_graph *graph = user;
2479
1.17k
2480
1.17k
  if (graph->src_scc >= 0)
2481
43
    return 0;
2482
1.17k
2483
1.13k
  con -= graph->lp->n_eq;
2484
1.13k
2485
1.13k
  if (con >= graph->lp->n_ineq)
2486
536
    return 0;
2487
1.13k
2488
2.04k
  
for (i = 0; 594
i < graph->n_edge2.04k
;
++i1.45k
)
{1.45k
2489
1.45k
    if (!is_validity(&graph->edge[i]))
2490
175
      continue;
2491
1.27k
    
if (1.27k
graph->edge[i].src == graph->edge[i].dst1.27k
)
2492
682
      continue;
2493
597
    
if (597
graph->edge[i].src->scc == graph->edge[i].dst->scc597
)
2494
529
      continue;
2495
68
    
if (68
graph->edge[i].start > con68
)
2496
39
      continue;
2497
29
    
if (29
graph->edge[i].end <= con29
)
2498
21
      continue;
2499
8
    graph->src_scc = graph->edge[i].src->scc;
2500
8
    graph->dst_scc = graph->edge[i].dst->scc;
2501
8
  }
2502
594
2503
594
  return 0;
2504
1.13k
}
2505
2506
/* Check whether the next schedule row of the given node needs to be
2507
 * non-trivial.  Lower-dimensional domains may have some trivial rows,
2508
 * but as soon as the number of remaining required non-trivial rows
2509
 * is as large as the number or remaining rows to be computed,
2510
 * all remaining rows need to be non-trivial.
2511
 */
2512
static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
2513
567
{
2514
567
  return node->nvar - node->rank >= graph->maxvar - graph->n_row;
2515
567
}
2516
2517
/* Solve the ILP problem constructed in setup_lp.
2518
 * For each node such that all the remaining rows of its schedule
2519
 * need to be non-trivial, we construct a non-triviality region.
2520
 * This region imposes that the next row is independent of previous rows.
2521
 * In particular the coefficients c_i_x are represented by t_i_x
2522
 * variables with c_i_x = Q t_i_x and Q a unimodular matrix such that
2523
 * its first columns span the rows of the previously computed part
2524
 * of the schedule.  The non-triviality region enforces that at least
2525
 * one of the remaining components of t_i_x is non-zero, i.e.,
2526
 * that the new schedule row depends on at least one of the remaining
2527
 * columns of Q.
2528
 */
2529
static __isl_give isl_vec *solve_lp(struct isl_sched_graph *graph)
2530
367
{
2531
367
  int i;
2532
367
  isl_vec *sol;
2533
367
  isl_basic_set *lp;
2534
367
2535
901
  for (i = 0; 
i < graph->n901
;
++i534
)
{534
2536
534
    struct isl_sched_node *node = &graph->node[i];
2537
534
    int skip = node->rank;
2538
534
    graph->region[i].pos = node_var_coef_offset(node) + 2 * skip;
2539
534
    if (needs_row(graph, node))
2540
430
      graph->region[i].len = 2 * (node->nvar - skip);
2541
534
    else
2542
104
      graph->region[i].len = 0;
2543
534
  }
2544
367
  lp = isl_basic_set_copy(graph->lp);
2545
367
  sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
2546
367
               graph->region, &check_conflict, graph);
2547
367
  return sol;
2548
367
}
2549
2550
/* Extract the coefficients for the variables of "node" from "sol".
2551
 *
2552
 * Within each node, the coefficients have the following order:
2553
 *  - c_i_0
2554
 *  - c_i_n (if parametric)
2555
 *  - positive and negative parts of c_i_x
2556
 *
2557
 * The c_i_x^- appear before their c_i_x^+ counterpart.
2558
 *
2559
 * Return c_i_x = c_i_x^+ - c_i_x^-
2560
 */
2561
static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
2562
  __isl_keep isl_vec *sol)
2563
464
{
2564
464
  int i;
2565
464
  int pos;
2566
464
  isl_vec *csol;
2567
464
2568
464
  if (!sol)
2569
0
    return NULL;
2570
464
  csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
2571
464
  if (!csol)
2572
0
    return NULL;
2573
464
2574
464
  pos = 1 + node_var_coef_offset(node);
2575
1.31k
  for (i = 0; 
i < node->nvar1.31k
;
++i849
)
2576
849
    isl_int_sub(csol->el[i],
2577
464
          sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
2578
464
2579
464
  return csol;
2580
464
}
2581
2582
/* Update the schedules of all nodes based on the given solution
2583
 * of the LP problem.
2584
 * The new row is added to the current band.
2585
 * All possibly negative coefficients are encoded as a difference
2586
 * of two non-negative variables, so we need to perform the subtraction
2587
 * here.  Moreover, if use_cmap is set, then the solution does
2588
 * not refer to the actual coefficients c_i_x, but instead to variables
2589
 * t_i_x such that c_i_x = Q t_i_x and Q is equal to node->cmap.
2590
 * In this case, we then also need to perform this multiplication
2591
 * to obtain the values of c_i_x.
2592
 *
2593
 * If coincident is set, then the caller guarantees that the new
2594
 * row satisfies the coincidence constraints.
2595
 */
2596
static int update_schedule(struct isl_sched_graph *graph,
2597
  __isl_take isl_vec *sol, int use_cmap, int coincident)
2598
293
{
2599
293
  int i, j;
2600
293
  isl_vec *csol = NULL;
2601
293
2602
293
  if (!sol)
2603
0
    goto error;
2604
293
  
if (293
sol->size == 0293
)
2605
0
    isl_die(sol->ctx, isl_error_internal,
2606
293
      "no solution found", goto error);
2607
293
  
if (293
graph->n_total_row >= graph->max_row293
)
2608
0
    isl_die(sol->ctx, isl_error_internal,
2609
293
      "too many schedule rows", goto error);
2610
293
2611
699
  
for (i = 0; 293
i < graph->n699
;
++i406
)
{406
2612
406
    struct isl_sched_node *node = &graph->node[i];
2613
406
    int pos = node->start;
2614
406
    int row = isl_mat_rows(node->sched);
2615
406
2616
406
    isl_vec_free(csol);
2617
406
    csol = extract_var_coef(node, sol);
2618
406
    if (!csol)
2619
0
      goto error;
2620
406
2621
406
    isl_map_free(node->sched_map);
2622
406
    node->sched_map = NULL;
2623
406
    node->sched = isl_mat_add_rows(node->sched, 1);
2624
406
    if (!node->sched)
2625
0
      goto error;
2626
1.03k
    
for (j = 0; 406
j < 1 + node->nparam1.03k
;
++j624
)
2627
624
      node->sched = isl_mat_set_element(node->sched,
2628
624
            row, j, sol->el[1 + pos + j]);
2629
406
    if (use_cmap)
2630
369
      csol = isl_mat_vec_product(isl_mat_copy(node->cmap),
2631
369
               csol);
2632
406
    if (!csol)
2633
0
      goto error;
2634
1.14k
    
for (j = 0; 406
j < node->nvar1.14k
;
++j743
)
2635
743
      node->sched = isl_mat_set_element(node->sched,
2636
743
          row, 1 + node->nparam + j, csol->el[j]);
2637
406
    node->coincident[graph->n_total_row] = coincident;
2638
406
  }
2639
293
  isl_vec_free(sol);
2640
293
  isl_vec_free(csol);
2641
293
2642
293
  graph->n_row++;
2643
293
  graph->n_total_row++;
2644
293
2645
293
  return 0;
2646
0
error:
2647
0
  isl_vec_free(sol);
2648
0
  isl_vec_free(csol);
2649
0
  return -1;
2650
293
}
2651
2652
/* Convert row "row" of node->sched into an isl_aff living in "ls"
2653
 * and return this isl_aff.
2654
 */
2655
static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
2656
  struct isl_sched_node *node, int row)
2657
887
{
2658
887
  int j;
2659
887
  isl_int v;
2660
887
  isl_aff *aff;
2661
887
2662
887
  isl_int_init(v);
2663
887
2664
887
  aff = isl_aff_zero_on_domain(ls);
2665
887
  isl_mat_get_element(node->sched, row, 0, &v);
2666
887
  aff = isl_aff_set_constant(aff, v);
2667
1.35k
  for (j = 0; 
j < node->nparam1.35k
;
++j470
)
{470
2668
470
    isl_mat_get_element(node->sched, row, 1 + j, &v);
2669
470
    aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
2670
470
  }
2671
2.39k
  for (j = 0; 
j < node->nvar2.39k
;
++j1.50k
)
{1.50k
2672
1.50k
    isl_mat_get_element(node->sched, row, 1 + node->nparam + j, &v);
2673
1.50k
    aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
2674
1.50k
  }
2675
887
2676
887
  isl_int_clear(v);
2677
887
2678
887
  return aff;
2679
887
}
2680
2681
/* Convert the "n" rows starting at "first" of node->sched into a multi_aff
2682
 * and return this multi_aff.
2683
 *
2684
 * The result is defined over the uncompressed node domain.
2685
 */
2686
static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff(
2687
  struct isl_sched_node *node, int first, int n)
2688
524
{
2689
524
  int i;
2690
524
  isl_space *space;
2691
524
  isl_local_space *ls;
2692
524
  isl_aff *aff;
2693
524
  isl_multi_aff *ma;
2694
524
  int nrow;
2695
524
2696
524
  if (!node)
2697
0
    return NULL;
2698
524
  nrow = isl_mat_rows(node->sched);
2699
524
  if (node->compressed)
2700
25
    space = isl_multi_aff_get_domain_space(node->decompress);
2701
524
  else
2702
499
    space = isl_space_copy(node->space);
2703
524
  ls = isl_local_space_from_space(isl_space_copy(space));
2704
524
  space = isl_space_from_domain(space);
2705
524
  space = isl_space_add_dims(space, isl_dim_out, n);
2706
524
  ma = isl_multi_aff_zero(space);
2707
524
2708
1.27k
  for (i = first; 
i < first + n1.27k
;
++i749
)
{749
2709
749
    aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
2710
749
    ma = isl_multi_aff_set_aff(ma, i - first, aff);
2711
749
  }
2712
524
2713
524
  isl_local_space_free(ls);
2714
524
2715
524
  if (node->compressed)
2716
25
    ma = isl_multi_aff_pullback_multi_aff(ma,
2717
25
          isl_multi_aff_copy(node->compress));
2718
524
2719
524
  return ma;
2720
524
}
2721
2722
/* Convert node->sched into a multi_aff and return this multi_aff.
2723
 *
2724
 * The result is defined over the uncompressed node domain.
2725
 */
2726
static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
2727
  struct isl_sched_node *node)
2728
167
{
2729
167
  int nrow;
2730
167
2731
167
  nrow = isl_mat_rows(node->sched);
2732
167
  return node_extract_partial_schedule_multi_aff(node, 0, nrow);
2733
167
}
2734
2735
/* Convert node->sched into a map and return this map.
2736
 *
2737
 * The result is cached in node->sched_map, which needs to be released
2738
 * whenever node->sched is updated.
2739
 * It is defined over the uncompressed node domain.
2740
 */
2741
static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
2742
448
{
2743
448
  if (
!node->sched_map448
)
{167
2744
167
    isl_multi_aff *ma;
2745
167
2746
167
    ma = node_extract_schedule_multi_aff(node);
2747
167
    node->sched_map = isl_map_from_multi_aff(ma);
2748
167
  }
2749
448
2750
448
  return isl_map_copy(node->sched_map);
2751
448
}
2752
2753
/* Construct a map that can be used to update a dependence relation
2754
 * based on the current schedule.
2755
 * That is, construct a map expressing that source and sink
2756
 * are executed within the same iteration of the current schedule.
2757
 * This map can then be intersected with the dependence relation.
2758
 * This is not the most efficient way, but this shouldn't be a critical
2759
 * operation.
2760
 */
2761
static __isl_give isl_map *specializer(struct isl_sched_node *src,
2762
  struct isl_sched_node *dst)
2763
195
{
2764
195
  isl_map *src_sched, *dst_sched;
2765
195
2766
195
  src_sched = node_extract_schedule(src);
2767
195
  dst_sched = node_extract_schedule(dst);
2768
195
  return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
2769
195
}
2770
2771
/* Intersect the domains of the nested relations in domain and range
2772
 * of "umap" with "map".
2773
 */
2774
static __isl_give isl_union_map *intersect_domains(
2775
  __isl_take isl_union_map *umap, __isl_keep isl_map *map)
2776
68
{
2777
68
  isl_union_set *uset;
2778
68
2779
68
  umap = isl_union_map_zip(umap);
2780
68
  uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
2781
68
  umap = isl_union_map_intersect_domain(umap, uset);
2782
68
  umap = isl_union_map_zip(umap);
2783
68
  return umap;
2784
68
}
2785
2786
/* Update the dependence relation of the given edge based
2787
 * on the current schedule.
2788
 * If the dependence is carried completely by the current schedule, then
2789
 * it is removed from the edge_tables.  It is kept in the list of edges
2790
 * as otherwise all edge_tables would have to be recomputed.
2791
 */
2792
static int update_edge(struct isl_sched_graph *graph,
2793
  struct isl_sched_edge *edge)
2794
195
{
2795
195
  int empty;
2796
195
  isl_map *id;
2797
195
2798
195
  id = specializer(edge->src, edge->dst);
2799
195
  edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
2800
195
  if (!edge->map)
2801
0
    goto error;
2802
195
2803
195
  
if (195
edge->tagged_condition195
)
{31
2804
31
    edge->tagged_condition =
2805
31
      intersect_domains(edge->tagged_condition, id);
2806
31
    if (!edge->tagged_condition)
2807
0
      goto error;
2808
31
  }
2809
195
  
if (195
edge->tagged_validity195
)
{37
2810
37
    edge->tagged_validity =
2811
37
      intersect_domains(edge->tagged_validity, id);
2812
37
    if (!edge->tagged_validity)
2813
0
      goto error;
2814
37
  }
2815
195
2816
195
  empty = isl_map_plain_is_empty(edge->map);
2817
195
  if (empty < 0)
2818
0
    goto error;
2819
195
  
if (195
empty195
)
2820
108
    graph_remove_edge(graph, edge);
2821
195
2822
195
  isl_map_free(id);
2823
195
  return 0;
2824
0
error:
2825
0
  isl_map_free(id);
2826
0
  return -1;
2827
195
}
2828
2829
/* Does the domain of "umap" intersect "uset"?
2830
 */
2831
static int domain_intersects(__isl_keep isl_union_map *umap,
2832
  __isl_keep isl_union_set *uset)
2833
38
{
2834
38
  int empty;
2835
38
2836
38
  umap = isl_union_map_copy(umap);
2837
38
  umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
2838
38
  empty = isl_union_map_is_empty(umap);
2839
38
  isl_union_map_free(umap);
2840
38
2841
38
  return empty < 0 ? 
-10
:
!empty38
;
2842
38
}
2843
2844
/* Does the range of "umap" intersect "uset"?
2845
 */
2846
static int range_intersects(__isl_keep isl_union_map *umap,
2847
  __isl_keep isl_union_set *uset)
2848
26
{
2849
26
  int empty;
2850
26
2851
26
  umap = isl_union_map_copy(umap);
2852
26
  umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
2853
26
  empty = isl_union_map_is_empty(umap);
2854
26
  isl_union_map_free(umap);
2855
26
2856
26
  return empty < 0 ? 
-10
:
!empty26
;
2857
26
}
2858
2859
/* Are the condition dependences of "edge" local with respect to
2860
 * the current schedule?
2861
 *
2862
 * That is, are domain and range of the condition dependences mapped
2863
 * to the same point?
2864
 *
2865
 * In other words, is the condition false?
2866
 */
2867
static int is_condition_false(struct isl_sched_edge *edge)
2868
29
{
2869
29
  isl_union_map *umap;
2870
29
  isl_map *map, *sched, *test;
2871
29
  int empty, local;
2872
29
2873
29
  empty = isl_union_map_is_empty(edge->tagged_condition);
2874
29
  if (
empty < 0 || 29
empty29
)
2875
0
    return empty;
2876
29
2877
29
  umap = isl_union_map_copy(edge->tagged_condition);
2878
29
  umap = isl_union_map_zip(umap);
2879
29
  umap = isl_union_set_unwrap(isl_union_map_domain(umap));
2880
29
  map = isl_map_from_union_map(umap);
2881
29
2882
29
  sched = node_extract_schedule(edge->src);
2883
29
  map = isl_map_apply_domain(map, sched);
2884
29
  sched = node_extract_schedule(edge->dst);
2885
29
  map = isl_map_apply_range(map, sched);
2886
29
2887
29
  test = isl_map_identity(isl_map_get_space(map));
2888
29
  local = isl_map_is_subset(map, test);
2889
29
  isl_map_free(map);
2890
29
  isl_map_free(test);
2891
29
2892
29
  return local;
2893
29
}
2894
2895
/* For each conditional validity constraint that is adjacent
2896
 * to a condition with domain in condition_source or range in condition_sink,
2897
 * turn it into an unconditional validity constraint.
2898
 */
2899
static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
2900
  __isl_take isl_union_set *condition_source,
2901
  __isl_take isl_union_set *condition_sink)
2902
10
{
2903
10
  int i;
2904
10
2905
10
  condition_source = isl_union_set_coalesce(condition_source);
2906
10
  condition_sink = isl_union_set_coalesce(condition_sink);
2907
10
2908
52
  for (i = 0; 
i < graph->n_edge52
;
++i42
)
{42
2909
42
    int adjacent;
2910
42
    isl_union_map *validity;
2911
42
2912
42
    if (!is_conditional_validity(&graph->edge[i]))
2913
19
      continue;
2914
23
    
if (23
is_validity(&graph->edge[i])23
)
2915
4
      continue;
2916
23
2917
19
    validity = graph->edge[i].tagged_validity;
2918
19
    adjacent = domain_intersects(validity, condition_sink);
2919
19
    if (
adjacent >= 0 && 19
!adjacent19
)
2920
18
      adjacent = range_intersects(validity, condition_source);
2921
19
    if (adjacent < 0)
2922
0
      goto error;
2923
19
    
if (19
!adjacent19
)
2924
17
      continue;
2925
19
2926
2
    set_validity(&graph->edge[i]);
2927
2
  }
2928
10
2929
10
  isl_union_set_free(condition_source);
2930
10
  isl_union_set_free(condition_sink);
2931
10
  return 0;
2932
0
error:
2933
0
  isl_union_set_free(condition_source);
2934
0
  isl_union_set_free(condition_sink);
2935
0
  return -1;
2936
10
}
2937
2938
/* Update the dependence relations of all edges based on the current schedule
2939
 * and enforce conditional validity constraints that are adjacent
2940
 * to satisfied condition constraints.
2941
 *
2942
 * First check if any of the condition constraints are satisfied
2943
 * (i.e., not local to the outer schedule) and keep track of
2944
 * their domain and range.
2945
 * Then update all dependence relations (which removes the non-local
2946
 * constraints).
2947
 * Finally, if any condition constraints turned out to be satisfied,
2948
 * then turn all adjacent conditional validity constraints into
2949
 * unconditional validity constraints.
2950
 */
2951
static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
2952
80
{
2953
80
  int i;
2954
80
  int any = 0;
2955
80
  isl_union_set *source, *sink;
2956
80
2957
80
  source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
2958
80
  sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
2959
275
  for (i = 0; 
i < graph->n_edge275
;
++i195
)
{195
2960
195
    int local;
2961
195
    isl_union_set *uset;
2962
195
    isl_union_map *umap;
2963
195
2964
195
    if (!is_condition(&graph->edge[i]))
2965
164
      continue;
2966
31
    
if (31
is_local(&graph->edge[i])31
)
2967
16
      continue;
2968
15
    local = is_condition_false(&graph->edge[i]);
2969
15
    if (local < 0)
2970
0
      goto error;
2971
15
    
if (15
local15
)
2972
4
      continue;
2973
15
2974
11
    any = 1;
2975
11
2976
11
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
2977
11
    uset = isl_union_map_domain(umap);
2978
11
    source = isl_union_set_union(source, uset);
2979
11
2980
11
    umap = isl_union_map_copy(graph->edge[i].tagged_condition);
2981
11
    uset = isl_union_map_range(umap);
2982
11
    sink = isl_union_set_union(sink, uset);
2983
11
  }
2984
80
2985
275
  
for (i = graph->n_edge - 1; 80
i >= 0275
;
--i195
)
{195
2986
195
    if (update_edge(graph, &graph->edge[i]) < 0)
2987
0
      goto error;
2988
195
  }
2989
80
2990
80
  
if (80
any80
)
2991
10
    return unconditionalize_adjacent_validity(graph, source, sink);
2992
80
2993
70
  isl_union_set_free(source);
2994
70
  isl_union_set_free(sink);
2995
70
  return 0;
2996
0
error:
2997
0
  isl_union_set_free(source);
2998
0
  isl_union_set_free(sink);
2999
0
  return -1;
3000
80
}
3001
3002
static void next_band(struct isl_sched_graph *graph)
3003
82
{
3004
82
  graph->band_start = graph->n_total_row;
3005
82
}
3006
3007
/* Return the union of the universe domains of the nodes in "graph"
3008
 * that satisfy "pred".
3009
 */
3010
static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
3011
  struct isl_sched_graph *graph,
3012
  int (*pred)(struct isl_sched_node *node, int data), int data)
3013
149
{
3014
149
  int i;
3015
149
  isl_set *set;
3016
149
  isl_union_set *dom;
3017
149
3018
253
  for (i = 0; 
i < graph->n253
;
++i104
)
3019
253
    
if (253
pred(&graph->node[i], data)253
)
3020
149
      break;
3021
149
3022
149
  if (i >= graph->n)
3023
0
    isl_die(ctx, isl_error_internal,
3024
149
      "empty component", return NULL);
3025
149
3026
149
  set = isl_set_universe(isl_space_copy(graph->node[i].space));
3027
149
  dom = isl_union_set_from_set(set);
3028
149
3029
258
  for (i = i + 1; 
i < graph->n258
;
++i109
)
{109
3030
109
    if (!pred(&graph->node[i], data))
3031
102
      continue;
3032
7
    set = isl_set_universe(isl_space_copy(graph->node[i].space));
3033
7
    dom = isl_union_set_union(dom, isl_union_set_from_set(set));
3034
7
  }
3035
149
3036
149
  return dom;
3037
149
}
3038
3039
/* Return a list of unions of universe domains, where each element
3040
 * in the list corresponds to an SCC (or WCC) indexed by node->scc.
3041
 */
3042
static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx,
3043
  struct isl_sched_graph *graph)
3044
65
{
3045
65
  int i;
3046
65
  isl_union_set_list *filters;
3047
65
3048
65
  filters = isl_union_set_list_alloc(ctx, graph->scc);
3049
210
  for (i = 0; 
i < graph->scc210
;
++i145
)
{145
3050
145
    isl_union_set *dom;
3051
145
3052
145
    dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i);
3053
145
    filters = isl_union_set_list_add(filters, dom);
3054
145
  }
3055
65
3056
65
  return filters;
3057
65
}
3058
3059
/* Return a list of two unions of universe domains, one for the SCCs up
3060
 * to and including graph->src_scc and another for the other SCCs.
3061
 */
3062
static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
3063
  struct isl_sched_graph *graph)
3064
2
{
3065
2
  isl_union_set *dom;
3066
2
  isl_union_set_list *filters;
3067
2
3068
2
  filters = isl_union_set_list_alloc(ctx, 2);
3069
2
  dom = isl_sched_graph_domain(ctx, graph,
3070
2
          &node_scc_at_most, graph->src_scc);
3071
2
  filters = isl_union_set_list_add(filters, dom);
3072
2
  dom = isl_sched_graph_domain(ctx, graph,
3073
2
          &node_scc_at_least, graph->src_scc + 1);
3074
2
  filters = isl_union_set_list_add(filters, dom);
3075
2
3076
2
  return filters;
3077
2
}
3078
3079
/* Copy nodes that satisfy node_pred from the src dependence graph
3080
 * to the dst dependence graph.
3081
 */
3082
static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
3083
  int (*node_pred)(struct isl_sched_node *node, int data), int data)
3084
115
{
3085
115
  int i;
3086
115
3087
115
  dst->n = 0;
3088
407
  for (i = 0; 
i < src->n407
;
++i292
)
{292
3089
292
    int j;
3090
292
3091
292
    if (!node_pred(&src->node[i], data))
3092
152
      continue;
3093
292
3094
140
    j = dst->n;
3095
140
    dst->node[j].space = isl_space_copy(src->node[i].space);
3096
140
    dst->node[j].compressed = src->node[i].compressed;
3097
140
    dst->node[j].hull = isl_set_copy(src->node[i].hull);
3098
140
    dst->node[j].compress =
3099
140
      isl_multi_aff_copy(src->node[i].compress);
3100
140
    dst->node[j].decompress =
3101
140
      isl_multi_aff_copy(src->node[i].decompress);
3102
140
    dst->node[j].nvar = src->node[i].nvar;
3103
140
    dst->node[j].nparam = src->node[i].nparam;
3104
140
    dst->node[j].sched = isl_mat_copy(src->node[i].sched);
3105
140
    dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
3106
140
    dst->node[j].coincident = src->node[i].coincident;
3107
140
    dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
3108
140
    dst->node[j].max = isl_vec_copy(src->node[i].max);
3109
140
    dst->n++;
3110
140
3111
140
    if (
!dst->node[j].space || 140
!dst->node[j].sched140
)
3112
0
      return -1;
3113
140
    
if (140
dst->node[j].compressed &&140
3114
4
        
(!dst->node[j].hull || 4
!dst->node[j].compress4
||
3115
4
         !dst->node[j].decompress))
3116
0
      return -1;
3117
140
  }
3118
115
3119
115
  return 0;
3120
115
}
3121
3122
/* Copy non-empty edges that satisfy edge_pred from the src dependence graph
3123
 * to the dst dependence graph.
3124
 * If the source or destination node of the edge is not in the destination
3125
 * graph, then it must be a backward proximity edge and it should simply
3126
 * be ignored.
3127
 */
3128
static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
3129
  struct isl_sched_graph *src,
3130
  int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
3131
115
{
3132
115
  int i;
3133
115
  enum isl_edge_type t;
3134
115
3135
115
  dst->n_edge = 0;
3136
441
  for (i = 0; 
i < src->n_edge441
;
++i326
)
{326
3137
326
    struct isl_sched_edge *edge = &src->edge[i];
3138
326
    isl_map *map;
3139
326
    isl_union_map *tagged_condition;
3140
326
    isl_union_map *tagged_validity;
3141
326
    struct isl_sched_node *dst_src, *dst_dst;
3142
326
3143
326
    if (!edge_pred(edge, data))
3144
242
      continue;
3145
326
3146
84
    
if (84
isl_map_plain_is_empty(edge->map)84
)
3147
11
      continue;
3148
84
3149
73
    dst_src = graph_find_node(ctx, dst, edge->src->space);
3150
73
    dst_dst = graph_find_node(ctx, dst, edge->dst->space);
3151
73
    if (
!dst_src || 73
!dst_dst73
)
{0
3152
0
      if (
is_validity(edge) || 0
is_conditional_validity(edge)0
)
3153
0
        isl_die(ctx, isl_error_internal,
3154
0
          "backward (conditional) validity edge",
3155
0
          return -1);
3156
0
      continue;
3157
0
    }
3158
73
3159
73
    map = isl_map_copy(edge->map);
3160
73
    tagged_condition = isl_union_map_copy(edge->tagged_condition);
3161
73
    tagged_validity = isl_union_map_copy(edge->tagged_validity);
3162
73
3163
73
    dst->edge[dst->n_edge].src = dst_src;
3164
73
    dst->edge[dst->n_edge].dst = dst_dst;
3165
73
    dst->edge[dst->n_edge].map = map;
3166
73
    dst->edge[dst->n_edge].tagged_condition = tagged_condition;
3167
73
    dst->edge[dst->n_edge].tagged_validity = tagged_validity;
3168
73
    dst->edge[dst->n_edge].types = edge->types;
3169
73
    dst->n_edge++;
3170
73
3171
73
    if (
edge->tagged_condition && 73
!tagged_condition10
)
3172
0
      return -1;
3173
73
    
if (73
edge->tagged_validity && 73
!tagged_validity6
)
3174
0
      return -1;
3175
73
3176
438
    
for (t = isl_edge_first; 73
t <= isl_edge_last438
;
++t365
)
{365
3177
365
      if (edge !=
3178
365
          graph_find_edge(src, t, edge->src, edge->dst))
3179
182
        continue;
3180
183
      
if (183
graph_edge_table_add(ctx, dst, t,183
3181
183
              &dst->edge[dst->n_edge - 1]) < 0)
3182
0
        return -1;
3183
183
    }
3184
73
  }
3185
115
3186
115
  return 0;
3187
115
}
3188
3189
/* Compute the maximal number of variables over all nodes.
3190
 * This is the maximal number of linearly independent schedule
3191
 * rows that we need to compute.
3192
 * Just in case we end up in a part of the dependence graph
3193
 * with only lower-dimensional domains, we make sure we will
3194
 * compute the required amount of extra linearly independent rows.
3195
 */
3196
static int compute_maxvar(struct isl_sched_graph *graph)
3197
241
{
3198
241
  int i;
3199
241
3200
241
  graph->maxvar = 0;
3201
588
  for (i = 0; 
i < graph->n588
;
++i347
)
{347
3202
347
    struct isl_sched_node *node = &graph->node[i];
3203
347
    int nvar;
3204
347
3205
347
    if (node_update_cmap(node) < 0)
3206
0
      return -1;
3207
347
    nvar = node->nvar + graph->n_row - node->rank;
3208
347
    if (nvar > graph->maxvar)
3209
240
      graph->maxvar = nvar;
3210
347
  }
3211
241
3212
241
  return 0;
3213
241
}
3214
3215
/* Extract the subgraph of "graph" that consists of the node satisfying
3216
 * "node_pred" and the edges satisfying "edge_pred" and store
3217
 * the result in "sub".
3218
 */
3219
static int extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
3220
  int (*node_pred)(struct isl_sched_node *node, int data),
3221
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3222
  int data, struct isl_sched_graph *sub)
3223
115
{
3224
115
  int i, n = 0, n_edge = 0;
3225
115
  int t;
3226
115
3227
407
  for (i = 0; 
i < graph->n407
;
++i292
)
3228
292
    
if (292
node_pred(&graph->node[i], data)292
)
3229
140
      ++n;
3230
441
  for (i = 0; 
i < graph->n_edge441
;
++i326
)
3231
326
    
if (326
edge_pred(&graph->edge[i], data)326
)
3232
84
      ++n_edge;
3233
115
  if (graph_alloc(ctx, sub, n, n_edge) < 0)
3234
0
    return -1;
3235
115
  
if (115
copy_nodes(sub, graph, node_pred, data) < 0115
)
3236
0
    return -1;
3237
115
  
if (115
graph_init_table(ctx, sub) < 0115
)
3238
0
    return -1;
3239
690
  
for (t = 0; 115
t <= isl_edge_last690
;
++t575
)
3240
575
    sub->max_edge[t] = graph->max_edge[t];
3241
115
  if (graph_init_edge_tables(ctx, sub) < 0)
3242
0
    return -1;
3243
115
  
if (115
copy_edges(ctx, sub, graph, edge_pred, data) < 0115
)
3244
0
    return -1;
3245
115
  sub->n_row = graph->n_row;
3246
115
  sub->max_row = graph->max_row;
3247
115
  sub->n_total_row = graph->n_total_row;
3248
115
  sub->band_start = graph->band_start;
3249
115
3250
115
  return 0;
3251
115
}
3252
3253
static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
3254
  struct isl_sched_graph *graph);
3255
static __isl_give isl_schedule_node *compute_schedule_wcc(
3256
  isl_schedule_node *node, struct isl_sched_graph *graph);
3257
3258
/* Compute a schedule for a subgraph of "graph".  In particular, for
3259
 * the graph composed of nodes that satisfy node_pred and edges that
3260
 * that satisfy edge_pred.
3261
 * If the subgraph is known to consist of a single component, then wcc should
3262
 * be set and then we call compute_schedule_wcc on the constructed subgraph.
3263
 * Otherwise, we call compute_schedule, which will check whether the subgraph
3264
 * is connected.
3265
 *
3266
 * The schedule is inserted at "node" and the updated schedule node
3267
 * is returned.
3268
 */
3269
static __isl_give isl_schedule_node *compute_sub_schedule(
3270
  __isl_take isl_schedule_node *node, isl_ctx *ctx,
3271
  struct isl_sched_graph *graph,
3272
  int (*node_pred)(struct isl_sched_node *node, int data),
3273
  int (*edge_pred)(struct isl_sched_edge *edge, int data),
3274
  int data, int wcc)
3275
56
{
3276
56
  struct isl_sched_graph split = { 0 };
3277
56
3278
56
  if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data,
3279
56
        &split) < 0)
3280
0
    goto error;
3281
56
3282
56
  
if (56
wcc56
)
3283
38
    node = compute_schedule_wcc(node, &split);
3284
56
  else
3285
18
    node = compute_schedule(node, &split);
3286
56
3287
56
  graph_free(ctx, &split);
3288
56
  return node;
3289
0
error:
3290
0
  graph_free(ctx, &split);
3291
0
  return isl_schedule_node_free(node);
3292
56
}
3293
3294
static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
3295
486
{
3296
198
  return edge->src->scc == scc && edge->dst->scc == scc;
3297
486
}
3298
3299
static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
3300
14
{
3301
14
  return edge->dst->scc <= scc;
3302
14
}
3303
3304
static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
3305
14
{
3306
14
  return edge->src->scc >= scc;
3307
14
}
3308
3309
/* Reset the current band by dropping all its schedule rows.
3310
 */
3311
static int reset_band(struct isl_sched_graph *graph)
3312
7
{
3313
7
  int i;
3314
7
  int drop;
3315
7
3316
7
  drop = graph->n_total_row - graph->band_start;
3317
7
  graph->n_total_row -= drop;
3318
7
  graph->n_row -= drop;
3319
7
3320
25
  for (i = 0; 
i < graph->n25
;
++i18
)
{18
3321
18
    struct isl_sched_node *node = &graph->node[i];
3322
18
3323
18
    isl_map_free(node->sched_map);
3324
18
    node->sched_map = NULL;
3325
18
3326
18
    node->sched = isl_mat_drop_rows(node->sched,
3327
18
            graph->band_start, drop);
3328
18
3329
18
    if (!node->sched)
3330
0
      return -1;
3331
18
  }
3332
7
3333
7
  return 0;
3334
7
}
3335
3336
/* Split the current graph into two parts and compute a schedule for each
3337
 * part individually.  In particular, one part consists of all SCCs up
3338
 * to and including graph->src_scc, while the other part contains the other
3339
 * SCCs.  The split is enforced by a sequence node inserted at position "node"
3340
 * in the schedule tree.  Return the updated schedule node.
3341
 * If either of these two parts consists of a sequence, then it is spliced
3342
 * into the sequence containing the two parts.
3343
 *
3344
 * The current band is reset. It would be possible to reuse
3345
 * the previously computed rows as the first rows in the next
3346
 * band, but recomputing them may result in better rows as we are looking
3347
 * at a smaller part of the dependence graph.
3348
 */
3349
static __isl_give isl_schedule_node *compute_split_schedule(
3350
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3351
2
{
3352
2
  int is_seq;
3353
2
  isl_ctx *ctx;
3354
2
  isl_union_set_list *filters;
3355
2
3356
2
  if (!node)
3357
0
    return NULL;
3358
2
3359
2
  
if (2
reset_band(graph) < 02
)
3360
0
    return isl_schedule_node_free(node);
3361
2
3362
2
  next_band(graph);
3363
2
3364
2
  ctx = isl_schedule_node_get_ctx(node);
3365
2
  filters = extract_split(ctx, graph);
3366
2
  node = isl_schedule_node_insert_sequence(node, filters);
3367
2
  node = isl_schedule_node_child(node, 1);
3368
2
  node = isl_schedule_node_child(node, 0);
3369
2
3370
2
  node = compute_sub_schedule(node, ctx, graph,
3371
2
        &node_scc_at_least, &edge_src_scc_at_least,
3372
2
        graph->src_scc + 1, 0);
3373
2
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3374
2
  node = isl_schedule_node_parent(node);
3375
2
  node = isl_schedule_node_parent(node);
3376
2
  if (is_seq)
3377
0
    node = isl_schedule_node_sequence_splice_child(node, 1);
3378
2
  node = isl_schedule_node_child(node, 0);
3379
2
  node = isl_schedule_node_child(node, 0);
3380
2
  node = compute_sub_schedule(node, ctx, graph,
3381
2
        &node_scc_at_most, &edge_dst_scc_at_most,
3382
2
        graph->src_scc, 0);
3383
2
  is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3384
2
  node = isl_schedule_node_parent(node);
3385
2
  node = isl_schedule_node_parent(node);
3386
2
  if (is_seq)
3387
0
    node = isl_schedule_node_sequence_splice_child(node, 0);
3388
2
3389
2
  return node;
3390
2
}
3391
3392
/* Insert a band node at position "node" in the schedule tree corresponding
3393
 * to the current band in "graph".  Mark the band node permutable
3394
 * if "permutable" is set.
3395
 * The partial schedules and the coincidence property are extracted
3396
 * from the graph nodes.
3397
 * Return the updated schedule node.
3398
 */
3399
static __isl_give isl_schedule_node *insert_current_band(
3400
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3401
  int permutable)
3402
154
{
3403
154
  int i;
3404
154
  int start, end, n;
3405
154
  isl_multi_aff *ma;
3406
154
  isl_multi_pw_aff *mpa;
3407
154
  isl_multi_union_pw_aff *mupa;
3408
154
3409
154
  if (!node)
3410
0
    return NULL;
3411
154
3412
154
  
if (154
graph->n < 1154
)
3413
0
    isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
3414
154
      "graph should have at least one node",
3415
154
      return isl_schedule_node_free(node));
3416
154
3417
154
  start = graph->band_start;
3418
154
  end = graph->n_total_row;
3419
154
  n = end - start;
3420
154
3421
154
  ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n);
3422
154
  mpa = isl_multi_pw_aff_from_multi_aff(ma);
3423
154
  mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3424
154
3425
229
  for (i = 1; 
i < graph->n229
;
++i75
)
{75
3426
75
    isl_multi_union_pw_aff *mupa_i;
3427
75
3428
75
    ma = node_extract_partial_schedule_multi_aff(&graph->node[i],
3429
75
                start, n);
3430
75
    mpa = isl_multi_pw_aff_from_multi_aff(ma);
3431
75
    mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3432
75
    mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
3433
75
  }
3434
154
  node = isl_schedule_node_insert_partial_schedule(node, mupa);
3435
154
3436
397
  for (i = 0; 
i < n397
;
++i243
)
3437
243
    node = isl_schedule_node_band_member_set_coincident(node, i,
3438
243
          graph->node[0].coincident[start + i]);
3439
154
  node = isl_schedule_node_band_set_permutable(node, permutable);
3440
154
3441
154
  return node;
3442
154
}
3443
3444
/* Update the dependence relations based on the current schedule,
3445
 * add the current band to "node" and then continue with the computation
3446
 * of the next band.
3447
 * Return the updated schedule node.
3448
 */
3449
static __isl_give isl_schedule_node *compute_next_band(
3450
  __isl_take isl_schedule_node *node,
3451
  struct isl_sched_graph *graph, int permutable)
3452
36
{
3453
36
  isl_ctx *ctx;
3454
36
3455
36
  if (!node)
3456
0
    return NULL;
3457
36
3458
36
  ctx = isl_schedule_node_get_ctx(node);
3459
36
  if (update_edges(ctx, graph) < 0)
3460
0
    return isl_schedule_node_free(node);
3461
36
  node = insert_current_band(node, graph, permutable);
3462
36
  next_band(graph);
3463
36
3464
36
  node = isl_schedule_node_child(node, 0);
3465
36
  node = compute_schedule(node, graph);
3466
36
  node = isl_schedule_node_parent(node);
3467
36
3468
36
  return node;
3469
36
}
3470
3471
/* Add constraints to graph->lp that force the dependence "map" (which
3472
 * is part of the dependence relation of "edge")
3473
 * to be respected and attempt to carry it, where the edge is one from
3474
 * a node j to itself.  "pos" is the sequence number of the given map.
3475
 * That is, add constraints that enforce
3476
 *
3477
 *  (c_j_0 + c_j_n n + c_j_x y) - (c_j_0 + c_j_n n + c_j_x x)
3478
 *  = c_j_x (y - x) >= e_i
3479
 *
3480
 * for each (x,y) in R.
3481
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
3482
 * of valid constraints for (y - x) and then plug in (-e_i, 0, c_j_x),
3483
 * with each coefficient in c_j_x represented as a pair of non-negative
3484
 * coefficients.
3485
 */
3486
static int add_intra_constraints(struct isl_sched_graph *graph,
3487
  struct isl_sched_edge *edge, __isl_take isl_map *map, int pos)
3488
45
{
3489
45
  int offset;
3490
45
  isl_ctx *ctx = isl_map_get_ctx(map);
3491
45
  isl_dim_map *dim_map;
3492
45
  isl_basic_set *coef;
3493
45
  struct isl_sched_node *node = edge->src;
3494
45
3495
45
  coef = intra_coefficients(graph, node, map);
3496
45
  if (!coef)
3497
0
    return -1;
3498
45
3499
45
  offset = coef_var_offset(coef);
3500
45
  dim_map = intra_dim_map(ctx, graph, node, offset, 1);
3501
45
  isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3502
45
  graph->lp = isl_basic_set_extend_constraints(graph->lp,
3503
45
      coef->n_eq, coef->n_ineq);
3504
45
  graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
3505
45
                 coef, dim_map);
3506
45
3507
45
  return 0;
3508
45
}
3509
3510
/* Add constraints to graph->lp that force the dependence "map" (which
3511
 * is part of the dependence relation of "edge")
3512
 * to be respected and attempt to carry it, where the edge is one from
3513
 * node j to node k.  "pos" is the sequence number of the given map.
3514
 * That is, add constraints that enforce
3515
 *
3516
 *  (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
3517
 *
3518
 * for each (x,y) in R.
3519
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
3520
 * of valid constraints for R and then plug in
3521
 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, c_k_x - c_j_x)
3522
 * with each coefficient (except e_i, c_*_0 and c_*_n)
3523
 * represented as a pair of non-negative coefficients.
3524
 */
3525
static int add_inter_constraints(struct isl_sched_graph *graph,
3526
  struct isl_sched_edge *edge, __isl_take isl_map *map, int pos)
3527
14
{
3528
14
  int offset;
3529
14
  isl_ctx *ctx = isl_map_get_ctx(map);
3530
14
  isl_dim_map *dim_map;
3531
14
  isl_basic_set *coef;
3532
14
  struct isl_sched_node *src = edge->src;
3533
14
  struct isl_sched_node *dst = edge->dst;
3534
14
3535
14
  coef = inter_coefficients(graph, edge, map);
3536
14
  if (!coef)
3537
0
    return -1;
3538
14
3539
14
  offset = coef_var_offset(coef);
3540
14
  dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
3541
14
  isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3542
14
  graph->lp = isl_basic_set_extend_constraints(graph->lp,
3543
14
      coef->n_eq, coef->n_ineq);
3544
14
  graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
3545
14
                 coef, dim_map);
3546
14
3547
14
  return 0;
3548
14
}
3549
3550
/* Add constraints to graph->lp that force all (conditional) validity
3551
 * dependences to be respected and attempt to carry them.
3552
 */
3553
static isl_stat add_all_constraints(struct isl_sched_graph *graph)
3554
29
{
3555
29
  int i, j;
3556
29
  int pos;
3557
29
3558
29
  pos = 0;
3559
68
  for (i = 0; 
i < graph->n_edge68
;
++i39
)
{39
3560
39
    struct isl_sched_edge *edge= &graph->edge[i];
3561
39
3562
39
    if (!is_any_validity(edge))
3563
0
      continue;
3564
39
3565
98
    
for (j = 0; 39
j < edge->map->n98
;
++j59
)
{59
3566
59
      isl_basic_map *bmap;
3567
59
      isl_map *map;
3568
59
3569
59
      bmap = isl_basic_map_copy(edge->map->p[j]);
3570
59
      map = isl_map_from_basic_map(bmap);
3571
59
3572
59
      if (edge->src == edge->dst &&
3573
45
          add_intra_constraints(graph, edge, map, pos) < 0)
3574
0
        return isl_stat_error;
3575
59
      
if (59
edge->src != edge->dst &&59
3576
14
          add_inter_constraints(graph, edge, map, pos) < 0)
3577
0
        return isl_stat_error;
3578
59
      ++pos;
3579
59
    }
3580
39
  }
3581
29
3582
29
  return isl_stat_ok;
3583
29
}
3584
3585
/* Count the number of equality and inequality constraints
3586
 * that will be added to the carry_lp problem.
3587
 * We count each edge exactly once.
3588
 */
3589
static isl_stat count_all_constraints(struct isl_sched_graph *graph,
3590
  int *n_eq, int *n_ineq)
3591
29
{
3592
29
  int i, j;
3593
29
3594
29
  *n_eq = *n_ineq = 0;
3595
68
  for (i = 0; 
i < graph->n_edge68
;
++i39
)
{39
3596
39
    struct isl_sched_edge *edge= &graph->edge[i];
3597
39
3598
39
    if (!is_any_validity(edge))
3599
0
      continue;
3600
39
3601
98
    
for (j = 0; 39
j < edge->map->n98
;
++j59
)
{59
3602
59
      isl_basic_map *bmap;
3603
59
      isl_map *map;
3604
59
3605
59
      bmap = isl_basic_map_copy(edge->map->p[j]);
3606
59
      map = isl_map_from_basic_map(bmap);
3607
59
3608
59
      if (count_map_constraints(graph, edge, map,
3609
59
              n_eq, n_ineq, 1, 0) < 0)
3610
0
            return isl_stat_error;
3611
59
    }
3612
39
  }
3613
29
3614
29
  return isl_stat_ok;
3615
29
}
3616
3617
/* Return the total number of (validity) edges that carry_dependences will
3618
 * attempt to carry.
3619
 */
3620
static int count_carry_edges(struct isl_sched_graph *graph)
3621
30
{
3622
30
  int i;
3623
30
  int n_edge;
3624
30
3625
30
  n_edge = 0;
3626
70
  for (i = 0; 
i < graph->n_edge70
;
++i40
)
{40
3627
40
    struct isl_sched_edge *edge = &graph->edge[i];
3628
40
3629
40
    if (!is_any_validity(edge))
3630
1
      continue;
3631
40
3632
39
    n_edge += isl_map_n_basic_map(edge->map);
3633
39
  }
3634
30
3635
30
  return n_edge;
3636
30
}
3637
3638
/* Construct an LP problem for finding schedule coefficients
3639
 * such that the schedule carries as many validity dependences as possible.
3640
 * In particular, for each dependence i, we bound the dependence distance
3641
 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
3642
 * of all e_i's.  Dependences with e_i = 0 in the solution are simply
3643
 * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
3644
 * Note that if the dependence relation is a union of basic maps,
3645
 * then we have to consider each basic map individually as it may only
3646
 * be possible to carry the dependences expressed by some of those
3647
 * basic maps and not all of them.
3648
 * Below, we consider each of those basic maps as a separate "edge".
3649
 * "n_edge" is the number of these edges.
3650
 *
3651
 * All variables of the LP are non-negative.  The actual coefficients
3652
 * may be negative, so each coefficient is represented as the difference
3653
 * of two non-negative variables.  The negative part always appears
3654
 * immediately before the positive part.
3655
 * Other than that, the variables have the following order
3656
 *
3657
 *  - sum of (1 - e_i) over all edges
3658
 *  - sum of all c_n coefficients
3659
 *    (unconstrained when computing non-parametric schedules)
3660
 *  - sum of positive and negative parts of all c_x coefficients
3661
 *  - for each edge
3662
 *    - e_i
3663
 *  - for each node
3664
 *    - c_i_0
3665
 *    - c_i_n (if parametric)
3666
 *    - positive and negative parts of c_i_x
3667
 *
3668
 * The constraints are those from the (validity) edges plus three equalities
3669
 * to express the sums and n_edge inequalities to express e_i <= 1.
3670
 */
3671
static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
3672
  int n_edge)
3673
29
{
3674
29
  int i;
3675
29
  int k;
3676
29
  isl_space *dim;
3677
29
  unsigned total;
3678
29
  int n_eq, n_ineq;
3679
29
3680
29
  total = 3 + n_edge;
3681
66
  for (i = 0; 
i < graph->n66
;
++i37
)
{37
3682
37
    struct isl_sched_node *node = &graph->node[graph->sorted[i]];
3683
37
    node->start = total;
3684
37
    total += 1 + node->nparam + 2 * node->nvar;
3685
37
  }
3686
29
3687
29
  if (count_all_constraints(graph, &n_eq, &n_ineq) < 0)
3688
0
    return isl_stat_error;
3689
29
3690
29
  dim = isl_space_set_alloc(ctx, 0, total);
3691
29
  isl_basic_set_free(graph->lp);
3692
29
  n_eq += 3;
3693
29
  n_ineq += n_edge;
3694
29
  graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
3695
29
  graph->lp = isl_basic_set_set_rational(graph->lp);
3696
29
3697
29
  k = isl_basic_set_alloc_equality(graph->lp);
3698
29
  if (k < 0)
3699
0
    return isl_stat_error;
3700
29
  isl_seq_clr(graph->lp->eq[k], 1 +  total);
3701
29
  isl_int_set_si(graph->lp->eq[k][0], -n_edge);
3702
29
  isl_int_set_si(graph->lp->eq[k][1], 1);
3703
88
  for (i = 0; 
i < n_edge88
;
++i59
)
3704
59
    isl_int_set_si(graph->lp->eq[k][4 + i], 1);
3705
29
3706
29
  if (add_param_sum_constraint(graph, 1) < 0)
3707
0
    return isl_stat_error;
3708
29
  
if (29
add_var_sum_constraint(graph, 2) < 029
)
3709
0
    return isl_stat_error;
3710
29
3711
88
  
for (i = 0; 29
i < n_edge88
;
++i59
)
{59
3712
59
    k = isl_basic_set_alloc_inequality(graph->lp);
3713
59
    if (k < 0)
3714
0
      return isl_stat_error;
3715
59
    isl_seq_clr(graph->lp->ineq[k], 1 +  total);
3716
59
    isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
3717
59
    isl_int_set_si(graph->lp->ineq[k][0], 1);
3718
59
  }
3719
29
3720
29
  
if (29
add_all_constraints(graph) < 029
)
3721
0
    return isl_stat_error;
3722
29
3723
29
  return isl_stat_ok;
3724
29
}
3725
3726
static __isl_give isl_schedule_node *compute_component_schedule(
3727
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3728
  int wcc);
3729
3730
/* Comparison function for sorting the statements based on
3731
 * the corresponding value in "r".
3732
 */
3733
static int smaller_value(const void *a, const void *b, void *data)
3734
10
{
3735
10
  isl_vec *r = data;
3736
10
  const int *i1 = a;
3737
10
  const int *i2 = b;
3738
10
3739
10
  return isl_int_cmp(r->el[*i1], r->el[*i2]);
3740
10
}
3741
3742
/* If the schedule_split_scaled option is set and if the linear
3743
 * parts of the scheduling rows for all nodes in the graphs have
3744
 * a non-trivial common divisor, then split off the remainder of the
3745
 * constant term modulo this common divisor from the linear part.
3746
 * Otherwise, insert a band node directly and continue with
3747
 * the construction of the schedule.
3748
 *
3749
 * If a non-trivial common divisor is found, then
3750
 * the linear part is reduced and the remainder is enforced
3751
 * by a sequence node with the children placed in the order
3752
 * of this remainder.
3753
 * In particular, we assign an scc index based on the remainder and
3754
 * then rely on compute_component_schedule to insert the sequence and
3755
 * to continue the schedule construction on each part.
3756
 */
3757
static __isl_give isl_schedule_node *split_scaled(
3758
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3759
29
{
3760
29
  int i;
3761
29
  int row;
3762
29
  int scc;
3763
29
  isl_ctx *ctx;
3764
29
  isl_int gcd, gcd_i;
3765
29
  isl_vec *r;
3766
29
  int *order;
3767
29
3768
29
  if (!node)
3769
0
    return NULL;
3770
29
3771
29
  ctx = isl_schedule_node_get_ctx(node);
3772
29
  if (!ctx->opt->schedule_split_scaled)
3773
0
    return compute_next_band(node, graph, 0);
3774
29
  
if (29
graph->n <= 129
)
3775
23
    return compute_next_band(node, graph, 0);
3776
29
3777
6
  
isl_int_init6
(gcd);6
3778
6
  isl_int_init(gcd_i);
3779
6
3780
6
  isl_int_set_si(gcd, 0);
3781
6
3782
6
  row = isl_mat_rows(graph->node[0].sched) - 1;
3783
6
3784
20
  for (i = 0; 
i < graph->n20
;
++i14
)
{14
3785
14
    struct isl_sched_node *node = &graph->node[i];
3786
14
    int cols = isl_mat_cols(node->sched);
3787
14
3788
14
    isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
3789
14
    isl_int_gcd(gcd, gcd, gcd_i);
3790
14
  }
3791
6
3792
6
  isl_int_clear(gcd_i);
3793
6
3794
6
  if (
isl_int_cmp_si6
(gcd, 1) <= 06
)
{0
3795
0
    isl_int_clear(gcd);
3796
0
    return compute_next_band(node, graph, 0);
3797
0
  }
3798
6
3799
6
  r = isl_vec_alloc(ctx, graph->n);
3800
6
  order = isl_calloc_array(ctx, int, graph->n);
3801
6
  if (
!r || 6
!order6
)
3802
0
    goto error;
3803
6
3804
20
  
for (i = 0; 6
i < graph->n20
;
++i14
)
{14
3805
14
    struct isl_sched_node *node = &graph->node[i];
3806
14
3807
14
    order[i] = i;
3808
14
    isl_int_fdiv_r(r->el[i], node->sched->row[row][0], gcd);
3809
14
    isl_int_fdiv_q(node->sched->row[row][0],
3810
14
             node->sched->row[row][0], gcd);
3811
14
    isl_int_mul(node->sched->row[row][0],
3812
14
          node->sched->row[row][0], gcd);
3813
14
    node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
3814
14
    if (!node->sched)
3815
0
      goto error;
3816
14
  }
3817
6
3818
6
  
if (6
isl_sort(order, graph->n, sizeof(order[0]), &smaller_value, r) < 06
)
3819
0
    goto error;
3820
6
3821
6
  scc = 0;
3822
20
  for (i = 0; 
i < graph->n20
;
++i14
)
{14
3823
14
    if (
i > 0 && 14
isl_int_ne8
(r->el[order[i - 1]], r->el[order[i]]))
3824
8
      ++scc;
3825
14
    graph->node[order[i]].scc = scc;
3826
14
  }
3827
6
  graph->scc = ++scc;
3828
6
  graph->weak = 0;
3829
6
3830
6
  isl_int_clear(gcd);
3831
6
  isl_vec_free(r);
3832
6
  free(order);
3833
6
3834
6
  if (update_edges(ctx, graph) < 0)
3835
0
    return isl_schedule_node_free(node);
3836
6
  node = insert_current_band(node, graph, 0);
3837
6
  next_band(graph);
3838
6
3839
6
  node = isl_schedule_node_child(node, 0);
3840
6
  node = compute_component_schedule(node, graph, 0);
3841
6
  node = isl_schedule_node_parent(node);
3842
6
3843
6
  return node;
3844
0
error:
3845
0
  isl_vec_free(r);
3846
0
  free(order);
3847
0
  isl_int_clear(gcd);
3848
0
  return isl_schedule_node_free(node);
3849
6
}
3850
3851
/* Is the schedule row "sol" trivial on node "node"?
3852
 * That is, is the solution zero on the dimensions orthogonal to
3853
 * the previously found solutions?
3854
 * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
3855
 *
3856
 * Each coefficient is represented as the difference between
3857
 * two non-negative values in "sol".  "sol" has been computed
3858
 * in terms of the original iterators (i.e., without use of cmap).
3859
 * We construct the schedule row s and write it as a linear
3860
 * combination of (linear combinations of) previously computed schedule rows.
3861
 * s = Q c or c = U s.
3862
 * If the final entries of c are all zero, then the solution is trivial.
3863
 */
3864
static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
3865
33
{
3866
33
  int trivial;
3867
33
  isl_vec *node_sol;
3868
33
3869
33
  if (!sol)
3870
0
    return -1;
3871
33
  
if (33
node->nvar == node->rank33
)
3872
0
    return 0;
3873
33
3874
33
  node_sol = extract_var_coef(node, sol);
3875
33
  node_sol = isl_mat_vec_product(isl_mat_copy(node->cinv), node_sol);
3876
33
  if (!node_sol)
3877
0
    return -1;
3878
33
3879
33
  trivial = isl_seq_first_non_zero(node_sol->el + node->rank,
3880
33
          node->nvar - node->rank) == -1;
3881
33
3882
33
  isl_vec_free(node_sol);
3883
33
3884
33
  return trivial;
3885
33
}
3886
3887
/* Is the schedule row "sol" trivial on any node where it should
3888
 * not be trivial?
3889
 * "sol" has been computed in terms of the original iterators
3890
 * (i.e., without use of cmap).
3891
 * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
3892
 */
3893
static int is_any_trivial(struct isl_sched_graph *graph,
3894
  __isl_keep isl_vec *sol)
3895
29
{
3896
29
  int i;
3897
29
3898
58
  for (i = 0; 
i < graph->n58
;
++i29
)
{33
3899
33
    struct isl_sched_node *node = &graph->node[i];
3900
33
    int trivial;
3901
33
3902
33
    if (!needs_row(graph, node))
3903
0
      continue;
3904
33
    trivial = is_trivial(node, sol);
3905
33
    if (
trivial < 0 || 33
trivial33
)
3906
4
      return trivial;
3907
33
  }
3908
29
3909
25
  return 0;
3910
29
}
3911
3912
/* Does the schedule represented by "sol" perform loop coalescing on "node"?
3913
 * If so, return the position of the coalesced dimension.
3914
 * Otherwise, return node->nvar or -1 on error.
3915
 *
3916
 * In particular, look for pairs of coefficients c_i and c_j such that
3917
 * |c_j/c_i| >= size_i, i.e., |c_j| >= |c_i * size_i|.
3918
 * If any such pair is found, then return i.
3919
 * If size_i is infinity, then no check on c_i needs to be performed.
3920
 */
3921
static int find_node_coalescing(struct isl_sched_node *node,
3922
  __isl_keep isl_vec *sol)
3923
39
{
3924
39
  int i, j;
3925
39
  isl_int max;
3926
39
  isl_vec *csol;
3927
39
3928
39
  if (node->nvar <= 1)
3929
14
    return node->nvar;
3930
39
3931
25
  csol = extract_var_coef(node, sol);
3932
25
  if (!csol)
3933
0
    return -1;
3934
25
  
isl_int_init25
(max);25
3935
71
  for (i = 0; 
i < node->nvar71
;
++i46
)
{50
3936
50
    isl_val *v;
3937
50
3938
50
    if (isl_int_is_zero(csol->el[i]))
3939
18
      continue;
3940
32
    v = isl_multi_val_get_val(node->sizes, i);
3941
32
    if (!v)
3942
0
      goto error;
3943
32
    
if (32
!isl_val_is_int(v)32
)
{14
3944
14
      isl_val_free(v);
3945
14
      continue;
3946
14
    }
3947
18
    
isl_int_mul18
(max, v->n, csol->el[i]);18
3948
18
    isl_val_free(v);
3949
18
3950
46
    for (j = 0; 
j < node->nvar46
;
++j28
)
{32
3951
32
      if (j == i)
3952
14
        continue;
3953
18
      
if (18
isl_int_abs_ge18
(csol->el[j], max))
3954
4
        break;
3955
18
    }
3956
18
    if (j < node->nvar)
3957
4
      break;
3958
18
  }
3959
25
3960
25
  
isl_int_clear25
(max);25
3961
25
  isl_vec_free(csol);
3962
25
  return i;
3963
0
error:
3964
0
  isl_int_clear(max);
3965
0
  isl_vec_free(csol);
3966
0
  return -1;
3967
25
}
3968
3969
/* Force the schedule coefficient at position "pos" of "node" to be zero
3970
 * in "tl".
3971
 * The coefficient is encoded as the difference between two non-negative
3972
 * variables.  Force these two variables to have the same value.
3973
 */
3974
static __isl_give isl_tab_lexmin *zero_out_node_coef(
3975
  __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
3976
4
{
3977
4
  int dim;
3978
4
  isl_ctx *ctx;
3979
4
  isl_vec *eq;
3980
4
3981
4
  ctx = isl_space_get_ctx(node->space);
3982
4
  dim = isl_tab_lexmin_dim(tl);
3983
4
  if (dim < 0)
3984
0
    return isl_tab_lexmin_free(tl);
3985
4
  eq = isl_vec_alloc(ctx, 1 + dim);
3986
4
  eq = isl_vec_clr(eq);
3987
4
  if (!eq)
3988
0
    return isl_tab_lexmin_free(tl);
3989
4
3990
4
  pos = 1 + node_var_coef_offset(node) + 2 * pos;
3991
4
  isl_int_set_si(eq->el[pos], 1);
3992
4
  isl_int_set_si(eq->el[pos + 1], -1);
3993
4
  tl = isl_tab_lexmin_add_eq(tl, eq->el);
3994
4
  isl_vec_free(eq);
3995
4
3996
4
  return tl;
3997
4
}
3998
3999
/* Return the lexicographically smallest rational point in the basic set
4000
 * from which "tl" was constructed, double checking that this input set
4001
 * was not empty.
4002
 */
4003
static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
4004
33
{
4005
33
  isl_vec *sol;
4006
33
4007
33
  sol = isl_tab_lexmin_get_solution(tl);
4008
33
  if (!sol)
4009
0
    return NULL;
4010
33
  
if (33
sol->size == 033
)
4011
0
    isl_die(isl_vec_get_ctx(sol), isl_error_internal,
4012
33
      "error in schedule construction",
4013
33
      return isl_vec_free(sol));
4014
33
  return sol;
4015
33
}
4016
4017
/* Does the solution "sol" of the LP problem constructed by setup_carry_lp
4018
 * carry any of the "n_edge" groups of dependences?
4019
 * The value in the first position is the sum of (1 - e_i) over all "n_edge"
4020
 * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
4021
 * by the edge are carried by the solution.
4022
 * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
4023
 * one of those is carried.
4024
 *
4025
 * Note that despite the fact that the problem is solved using a rational
4026
 * solver, the solution is guaranteed to be integral.
4027
 * Specifically, the dependence distance lower bounds e_i (and therefore
4028
 * also their sum) are integers.  See Lemma 5 of [1].
4029
 *
4030
 * Any potential denominator of the sum is cleared by this function.
4031
 * The denominator is not relevant for any of the other elements
4032
 * in the solution.
4033
 *
4034
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4035
 *     Problem, Part II: Multi-Dimensional Time.
4036
 *     In Intl. Journal of Parallel Programming, 1992.
4037
 */
4038
static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
4039
33
{
4040
33
  isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
4041
33
  isl_int_set_si(sol->el[0], 1);
4042
33
  return isl_int_cmp_si(sol->el[1], n_edge) < 0;
4043
33
}
4044
4045
/* Return the lexicographically smallest rational point in "lp",
4046
 * assuming that all variables are non-negative and performing some
4047
 * additional sanity checks.
4048
 * In particular, "lp" should not be empty by construction.
4049
 * Double check that this is the case.
4050
 * Also, check that dependences are carried for at least one of
4051
 * the "n_edge" edges.
4052
 *
4053
 * If the computed schedule performs loop coalescing on a given node,
4054
 * i.e., if it is of the form
4055
 *
4056
 *  c_i i + c_j j + ...
4057
 *
4058
 * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
4059
 * to cut out this solution.  Repeat this process until no more loop
4060
 * coalescing occurs or until no more dependences can be carried.
4061
 * In the latter case, revert to the previously computed solution.
4062
 */
4063
static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
4064
  __isl_take isl_basic_set *lp, int n_edge)
4065
29
{
4066
29
  int i, pos;
4067
29
  isl_ctx *ctx;
4068
29
  isl_tab_lexmin *tl;
4069
29
  isl_vec *sol, *prev = NULL;
4070
29
  int treat_coalescing;
4071
29
4072
29
  if (!lp)
4073
0
    return NULL;
4074
29
  ctx = isl_basic_set_get_ctx(lp);
4075
29
  treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
4076
29
  tl = isl_tab_lexmin_from_basic_set(lp);
4077
29
4078
33
  do {
4079
33
    sol = non_empty_solution(tl);
4080
33
    if (!sol)
4081
0
      goto error;
4082
33
4083
33
    
if (33
!carries_dependences(sol, n_edge)33
)
{2
4084
2
      if (!prev)
4085
0
        isl_die(ctx, isl_error_unknown,
4086
2
          "unable to carry dependences",
4087
2
          goto error);
4088
2
      isl_vec_free(sol);
4089
2
      sol = prev;
4090
2
      break;
4091
2
    }
4092
31
    prev = isl_vec_free(prev);
4093
31
    if (!treat_coalescing)
4094
0
      break;
4095
66
    
for (i = 0; 31
i < graph->n66
;
++i35
)
{39
4096
39
      struct isl_sched_node *node = &graph->node[i];
4097
39
4098
39
      pos = find_node_coalescing(node, sol);
4099
39
      if (pos < 0)
4100
0
        goto error;
4101
39
      
if (39
pos < node->nvar39
)
4102
4
        break;
4103
39
    }
4104
31
    
if (31
i < graph->n31
)
{4
4105
4
      prev = sol;
4106
4
      tl = zero_out_node_coef(tl, &graph->node[i], pos);
4107
4
    }
4108
31
  } while (i < graph->n);
4109
29
4110
29
  isl_tab_lexmin_free(tl);
4111
29
4112
29
  return sol;
4113
0
error:
4114
0
  isl_tab_lexmin_free(tl);
4115
0
  isl_vec_free(prev);
4116
0
  isl_vec_free(sol);
4117
0
  return NULL;
4118
29
}
4119
4120
/* Construct a schedule row for each node such that as many validity dependences
4121
 * as possible are carried and then continue with the next band.
4122
 *
4123
 * If there are no validity dependences, then no dependence can be carried and
4124
 * the procedure is guaranteed to fail.  If there is more than one component,
4125
 * then try computing a schedule on each component separately
4126
 * to prevent or at least postpone this failure.
4127
 *
4128
 * If the computed schedule row turns out to be trivial on one or
4129
 * more nodes where it should not be trivial, then we throw it away
4130
 * and try again on each component separately.
4131
 *
4132
 * If there is only one component, then we accept the schedule row anyway,
4133
 * but we do not consider it as a complete row and therefore do not
4134
 * increment graph->n_row.  Note that the ranks of the nodes that
4135
 * do get a non-trivial schedule part will get updated regardless and
4136
 * graph->maxvar is computed based on these ranks.  The test for
4137
 * whether more schedule rows are required in compute_schedule_wcc
4138
 * is therefore not affected.
4139
 *
4140
 * Insert a band corresponding to the schedule row at position "node"
4141
 * of the schedule tree and continue with the construction of the schedule.
4142
 * This insertion and the continued construction is performed by split_scaled
4143
 * after optionally checking for non-trivial common divisors.
4144
 */
4145
static __isl_give isl_schedule_node *carry_dependences(
4146
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4147
30
{
4148
30
  int n_edge;
4149
30
  int trivial;
4150
30
  isl_ctx *ctx;
4151
30
  isl_vec *sol;
4152
30
  isl_basic_set *lp;
4153
30
4154
30
  if (!node)
4155
0
    return NULL;
4156
30
4157
30
  n_edge = count_carry_edges(graph);
4158
30
  if (
n_edge == 0 && 30
graph->scc > 11
)
4159
1
    return compute_component_schedule(node, graph, 1);
4160
30
4161
29
  ctx = isl_schedule_node_get_ctx(node);
4162
29
  if (setup_carry_lp(ctx, graph, n_edge) < 0)
4163
0
    return isl_schedule_node_free(node);
4164
29
4165
29
  lp = isl_basic_set_copy(graph->lp);
4166
29
  sol = non_neg_lexmin(graph, lp, n_edge);
4167
29
  if (!sol)
4168
0
    return isl_schedule_node_free(node);
4169
29
4170
29
  trivial = is_any_trivial(graph, sol);
4171
29
  if (
trivial < 029
)
{0
4172
0
    sol = isl_vec_free(sol);
4173
29
  } else 
if (29
trivial && 29
graph->scc > 14
)
{0
4174
0
    isl_vec_free(sol);
4175
0
    return compute_component_schedule(node, graph, 1);
4176
0
  }
4177
29
4178
29
  
if (29
update_schedule(graph, sol, 0, 0) < 029
)
4179
0
    return isl_schedule_node_free(node);
4180
29
  
if (29
trivial29
)
4181
4
    graph->n_row--;
4182
29
4183
29
  return split_scaled(node, graph);
4184
29
}
4185
4186
/* Topologically sort statements mapped to the same schedule iteration
4187
 * and add insert a sequence node in front of "node"
4188
 * corresponding to this order.
4189
 * If "initialized" is set, then it may be assumed that compute_maxvar
4190
 * has been called on the current band.  Otherwise, call
4191
 * compute_maxvar if and before carry_dependences gets called.
4192
 *
4193
 * If it turns out to be impossible to sort the statements apart,
4194
 * because different dependences impose different orderings
4195
 * on the statements, then we extend the schedule such that
4196
 * it carries at least one more dependence.
4197
 */
4198
static __isl_give isl_schedule_node *sort_statements(
4199
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4200
  int initialized)
4201
142
{
4202
142
  isl_ctx *ctx;
4203
142
  isl_union_set_list *filters;
4204
142
4205
142
  if (!node)
4206
0
    return NULL;
4207
142
4208
142
  ctx = isl_schedule_node_get_ctx(node);
4209
142
  if (graph->n < 1)
4210
0
    isl_die(ctx, isl_error_internal,
4211
142
      "graph should have at least one node",
4212
142
      return isl_schedule_node_free(node));
4213
142
4214
142
  
if (142
graph->n == 1142
)
4215
104
    return node;
4216
142
4217
38
  
if (38
update_edges(ctx, graph) < 038
)
4218
0
    return isl_schedule_node_free(node);
4219
38
4220
38
  
if (38
graph->n_edge == 038
)
4221
0
    return node;
4222
38
4223
38
  
if (38
detect_sccs(ctx, graph) < 038
)
4224
0
    return isl_schedule_node_free(node);
4225
38
4226
38
  next_band(graph);
4227
38
  if (
graph->scc < graph->n38
)
{2
4228
2
    if (
!initialized && 2
compute_maxvar(graph) < 00
)
4229
0
      return isl_schedule_node_free(node);
4230
2
    return carry_dependences(node, graph);
4231
2
  }
4232
38
4233
36
  filters = extract_sccs(ctx, graph);
4234
36
  node = isl_schedule_node_insert_sequence(node, filters);
4235
36
4236
36
  return node;
4237
38
}
4238
4239
/* Are there any (non-empty) (conditional) validity edges in the graph?
4240
 */
4241
static int has_validity_edges(struct isl_sched_graph *graph)
4242
26
{
4243
26
  int i;
4244
26
4245
34
  for (i = 0; 
i < graph->n_edge34
;
++i8
)
{20
4246
20
    int empty;
4247
20
4248
20
    empty = isl_map_plain_is_empty(graph->edge[i].map);
4249
20
    if (empty < 0)
4250
0
      return -1;
4251
20
    
if (20
empty20
)
4252
6
      continue;
4253
14
    
if (14
is_any_validity(&graph->edge[i])14
)
4254
12
      return 1;
4255
14
  }
4256
26
4257
14
  return 0;
4258
26
}
4259
4260
/* Should we apply a Feautrier step?
4261
 * That is, did the user request the Feautrier algorithm and are
4262
 * there any validity dependences (left)?
4263
 */
4264
static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
4265
180
{
4266
180
  if (
ctx->opt->schedule_algorithm != 180
ISL_SCHEDULE_ALGORITHM_FEAUTRIER180
)
4267
154
    return 0;
4268
180
4269
26
  return has_validity_edges(graph);
4270
180
}
4271
4272
/* Compute a schedule for a connected dependence graph using Feautrier's
4273
 * multi-dimensional scheduling algorithm and return the updated schedule node.
4274
 *
4275
 * The original algorithm is described in [1].
4276
 * The main idea is to minimize the number of scheduling dimensions, by
4277
 * trying to satisfy as many dependences as possible per scheduling dimension.
4278
 *
4279
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4280
 *     Problem, Part II: Multi-Dimensional Time.
4281
 *     In Intl. Journal of Parallel Programming, 1992.
4282
 */
4283
static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier(
4284
  isl_schedule_node *node, struct isl_sched_graph *graph)
4285
12
{
4286
12
  return carry_dependences(node, graph);
4287
12
}
4288
4289
/* Turn off the "local" bit on all (condition) edges.
4290
 */
4291
static void clear_local_edges(struct isl_sched_graph *graph)
4292
214
{
4293
214
  int i;
4294
214
4295
463
  for (i = 0; 
i < graph->n_edge463
;
++i249
)
4296
249
    
if (249
is_condition(&graph->edge[i])249
)
4297
34
      clear_local(&graph->edge[i]);
4298
214
}
4299
4300
/* Does "graph" have both condition and conditional validity edges?
4301
 */
4302
static int need_condition_check(struct isl_sched_graph *graph)
4303
214
{
4304
214
  int i;
4305
214
  int any_condition = 0;
4306
214
  int any_conditional_validity = 0;
4307
214
4308
463
  for (i = 0; 
i < graph->n_edge463
;
++i249
)
{249
4309
249
    if (is_condition(&graph->edge[i]))
4310
34
      any_condition = 1;
4311
249
    if (is_conditional_validity(&graph->edge[i]))
4312
39
      any_conditional_validity = 1;
4313
249
  }
4314
214
4315
19
  return any_condition && any_conditional_validity;
4316
214
}
4317
4318
/* Does "graph" contain any coincidence edge?
4319
 */
4320
static int has_any_coincidence(struct isl_sched_graph *graph)
4321
214
{
4322
214
  int i;
4323
214
4324
326
  for (i = 0; 
i < graph->n_edge326
;
++i112
)
4325
206
    
if (206
is_coincidence(&graph->edge[i])206
)
4326
94
      return 1;
4327
214
4328
120
  return 0;
4329
214
}
4330
4331
/* Extract the final schedule row as a map with the iteration domain
4332
 * of "node" as domain.
4333
 */
4334
static __isl_give isl_map *final_row(struct isl_sched_node *node)
4335
138
{
4336
138
  isl_local_space *ls;
4337
138
  isl_aff *aff;
4338
138
  int row;
4339
138
4340
138
  row = isl_mat_rows(node->sched) - 1;
4341
138
  ls = isl_local_space_from_space(isl_space_copy(node->space));
4342
138
  aff = extract_schedule_row(ls, node, row);
4343
138
  return isl_map_from_aff(aff);
4344
138
}
4345
4346
/* Is the conditional validity dependence in the edge with index "edge_index"
4347
 * violated by the latest (i.e., final) row of the schedule?
4348
 * That is, is i scheduled after j
4349
 * for any conditional validity dependence i -> j?
4350
 */
4351
static int is_violated(struct isl_sched_graph *graph, int edge_index)
4352
69
{
4353
69
  isl_map *src_sched, *dst_sched, *map;
4354
69
  struct isl_sched_edge *edge = &graph->edge[edge_index];
4355
69
  int empty;
4356
69
4357
69
  src_sched = final_row(edge->src);
4358
69
  dst_sched = final_row(edge->dst);
4359
69
  map = isl_map_copy(edge->map);
4360
69
  map = isl_map_apply_domain(map, src_sched);
4361
69
  map = isl_map_apply_range(map, dst_sched);
4362
69
  map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0);
4363
69
  empty = isl_map_is_empty(map);
4364
69
  isl_map_free(map);
4365
69
4366
69
  if (empty < 0)
4367
0
    return -1;
4368
69
4369
69
  return !empty;
4370
69
}
4371
4372
/* Does "graph" have any satisfied condition edges that
4373
 * are adjacent to the conditional validity constraint with
4374
 * domain "conditional_source" and range "conditional_sink"?
4375
 *
4376
 * A satisfied condition is one that is not local.
4377
 * If a condition was forced to be local already (i.e., marked as local)
4378
 * then there is no need to check if it is in fact local.
4379
 *
4380
 * Additionally, mark all adjacent condition edges found as local.
4381
 */
4382
static int has_adjacent_true_conditions(struct isl_sched_graph *graph,
4383
  __isl_keep isl_union_set *conditional_source,
4384
  __isl_keep isl_union_set *conditional_sink)
4385
10
{
4386
10
  int i;
4387
10
  int any = 0;
4388
10
4389
57
  for (i = 0; 
i < graph->n_edge57
;
++i47
)
{47
4390
47
    int adjacent, local;
4391
47
    isl_union_map *condition;
4392
47
4393
47
    if (!is_condition(&graph->edge[i]))
4394
28
      continue;
4395
19
    
if (19
is_local(&graph->edge[i])19
)
4396
0
      continue;
4397
19
4398
19
    condition = graph->edge[i].tagged_condition;
4399
19
    adjacent = domain_intersects(condition, conditional_sink);
4400
19
    if (
adjacent >= 0 && 19
!adjacent19
)
4401
8
      adjacent = range_intersects(condition,
4402
8
              conditional_source);
4403
19
    if (adjacent < 0)
4404
0
      return -1;
4405
19
    
if (19
!adjacent19
)
4406
5
      continue;
4407
19
4408
14
    set_local(&graph->edge[i]);
4409
14
4410
14
    local = is_condition_false(&graph->edge[i]);
4411
14
    if (local < 0)
4412
0
      return -1;
4413
14
    
if (14
!local14
)
4414
7
      any = 1;
4415
14
  }
4416
10
4417
10
  return any;
4418
10
}
4419
4420
/* Are there any violated conditional validity dependences with
4421
 * adjacent condition dependences that are not local with respect
4422
 * to the current schedule?
4423
 * That is, is the conditional validity constraint violated?
4424
 *
4425
 * Additionally, mark all those adjacent condition dependences as local.
4426
 * We also mark those adjacent condition dependences that were not marked
4427
 * as local before, but just happened to be local already.  This ensures
4428
 * that they remain local if the schedule is recomputed.
4429
 *
4430
 * We first collect domain and range of all violated conditional validity
4431
 * dependences and then check if there are any adjacent non-local
4432
 * condition dependences.
4433
 */
4434
static int has_violated_conditional_constraint(isl_ctx *ctx,
4435
  struct isl_sched_graph *graph)
4436
29
{
4437
29
  int i;
4438
29
  int any = 0;
4439
29
  isl_union_set *source, *sink;
4440
29
4441
29
  source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4442
29
  sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4443
162
  for (i = 0; 
i < graph->n_edge162
;
++i133
)
{133
4444
133
    isl_union_set *uset;
4445
133
    isl_union_map *umap;
4446
133
    int violated;
4447
133
4448
133
    if (!is_conditional_validity(&graph->edge[i]))
4449
64
      continue;
4450
133
4451
69
    violated = is_violated(graph, i);
4452
69
    if (violated < 0)
4453
0
      goto error;
4454
69
    
if (69
!violated69
)
4455
51
      continue;
4456
69
4457
18
    any = 1;
4458
18
4459
18
    umap = isl_union_map_copy(graph->edge[i].tagged_validity);
4460
18
    uset = isl_union_map_domain(umap);
4461
18
    source = isl_union_set_union(source, uset);
4462
18
    source = isl_union_set_coalesce(source);
4463
18
4464
18
    umap = isl_union_map_copy(graph->edge[i].tagged_validity);
4465
18
    uset = isl_union_map_range(umap);
4466
18
    sink = isl_union_set_union(sink, uset);
4467
18
    sink = isl_union_set_coalesce(sink);
4468
18
  }
4469
29
4470
29
  
if (29
any29
)
4471
10
    any = has_adjacent_true_conditions(graph, source, sink);
4472
29
4473
29
  isl_union_set_free(source);
4474
29
  isl_union_set_free(sink);
4475
29
  return any;
4476
0
error:
4477
0
  isl_union_set_free(source);
4478
0
  isl_union_set_free(sink);
4479
0
  return -1;
4480
29
}
4481
4482
/* Examine the current band (the rows between graph->band_start and
4483
 * graph->n_total_row), deciding whether to drop it or add it to "node"
4484
 * and then continue with the computation of the next band, if any.
4485
 * If "initialized" is set, then it may be assumed that compute_maxvar
4486
 * has been called on the current band.  Otherwise, call
4487
 * compute_maxvar if and before carry_dependences gets called.
4488
 *
4489
 * The caller keeps looking for a new row as long as
4490
 * graph->n_row < graph->maxvar.  If the latest attempt to find
4491
 * such a row failed (i.e., we still have graph->n_row < graph->maxvar),
4492
 * then we either
4493
 * - split between SCCs and start over (assuming we found an interesting
4494
 *  pair of SCCs between which to split)
4495
 * - continue with the next band (assuming the current band has at least
4496
 *  one row)
4497
 * - try to carry as many dependences as possible and continue with the next
4498
 *  band
4499
 * In each case, we first insert a band node in the schedule tree
4500
 * if any rows have been computed.
4501
 *
4502
 * If the caller managed to complete the schedule, we insert a band node
4503
 * (if any schedule rows were computed) and we finish off by topologically
4504
 * sorting the statements based on the remaining dependences.
4505
 */
4506
static __isl_give isl_schedule_node *compute_schedule_finish_band(
4507
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4508
  int initialized)
4509
173
{
4510
173
  int insert;
4511
173
4512
173
  if (!node)
4513
0
    return NULL;
4514
173
4515
173
  
if (173
graph->n_row < graph->maxvar173
)
{31
4516
31
    isl_ctx *ctx;
4517
31
    int empty = graph->n_total_row == graph->band_start;
4518
31
4519
31
    ctx = isl_schedule_node_get_ctx(node);
4520
31
    if (
!ctx->opt->schedule_maximize_band_depth && 31
!empty29
)
4521
12
      return compute_next_band(node, graph, 1);
4522
19
    
if (19
graph->src_scc >= 019
)
4523
2
      return compute_split_schedule(node, graph);
4524
17
    
if (17
!empty17
)
4525
1
      return compute_next_band(node, graph, 1);
4526
16
    
if (16
!initialized && 16
compute_maxvar(graph) < 00
)
4527
0
      return isl_schedule_node_free(node);
4528
16
    return carry_dependences(node, graph);
4529
16
  }
4530
173
4531
142
  insert = graph->n_total_row > graph->band_start;
4532
142
  if (
insert142
)
{112
4533
112
    node = insert_current_band(node, graph, 1);
4534
112
    node = isl_schedule_node_child(node, 0);
4535
112
  }
4536
142
  node = sort_statements(node, graph, initialized);
4537
142
  if (insert)
4538
112
    node = isl_schedule_node_parent(node);
4539
142
4540
142
  return node;
4541
173
}
4542
4543
/* Construct a band of schedule rows for a connected dependence graph.
4544
 * The caller is responsible for determining the strongly connected
4545
 * components and calling compute_maxvar first.
4546
 *
4547
 * We try to find a sequence of as many schedule rows as possible that result
4548
 * in non-negative dependence distances (independent of the previous rows
4549
 * in the sequence, i.e., such that the sequence is tilable), with as
4550
 * many of the initial rows as possible satisfying the coincidence constraints.
4551
 * The computation stops if we can't find any more rows or if we have found
4552
 * all the rows we wanted to find.
4553
 *
4554
 * If ctx->opt->schedule_outer_coincidence is set, then we force the
4555
 * outermost dimension to satisfy the coincidence constraints.  If this
4556
 * turns out to be impossible, we fall back on the general scheme above
4557
 * and try to carry as many dependences as possible.
4558
 *
4559
 * If "graph" contains both condition and conditional validity dependences,
4560
 * then we need to check that that the conditional schedule constraint
4561
 * is satisfied, i.e., there are no violated conditional validity dependences
4562
 * that are adjacent to any non-local condition dependences.
4563
 * If there are, then we mark all those adjacent condition dependences
4564
 * as local and recompute the current band.  Those dependences that
4565
 * are marked local will then be forced to be local.
4566
 * The initial computation is performed with no dependences marked as local.
4567
 * If we are lucky, then there will be no violated conditional validity
4568
 * dependences adjacent to any non-local condition dependences.
4569
 * Otherwise, we mark some additional condition dependences as local and
4570
 * recompute.  We continue this process until there are no violations left or
4571
 * until we are no longer able to compute a schedule.
4572
 * Since there are only a finite number of dependences,
4573
 * there will only be a finite number of iterations.
4574
 */
4575
static isl_stat compute_schedule_wcc_band(isl_ctx *ctx,
4576
  struct isl_sched_graph *graph)
4577
214
{
4578
214
  int has_coincidence;
4579
214
  int use_coincidence;
4580
214
  int force_coincidence = 0;
4581
214
  int check_conditional;
4582
214
4583
214
  if (sort_sccs(graph) < 0)
4584
0
    return isl_stat_error;
4585
214
4586
214
  clear_local_edges(graph);
4587
214
  check_conditional = need_condition_check(graph);
4588
214
  has_coincidence = has_any_coincidence(graph);
4589
214
4590
214
  if (ctx->opt->schedule_outer_coincidence)
4591
14
    force_coincidence = 1;
4592
214
4593
214
  use_coincidence = has_coincidence;
4594
547
  while (
graph->n_row < graph->maxvar547
)
{367
4595
367
    isl_vec *sol;
4596
367
    int violated;
4597
367
    int coincident;
4598
367
4599
367
    graph->src_scc = -1;
4600
367
    graph->dst_scc = -1;
4601
367
4602
367
    if (setup_lp(ctx, graph, use_coincidence) < 0)
4603
0
      return isl_stat_error;
4604
367
    sol = solve_lp(graph);
4605
367
    if (!sol)
4606
0
      return isl_stat_error;
4607
367
    
if (367
sol->size == 0367
)
{103
4608
103
      int empty = graph->n_total_row == graph->band_start;
4609
103
4610
103
      isl_vec_free(sol);
4611
103
      if (
use_coincidence && 103
(!force_coincidence || 78
!empty9
))
{69
4612
69
        use_coincidence = 0;
4613
69
        continue;
4614
69
      }
4615
34
      return isl_stat_ok;
4616
103
    }
4617
264
    
coincident = !has_coincidence || 264
use_coincidence128
;
4618
264
    if (update_schedule(graph, sol, 1, coincident) < 0)
4619
0
      return isl_stat_error;
4620
264
4621
264
    
if (264
!check_conditional264
)
4622
235
      continue;
4623
29
    violated = has_violated_conditional_constraint(ctx, graph);
4624
29
    if (violated < 0)
4625
0
      return isl_stat_error;
4626
29
    
if (29
!violated29
)
4627
24
      continue;
4628
5
    
if (5
reset_band(graph) < 05
)
4629
0
      return isl_stat_error;
4630
5
    use_coincidence = has_coincidence;
4631
5
  }
4632
214
4633
180
  return isl_stat_ok;
4634
214
}
4635
4636
/* Compute a schedule for a connected dependence graph by considering
4637
 * the graph as a whole and return the updated schedule node.
4638
 *
4639
 * The actual schedule rows of the current band are computed by
4640
 * compute_schedule_wcc_band.  compute_schedule_finish_band takes
4641
 * care of integrating the band into "node" and continuing
4642
 * the computation.
4643
 */
4644
static __isl_give isl_schedule_node *compute_schedule_wcc_whole(
4645
  __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4646
153
{
4647
153
  isl_ctx *ctx;
4648
153
4649
153
  if (!node)
4650
0
    return NULL;
4651
153
4652
153
  ctx = isl_schedule_node_get_ctx(node);
4653
153
  if (compute_schedule_wcc_band(ctx, graph) < 0)
4654
0
    return isl_schedule_node_free(node);
4655
153
4656
153
  return compute_schedule_finish_band(node, graph, 1);
4657
153
}
4658
4659
/* Clustering information used by compute_schedule_wcc_clustering.
4660
 *
4661
 * "n" is the number of SCCs in the original dependence graph
4662
 * "scc" is an array of "n" elements, each representing an SCC
4663
 * of the original dependence graph.  All entries in the same cluster
4664
 * have the same number of schedule rows.
4665
 * "scc_cluster" maps each SCC index to the cluster to which it belongs,
4666
 * where each cluster is represented by the index of the first SCC
4667
 * in the cluster.  Initially, each SCC belongs to a cluster containing
4668
 * only that SCC.
4669
 *
4670
 * "scc_in_merge" is used by merge_clusters_along_edge to keep
4671
 * track of which SCCs need to be merged.
4672
 *
4673
 * "cluster" contains the merged clusters of SCCs after the clustering
4674
 * has completed.
4675
 *
4676
 * "scc_node" is a temporary data structure used inside copy_partial.
4677
 * For each SCC, it keeps track of the number of nodes in the SCC
4678
 * that have already been copied.
4679
 */
4680
struct isl_clustering {
4681
  int n;
4682
  struct isl_sched_graph *scc;
4683
  struct isl_sched_graph *cluster;
4684
  int *scc_cluster;
4685
  int *scc_node;
4686
  int *scc_in_merge;
4687
};
4688
4689
/* Initialize the clustering data structure "c" from "graph".
4690
 *
4691
 * In particular, allocate memory, extract the SCCs from "graph"
4692
 * into c->scc, initialize scc_cluster and construct
4693
 * a band of schedule rows for each SCC.
4694
 * Within each SCC, there is only one SCC by definition.
4695
 * Each SCC initially belongs to a cluster containing only that SCC.
4696
 */
4697
static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
4698
  struct isl_sched_graph *graph)
4699
15
{
4700
15
  int i;
4701
15
4702
15
  c->n = graph->scc;
4703
15
  c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
4704
15
  c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
4705
15
  c->scc_cluster = isl_calloc_array(ctx, int, c->n);
4706
15
  c->scc_node = isl_calloc_array(ctx, int, c->n);
4707
15
  c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
4708
15
  if (
!c->scc || 15
!c->cluster15
||
4709
15
      
!c->scc_cluster15
||
!c->scc_node15
||
!c->scc_in_merge15
)
4710
0
    return isl_stat_error;
4711
15
4712
54
  
for (i = 0; 15
i < c->n54
;
++i39
)
{39
4713
39
    if (extract_sub_graph(ctx, graph, &node_scc_exactly,
4714
39
          &edge_scc_exactly, i, &c->scc[i]) < 0)
4715
0
      return isl_stat_error;
4716
39
    c->scc[i].scc = 1;
4717
39
    if (compute_maxvar(&c->scc[i]) < 0)
4718
0
      return isl_stat_error;
4719
39
    
if (39
compute_schedule_wcc_band(ctx, &c->scc[i]) < 039
)
4720
0
      return isl_stat_error;
4721
39
    c->scc_cluster[i] = i;
4722
39
  }
4723
15
4724
15
  return isl_stat_ok;
4725
15
}
4726
4727
/* Free all memory allocated for "c".
4728
 */
4729
static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
4730
15
{
4731
15
  int i;
4732
15
4733
15
  if (c->scc)
4734
54
    
for (i = 0; 15
i < c->n54
;
++i39
)
4735
39
      graph_free(ctx, &c->scc[i]);
4736
15
  free(c->scc);
4737
15
  if (c->cluster)
4738
54
    
for (i = 0; 15
i < c->n54
;
++i39
)
4739
39
      graph_free(ctx, &c->cluster[i]);
4740
15
  free(c->cluster);
4741
15
  free(c->scc_cluster);
4742
15
  free(c->scc_node);
4743
15
  free(c->scc_in_merge);
4744
15
}
4745
4746
/* Should we refrain from merging the cluster in "graph" with
4747
 * any other cluster?
4748
 * In particular, is its current schedule band empty and incomplete.
4749
 */
4750
static int bad_cluster(struct isl_sched_graph *graph)
4751
254
{
4752
254
  return graph->n_row < graph->maxvar &&
4753
12
    graph->n_total_row == graph->band_start;
4754
254
}
4755
4756
/* Return the index of an edge in "graph" that can be used to merge
4757
 * two clusters in "c".
4758
 * Return graph->n_edge if no such edge can be found.
4759
 * Return -1 on error.
4760
 *
4761
 * In particular, return a proximity edge between two clusters
4762
 * that is not marked "no_merge" and such that neither of the
4763
 * two clusters has an incomplete, empty band.
4764
 *
4765
 * If there are multiple such edges, then try and find the most
4766
 * appropriate edge to use for merging.  In particular, pick the edge
4767
 * with the greatest weight.  If there are multiple of those,
4768
 * then pick one with the shortest distance between
4769
 * the two cluster representatives.
4770
 */
4771
static int find_proximity(struct isl_sched_graph *graph,
4772
  struct isl_clustering *c)
4773
38
{
4774
38
  int i, best = graph->n_edge, best_dist, best_weight;
4775
38
4776
161
  for (i = 0; 
i < graph->n_edge161
;
++i123
)
{123
4777
123
    struct isl_sched_edge *edge = &graph->edge[i];
4778
123
    int dist, weight;
4779
123
4780
123
    if (!is_proximity(edge))
4781
24
      continue;
4782
99
    
if (99
edge->no_merge99
)
4783
6
      continue;
4784
93
    
if (93
bad_cluster(&c->scc[edge->src->scc]) ||93
4785
93
        bad_cluster(&c->scc[edge->dst->scc]))
4786
0
      continue;
4787
93
    dist = c->scc_cluster[edge->dst->scc] -
4788
93
      c->scc_cluster[edge->src->scc];
4789
93
    if (dist == 0)
4790
57
      continue;
4791
36
    weight = edge->weight;
4792
36
    if (
best < graph->n_edge36
)
{13
4793
13
      if (best_weight > weight)
4794
0
        continue;
4795
13
      
if (13
best_weight == weight && 13
best_dist <= dist13
)
4796
11
        continue;
4797
13
    }
4798
25
    best = i;
4799
25
    best_dist = dist;
4800
25
    best_weight = weight;
4801
25
  }
4802
38
4803
38
  return best;
4804
38
}
4805
4806
/* Internal data structure used in mark_merge_sccs.
4807
 *
4808
 * "graph" is the dependence graph in which a strongly connected
4809
 * component is constructed.
4810
 * "scc_cluster" maps each SCC index to the cluster to which it belongs.
4811
 * "src" and "dst" are the indices of the nodes that are being merged.
4812
 */
4813
struct isl_mark_merge_sccs_data {
4814
  struct isl_sched_graph *graph;
4815
  int *scc_cluster;
4816
  int src;
4817
  int dst;
4818
};
4819
4820
/* Check whether the cluster containing node "i" depends on the cluster
4821
 * containing node "j".  If "i" and "j" belong to the same cluster,
4822
 * then they are taken to depend on each other to ensure that
4823
 * the resulting strongly connected component consists of complete
4824
 * clusters.  Furthermore, if "i" and "j" are the two nodes that
4825
 * are being merged, then they are taken to depend on each other as well.
4826
 * Otherwise, check if there is a (conditional) validity dependence
4827
 * from node[j] to node[i], forcing node[i] to follow node[j].
4828
 */
4829
static isl_bool cluster_follows(int i, int j, void *user)
4830
108
{
4831
108
  struct isl_mark_merge_sccs_data *data = user;
4832
108
  struct isl_sched_graph *graph = data->graph;
4833
108
  int *scc_cluster = data->scc_cluster;
4834
108
4835
108
  if (
data->src == i && 108
data->dst == j39
)
4836
23
    return isl_bool_true;
4837
85
  
if (85
data->src == j && 85
data->dst == i35
)
4838
21
    return isl_bool_true;
4839
64
  
if (64
scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc]64
)
4840
16
    return isl_bool_true;
4841
64
4842
48
  return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
4843
64
}
4844
4845
/* Mark all SCCs that belong to either of the two clusters in "c"
4846
 * connected by the edge in "graph" with index "edge", or to any
4847
 * of the intermediate clusters.
4848
 * The marking is recorded in c->scc_in_merge.
4849
 *
4850
 * The given edge has been selected for merging two clusters,
4851
 * meaning that there is at least a proximity edge between the two nodes.
4852
 * However, there may also be (indirect) validity dependences
4853
 * between the two nodes.  When merging the two clusters, all clusters
4854
 * containing one or more of the intermediate nodes along the
4855
 * indirect validity dependences need to be merged in as well.
4856
 *
4857
 * First collect all such nodes by computing the strongly connected
4858
 * component (SCC) containing the two nodes connected by the edge, where
4859
 * the two nodes are considered to depend on each other to make
4860
 * sure they end up in the same SCC.  Similarly, each node is considered
4861
 * to depend on every other node in the same cluster to ensure
4862
 * that the SCC consists of complete clusters.
4863
 *
4864
 * Then the original SCCs that contain any of these nodes are marked
4865
 * in c->scc_in_merge.
4866
 */
4867
static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
4868
  int edge, struct isl_clustering *c)
4869
23
{
4870
23
  struct isl_mark_merge_sccs_data data;
4871
23
  struct isl_tarjan_graph *g;
4872
23
  int i;
4873
23
4874
88
  for (i = 0; 
i < c->n88
;
++i65
)
4875
65
    c->scc_in_merge[i] = 0;
4876
23
4877
23
  data.graph = graph;
4878
23
  data.scc_cluster = c->scc_cluster;
4879
23
  data.src = graph->edge[edge].src - graph->node;
4880
23
  data.dst = graph->edge[edge].dst - graph->node;
4881
23
4882
23
  g = isl_tarjan_graph_component(ctx, graph->n, data.dst,
4883
23
          &cluster_follows, &data);
4884
23
  if (!g)
4885
0
    goto error;
4886
23
4887
23
  i = g->op;
4888
23
  if (i < 3)
4889
0
    isl_die(ctx, isl_error_internal,
4890
23
      "expecting at least two nodes in component",
4891
23
      goto error);
4892
23
  
if (23
g->order[--i] != -123
)
4893
0
    isl_die(ctx, isl_error_internal,
4894
23
      "expecting end of component marker", goto error);
4895
23
4896
78
  
for (--i; 23
i >= 0 && 78
g->order[i] != -160
;
--i55
)
{55
4897
55
    int scc = graph->node[g->order[i]].scc;
4898
55
    c->scc_in_merge[scc] = 1;
4899
55
  }
4900
23
4901
23
  isl_tarjan_graph_free(g);
4902
23
  return isl_stat_ok;
4903
0
error:
4904
0
  isl_tarjan_graph_free(g);
4905
0
  return isl_stat_error;
4906
23
}
4907
4908
/* Construct the identifier "cluster_i".
4909
 */
4910
static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
4911
179
{
4912
179
  char name[40];
4913
179
4914
179
  snprintf(name, sizeof(name), "cluster_%d", i);
4915
179
  return isl_id_alloc(ctx, name, NULL);
4916
179
}
4917
4918
/* Construct the space of the cluster with index "i" containing
4919
 * the strongly connected component "scc".
4920
 *
4921
 * In particular, construct a space called cluster_i with dimension equal
4922
 * to the number of schedule rows in the current band of "scc".
4923
 */
4924
static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
4925
127
{
4926
127
  int nvar;
4927
127
  isl_space *space;
4928
127
  isl_id *id;
4929
127
4930
127
  nvar = scc->n_total_row - scc->band_start;
4931
127
  space = isl_space_copy(scc->node[0].space);
4932
127
  space = isl_space_params(space);
4933
127
  space = isl_space_set_from_params(space);
4934
127
  space = isl_space_add_dims(space, isl_dim_set, nvar);
4935
127
  id = cluster_id(isl_space_get_ctx(space), i);
4936
127
  space = isl_space_set_tuple_id(space, isl_dim_set, id);
4937
127
4938
127
  return space;
4939
127
}
4940
4941
/* Collect the domain of the graph for merging clusters.
4942
 *
4943
 * In particular, for each cluster with first SCC "i", construct
4944
 * a set in the space called cluster_i with dimension equal
4945
 * to the number of schedule rows in the current band of the cluster.
4946
 */
4947
static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
4948
  struct isl_sched_graph *graph, struct isl_clustering *c)
4949
22
{
4950
22
  int i;
4951
22
  isl_space *space;
4952
22
  isl_union_set *domain;
4953
22
4954
22
  space = isl_space_params_alloc(ctx, 0);
4955
22
  domain = isl_union_set_empty(space);
4956
22
4957
84
  for (i = 0; 
i < graph->scc84
;
++i62
)
{62
4958
62
    isl_space *space;
4959
62
4960
62
    if (!c->scc_in_merge[i])
4961
10
      continue;
4962
52
    
if (52
c->scc_cluster[i] != i52
)
4963
8
      continue;
4964
44
    space = cluster_space(&c->scc[i], i);
4965
44
    domain = isl_union_set_add_set(domain, isl_set_universe(space));
4966
44
  }
4967
22
4968
22
  return domain;
4969
22
}
4970
4971
/* Construct a map from the original instances to the corresponding
4972
 * cluster instance in the current bands of the clusters in "c".
4973
 */
4974
static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
4975
  struct isl_sched_graph *graph, struct isl_clustering *c)
4976
22
{
4977
22
  int i, j;
4978
22
  isl_space *space;
4979
22
  isl_union_map *cluster_map;
4980
22
4981
22
  space = isl_space_params_alloc(ctx, 0);
4982
22
  cluster_map = isl_union_map_empty(space);
4983
84
  for (i = 0; 
i < graph->scc84
;
++i62
)
{62
4984
62
    int start, n;
4985
62
    isl_id *id;
4986
62
4987
62
    if (!c->scc_in_merge[i])
4988
10
      continue;
4989
62
4990
52
    id = cluster_id(ctx, c->scc_cluster[i]);
4991
52
    start = c->scc[i].band_start;
4992
52
    n = c->scc[i].n_total_row - start;
4993
104
    for (j = 0; 
j < c->scc[i].n104
;
++j52
)
{52
4994
52
      isl_multi_aff *ma;
4995
52
      isl_map *map;
4996
52
      struct isl_sched_node *node = &c->scc[i].node[j];
4997
52
4998
52
      ma = node_extract_partial_schedule_multi_aff(node,
4999
52
                    start, n);
5000
52
      ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out,
5001
52
                  isl_id_copy(id));
5002
52
      map = isl_map_from_multi_aff(ma);
5003
52
      cluster_map = isl_union_map_add_map(cluster_map, map);
5004
52
    }
5005
52
    isl_id_free(id);
5006
52
  }
5007
22
5008
22
  return cluster_map;
5009
22
}
5010
5011
/* Add "umap" to the schedule constraints "sc" of all types of "edge"
5012
 * that are not isl_edge_condition or isl_edge_conditional_validity.
5013
 */
5014
static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
5015
  struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
5016
  __isl_take isl_schedule_constraints *sc)
5017
58
{
5018
58
  enum isl_edge_type t;
5019
58
5020
58
  if (!sc)
5021
0
    return NULL;
5022
58
5023
348
  
for (t = isl_edge_first; 58
t <= isl_edge_last348
;
++t290
)
{290
5024
290
    if (t == isl_edge_condition ||
5025
232
        t == isl_edge_conditional_validity)
5026
116
      continue;
5027
174
    
if (174
!is_type(edge, t)174
)
5028
35
      continue;
5029
139
    sc = isl_schedule_constraints_add(sc, t,
5030
139
                isl_union_map_copy(umap));
5031
139