Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/External/isl/isl_ast_codegen.c
Count  Source
       /*
        * Copyright 2012-2014 Ecole Normale Superieure
        * Copyright 2014      INRIA Rocquencourt
        *
        * Use of this software is governed by the MIT license
        *
        * Written by Sven Verdoolaege,
        * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
        * and Inria Paris - Rocquencourt, Domaine de Voluceau - Rocquencourt,
        * B.P. 105 - 78153 Le Chesnay, France
        */

       #include <limits.h>
       #include <isl/id.h>
       #include <isl/val.h>
       #include <isl/space.h>
       #include <isl/aff.h>
       #include <isl/constraint.h>
       #include <isl/set.h>
       #include <isl/ilp.h>
       #include <isl/union_set.h>
       #include <isl/union_map.h>
       #include <isl/schedule_node.h>
       #include <isl/options.h>
       #include <isl_sort.h>
       #include <isl_tarjan.h>
       #include <isl_ast_private.h>
       #include <isl_ast_build_expr.h>
       #include <isl_ast_build_private.h>
       #include <isl_ast_graft_private.h>

       /* Try and reduce the number of disjuncts in the representation of "set",
        * without dropping explicit representations of local variables.
        */
       static __isl_give isl_set *isl_set_coalesce_preserve(__isl_take isl_set *set)
3.19k  {
3.19k  	isl_ctx *ctx;
3.19k  	int save_preserve;

3.19k  	if (!set)
    0  		return NULL;

3.19k  	ctx = isl_set_get_ctx(set);
3.19k  	save_preserve = isl_options_get_coalesce_preserve_locals(ctx);
3.19k  	isl_options_set_coalesce_preserve_locals(ctx, 1);
3.19k  	set = isl_set_coalesce(set);
3.19k  	isl_options_set_coalesce_preserve_locals(ctx, save_preserve);
3.19k  	return set;
3.19k  }

       /* Data used in generate_domain.
        *
        * "build" is the input build.
        * "list" collects the results.
        */
       struct isl_generate_domain_data {
       	isl_ast_build *build;

       	isl_ast_graft_list *list;
       };

       static __isl_give isl_ast_graft_list *generate_next_level(
       	__isl_take isl_union_map *executed,
       	__isl_take isl_ast_build *build);
       static __isl_give isl_ast_graft_list *generate_code(
       	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
       	int internal);

       /* Generate an AST for a single domain based on
        * the (non-single-valued) inverse schedule "executed".
        *
        * We extend the schedule with the iteration domain
        * and continue generating through a call to generate_code.
        *
        * In particular, if executed has the form
        *
        *	S -> D
        *
        * then we continue generating code on
        *
        *	[S -> D] -> D
        *
        * The extended inverse schedule is clearly single valued,
        * ensuring that the nested generate_code will not reach this function,
        * but will instead create calls to all elements of D that need
        * to be executed from the current schedule domain.
        */
       static isl_stat generate_non_single_valued(__isl_take isl_map *executed,
       	struct isl_generate_domain_data *data)
   22  {
   22  	isl_map *identity;
   22  	isl_ast_build *build;
   22  	isl_ast_graft_list *list;

   22  	build = isl_ast_build_copy(data->build);

   22  	identity = isl_set_identity(isl_map_range(isl_map_copy(executed)));
   22  	executed = isl_map_domain_product(executed, identity);
   22  	build = isl_ast_build_set_single_valued(build, 1);

   22  	list = generate_code(isl_union_map_from_map(executed), build, 1);

   22  	data->list = isl_ast_graft_list_concat(data->list, list);

   22  	return isl_stat_ok;
   22  }
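
The domain-product trick above can be checked in isolation: for a non-single-valued inverse schedule S -> D, the extended relation [S -> D] -> D maps each pair to its own D component and is therefore single-valued by construction. A minimal standalone sketch (the schedule string is invented for illustration):

	#include <assert.h>
	#include <isl/ctx.h>
	#include <isl/map.h>
	#include <isl/set.h>

	int main(void)
	{
		isl_ctx *ctx = isl_ctx_alloc();
		/* One schedule point executes two domain elements:
		 * not single-valued. */
		isl_map *executed = isl_map_read_from_str(ctx,
			"{ S[i] -> D[i]; S[i] -> D[i + 1] }");
		isl_map *identity, *extended;

		assert(!isl_map_is_single_valued(executed));

		/* Same construction as generate_non_single_valued above. */
		identity = isl_set_identity(
			isl_map_range(isl_map_copy(executed)));
		extended = isl_map_domain_product(executed, identity);

		/* [S[i] -> D[x]] -> D[x] determines its image uniquely. */
		assert(isl_map_is_single_valued(extended));

		isl_map_free(extended);
		isl_ctx_free(ctx);
		return 0;
	}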

       /* Call the at_each_domain callback, if requested by the user,
        * after recording the current inverse schedule in the build.
        */
       static __isl_give isl_ast_graft *at_each_domain(__isl_take isl_ast_graft *graft,
       	__isl_keep isl_map *executed, __isl_keep isl_ast_build *build)
2.23k  {
2.23k  	if (!graft || !build)
    0  		return isl_ast_graft_free(graft);
2.23k  	if (!build->at_each_domain)
   10  		return graft;

2.22k  	build = isl_ast_build_copy(build);
2.22k  	build = isl_ast_build_set_executed(build,
2.22k  			isl_union_map_from_map(isl_map_copy(executed)));
2.22k  	if (!build)
    0  		return isl_ast_graft_free(graft);

2.22k  	graft->node = build->at_each_domain(graft->node,
2.22k  					build, build->at_each_domain_user);
2.22k  	isl_ast_build_free(build);

2.22k  	if (!graft->node)
    0  		graft = isl_ast_graft_free(graft);

2.22k  	return graft;
2.22k  }
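
This path only fires when the user has installed a callback through the public isl_ast_build_set_at_each_domain. A rough usage sketch (the callback name "tag_domain" and the annotation id are invented):

	#include <isl/ast_build.h>
	#include <isl/id.h>

	/* Hypothetical callback: tag every generated call node
	 * with an annotation id. */
	static __isl_give isl_ast_node *tag_domain(__isl_take isl_ast_node *node,
		__isl_keep isl_ast_build *build, void *user)
	{
		isl_ctx *ctx = isl_ast_node_get_ctx(node);
		isl_id *id = isl_id_alloc(ctx, "visited", user);
		return isl_ast_node_set_annotation(node, id);
	}

	/* ... before generating the AST ... */
	build = isl_ast_build_set_at_each_domain(build, &tag_domain, NULL);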

       /* Generate a call expression for the single executed
        * domain element "map" and put a guard around it based on its (simplified)
        * domain.  "executed" is the original inverse schedule from which "map"
        * has been derived.  In particular, "map" is either identical to "executed"
        * or it is the result of gisting "executed" with respect to the build domain.
        * "executed" is only used if there is an at_each_domain callback.
        *
        * At this stage, any pending constraints in the build can no longer
        * be simplified with respect to any enforced constraints since
        * the call node does not have any enforced constraints.
        * Since all pending constraints not covered by any enforced constraints
        * will be added as a guard to the graft in create_node_scaled,
        * even in the eliminated case, the pending constraints
        * can be considered to have been generated by outer constructs.
        *
        * If the user has set an at_each_domain callback, it is called
        * on the constructed call expression node.
        */
       static isl_stat add_domain(__isl_take isl_map *executed,
       	__isl_take isl_map *map, struct isl_generate_domain_data *data)
2.23k  {
2.23k  	isl_ast_build *build;
2.23k  	isl_ast_graft *graft;
2.23k  	isl_ast_graft_list *list;
2.23k  	isl_set *guard, *pending;

2.23k  	build = isl_ast_build_copy(data->build);
2.23k  	pending = isl_ast_build_get_pending(build);
2.23k  	build = isl_ast_build_replace_pending_by_guard(build, pending);

2.23k  	guard = isl_map_domain(isl_map_copy(map));
2.23k  	guard = isl_set_compute_divs(guard);
2.23k  	guard = isl_set_coalesce_preserve(guard);
2.23k  	guard = isl_set_gist(guard, isl_ast_build_get_generated(build));
2.23k  	guard = isl_ast_build_specialize(build, guard);

2.23k  	graft = isl_ast_graft_alloc_domain(map, build);
2.23k  	graft = at_each_domain(graft, executed, build);
2.23k  	isl_ast_build_free(build);
2.23k  	isl_map_free(executed);
2.23k  	graft = isl_ast_graft_add_guard(graft, guard, data->build);

2.23k  	list = isl_ast_graft_list_from_ast_graft(graft);
2.23k  	data->list = isl_ast_graft_list_concat(data->list, list);

2.23k  	return isl_stat_ok;
2.23k  }

       /* Generate an AST for a single domain based on
        * the inverse schedule "executed" and add it to data->list.
        *
        * If there is more than one domain element associated to the current
        * schedule "time", then we need to continue the generation process
        * in generate_non_single_valued.
        * Note that the inverse schedule being single-valued may depend
        * on constraints that are only available in the original context
        * domain specified by the user.  We therefore first introduce
        * some of the constraints of data->build->domain.  In particular,
        * we intersect with a single-disjunct approximation of this set.
        * We perform this approximation to avoid further splitting up
        * the executed relation, possibly introducing a disjunctive guard
        * on the statement.
        *
        * On the other hand, we only perform the test after having taken the gist
        * of the domain as the resulting map is the one from which the call
        * expression is constructed.  Using this map to construct the call
        * expression usually yields simpler results in cases where the original
        * map is not obviously single-valued.
        * If the original map is obviously single-valued, then the gist
        * operation is skipped.
        *
        * Because we perform the single-valuedness test on the gisted map,
        * we may in rare cases fail to recognize that the inverse schedule
        * is single-valued.  This becomes problematic if this happens
        * from the recursive call through generate_non_single_valued
        * as we would then end up in an infinite recursion.
        * We therefore check if we are inside a call to generate_non_single_valued
        * and revert to the ungisted map if the gisted map turns out not to be
        * single-valued.
        *
        * Otherwise, call add_domain to generate a call expression (with guard) and
        * to call the at_each_domain callback, if any.
        */
       static isl_stat generate_domain(__isl_take isl_map *executed, void *user)
2.26k  {
2.26k  	struct isl_generate_domain_data *data = user;
2.26k  	isl_set *domain;
2.26k  	isl_map *map = NULL;
2.26k  	int empty, sv;

2.26k  	domain = isl_ast_build_get_domain(data->build);
2.26k  	domain = isl_set_from_basic_set(isl_set_simple_hull(domain));
2.26k  	executed = isl_map_intersect_domain(executed, domain);
2.26k  	empty = isl_map_is_empty(executed);
2.26k  	if (empty < 0)
    0  		goto error;
2.26k  	if (empty) {
    0  		isl_map_free(executed);
    0  		return isl_stat_ok;
    0  	}

2.26k  	sv = isl_map_plain_is_single_valued(executed);
2.26k  	if (sv < 0)
    0  		goto error;
2.26k  	if (sv)
  899  		return add_domain(executed, isl_map_copy(executed), data);

1.36k  	executed = isl_map_coalesce(executed);
1.36k  	map = isl_map_copy(executed);
1.36k  	map = isl_ast_build_compute_gist_map_domain(data->build, map);
1.36k  	sv = isl_map_is_single_valued(map);
1.36k  	if (sv < 0)
    0  		goto error;
1.36k  	if (!sv) {
   22  		isl_map_free(map);
   22  		if (data->build->single_valued)
    0  			map = isl_map_copy(executed);
   22  		else
   22  			return generate_non_single_valued(executed, data);
1.33k  	}

1.33k  	return add_domain(executed, map, data);
    0  error:
    0  	isl_map_free(map);
    0  	isl_map_free(executed);
    0  	return isl_stat_error;
1.33k  }

       /* Call build->create_leaf to create a "leaf" node in the AST,
        * encapsulate the result in an isl_ast_graft and return the result
        * as a 1-element list.
        *
        * Note that the node returned by the user may be an entire tree.
        *
        * Since the node itself cannot enforce any constraints, we turn
        * all pending constraints into guards and add them to the resulting
        * graft to ensure that they will be generated.
        *
        * Before we pass control to the user, we first clear some information
        * from the build that is (presumably) only meaningful
        * for the current code generation.
        * This includes the create_leaf callback itself, so we make a copy
        * of the build first.
        */
       static __isl_give isl_ast_graft_list *call_create_leaf(
       	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
    3  {
    3  	isl_set *guard;
    3  	isl_ast_node *node;
    3  	isl_ast_graft *graft;
    3  	isl_ast_build *user_build;

    3  	guard = isl_ast_build_get_pending(build);
    3  	user_build = isl_ast_build_copy(build);
    3  	user_build = isl_ast_build_replace_pending_by_guard(user_build,
    3  						isl_set_copy(guard));
    3  	user_build = isl_ast_build_set_executed(user_build, executed);
    3  	user_build = isl_ast_build_clear_local_info(user_build);
    3  	if (!user_build)
    0  		node = NULL;
    3  	else
    3  		node = build->create_leaf(user_build, build->create_leaf_user);
    3  	graft = isl_ast_graft_alloc(node, build);
    3  	graft = isl_ast_graft_add_guard(graft, guard, build);
    3  	isl_ast_build_free(build);
    3  	return isl_ast_graft_list_from_ast_graft(graft);
    3  }
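
The create_leaf path is reached only when a callback has been installed with the public isl_ast_build_set_create_leaf; note that the callback takes ownership of the build it is handed and must free it. A hedged sketch (the callback name "my_leaf" and the leaf expression are invented):

	#include <isl/ast_build.h>

	/* Hypothetical callback: emit a user node wrapping an identifier. */
	static __isl_give isl_ast_node *my_leaf(__isl_take isl_ast_build *build,
		void *user)
	{
		isl_ctx *ctx = isl_ast_build_get_ctx(build);
		isl_id *id = isl_id_alloc(ctx, "leaf", NULL);
		isl_ast_build_free(build);	/* callback owns the build */
		return isl_ast_node_alloc_user(isl_ast_expr_from_id(id));
	}

	/* ... before generating the AST ... */
	build = isl_ast_build_set_create_leaf(build, &my_leaf, NULL);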

       static __isl_give isl_ast_graft_list *build_ast_from_child(
       	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
       	__isl_take isl_union_map *executed);

       /* Generate an AST after having handled the complete schedule
        * of this call to the code generator or the complete band
        * if we are generating an AST from a schedule tree.
        *
        * If we are inside a band node, then move on to the child of the band.
        *
        * If the user has specified a create_leaf callback, control
        * is passed to the user in call_create_leaf.
        *
        * Otherwise, we generate one or more calls for each individual
        * domain in generate_domain.
        */
       static __isl_give isl_ast_graft_list *generate_inner_level(
       	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
4.47k  {
4.47k  	isl_ctx *ctx;
4.47k  	struct isl_generate_domain_data data = { build };

4.47k  	if (!build || !executed)
    0  		goto error;

4.47k  	if (isl_ast_build_has_schedule_node(build)) {
2.20k  		isl_schedule_node *node;
2.20k  		node = isl_ast_build_get_schedule_node(build);
2.20k  		build = isl_ast_build_reset_schedule_node(build);
2.20k  		return build_ast_from_child(build, node, executed);
2.20k  	}

2.26k  	if (build->create_leaf)
    3  		return call_create_leaf(executed, build);

2.26k  	ctx = isl_union_map_get_ctx(executed);
2.26k  	data.list = isl_ast_graft_list_alloc(ctx, 0);
2.26k  	if (isl_union_map_foreach_map(executed, &generate_domain, &data) < 0)
    0  		data.list = isl_ast_graft_list_free(data.list);

2.26k  	if (0)
    0  error:		data.list = NULL;
2.26k  	isl_ast_build_free(build);
2.26k  	isl_union_map_free(executed);
2.26k  	return data.list;
2.26k  }

       /* Call the before_each_for callback, if requested by the user.
        */
       static __isl_give isl_ast_node *before_each_for(__isl_take isl_ast_node *node,
       	__isl_keep isl_ast_build *build)
  749  {
  749  	isl_id *id;

  749  	if (!node || !build)
    0  		return isl_ast_node_free(node);
  749  	if (!build->before_each_for)
  472  		return node;
  277  	id = build->before_each_for(build, build->before_each_for_user);
  277  	node = isl_ast_node_set_annotation(node, id);
  277  	return node;
  277  }

       /* Call the after_each_for callback, if requested by the user.
        */
       static __isl_give isl_ast_graft *after_each_for(__isl_take isl_ast_graft *graft,
       	__isl_keep isl_ast_build *build)
  749  {
  749  	if (!graft || !build)
    0  		return isl_ast_graft_free(graft);
  749  	if (!build->after_each_for)
  472  		return graft;
  277  	graft->node = build->after_each_for(graft->node, build,
  277  						build->after_each_for_user);
  277  	if (!graft->node)
    0  		return isl_ast_graft_free(graft);
  277  	return graft;
  277  }

       /* Plug in all the known values of the current and outer dimensions
        * in the domain of "executed".  In principle, we only need to plug
        * in the known value of the current dimension since the values of
        * outer dimensions have been plugged in already.
        * However, it turns out to be easier to just plug in all known values.
        */
       static __isl_give isl_union_map *plug_in_values(
       	__isl_take isl_union_map *executed, __isl_keep isl_ast_build *build)
3.31k  {
3.31k  	return isl_ast_build_substitute_values_union_map_domain(build,
3.31k  								executed);
3.31k  }

       /* Check if the constraint "c" is a lower bound on dimension "pos",
        * an upper bound, or independent of dimension "pos".
        */
       static int constraint_type(isl_constraint *c, int pos)
2.05k  {
2.05k  	if (isl_constraint_is_lower_bound(c, isl_dim_set, pos))
  898  		return 1;
1.16k  	if (isl_constraint_is_upper_bound(c, isl_dim_set, pos))
1.04k  		return 2;
  119  	return 0;
  119  }

       /* Compare the types of the constraints "a" and "b",
        * resulting in constraints that are independent of "depth"
        * to be sorted before the lower bounds on "depth", which in
        * turn are sorted before the upper bounds on "depth".
        */
       static int cmp_constraint(__isl_keep isl_constraint *a,
       	__isl_keep isl_constraint *b, void *user)
1.02k  {
1.02k  	int *depth = user;
1.02k  	int t1 = constraint_type(a, *depth);
1.02k  	int t2 = constraint_type(b, *depth);

1.02k  	return t1 - t2;
1.02k  }

       /* Extract a lower bound on dimension "pos" from constraint "c".
        *
        * If the constraint is of the form
        *
        *	a x + f(...) >= 0
        *
        * then we essentially return
        *
        *	l = ceil(-f(...)/a)
        *
        * However, if the current dimension is strided, then we need to make
        * sure that the lower bound we construct is of the form
        *
        *	f + s a
        *
        * with f the offset and s the stride.
        * We therefore compute
        *
        *	f + s * ceil((l - f)/s)
        */
       static __isl_give isl_aff *lower_bound(__isl_keep isl_constraint *c,
       	int pos, __isl_keep isl_ast_build *build)
  764  {
  764  	isl_aff *aff;

  764  	aff = isl_constraint_get_bound(c, isl_dim_set, pos);
  764  	aff = isl_aff_ceil(aff);

  764  	if (isl_ast_build_has_stride(build, pos)) {
   12  		isl_aff *offset;
   12  		isl_val *stride;

   12  		offset = isl_ast_build_get_offset(build, pos);
   12  		stride = isl_ast_build_get_stride(build, pos);

   12  		aff = isl_aff_sub(aff, isl_aff_copy(offset));
   12  		aff = isl_aff_scale_down_val(aff, isl_val_copy(stride));
   12  		aff = isl_aff_ceil(aff);
   12  		aff = isl_aff_scale_val(aff, stride);
   12  		aff = isl_aff_add(aff, offset);
   12  	}

  764  	aff = isl_ast_build_compute_gist_aff(build, aff);

  764  	return aff;
  764  }
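
The stride rounding can be spot-checked with plain integers. Assuming an offset f = 1 and a stride s = 4 (invented numbers), a raw lower bound l = 6 must be rounded up to 9, the next value of the form 1 + 4a:

	#include <stdio.h>

	/* Ceiling division for a positive divisor,
	 * mirroring the effect of isl_aff_ceil above. */
	static long ceil_div(long num, long den)
	{
		return (num >= 0 ? num + den - 1 : num) / den;
	}

	int main(void)
	{
		long f = 1, s = 4, l = 6;	/* assumed offset, stride, bound */
		long rounded = f + s * ceil_div(l - f, s);
		printf("%ld\n", rounded);	/* prints 9 = 1 + 4*2 */
		return 0;
	}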

       /* Return the exact lower bound (or upper bound if "upper" is set)
        * of "domain" as a piecewise affine expression.
        *
        * If we are computing a lower bound (of a strided dimension), then
        * we need to make sure it is of the form
        *
        *	f + s a
        *
        * where f is the offset and s is the stride.
        * We therefore need to include the stride constraint before computing
        * the minimum.
        */
       static __isl_give isl_pw_aff *exact_bound(__isl_keep isl_set *domain,
       	__isl_keep isl_ast_build *build, int upper)
    0  {
    0  	isl_set *stride;
    0  	isl_map *it_map;
    0  	isl_pw_aff *pa;
    0  	isl_pw_multi_aff *pma;

    0  	domain = isl_set_copy(domain);
    0  	if (!upper) {
    0  		stride = isl_ast_build_get_stride_constraint(build);
    0  		domain = isl_set_intersect(domain, stride);
    0  	}
    0  	it_map = isl_ast_build_map_to_iterator(build, domain);
    0  	if (upper)
    0  		pma = isl_map_lexmax_pw_multi_aff(it_map);
    0  	else
    0  		pma = isl_map_lexmin_pw_multi_aff(it_map);
    0  	pa = isl_pw_multi_aff_get_pw_aff(pma, 0);
    0  	isl_pw_multi_aff_free(pma);
    0  	pa = isl_ast_build_compute_gist_pw_aff(build, pa);
    0  	pa = isl_pw_aff_coalesce(pa);

    0  	return pa;
    0  }

       /* Callback for sorting the isl_pw_aff_list passed to reduce_list and
        * remove_redundant_lower_bounds.
        */
       static int reduce_list_cmp(__isl_keep isl_pw_aff *a, __isl_keep isl_pw_aff *b,
       	void *user)
  100  {
  100  	return isl_pw_aff_plain_cmp(a, b);
  100  }

       /* Given a list of lower bounds "list", remove those that are redundant
        * with respect to the other bounds in "list" and the domain of "build".
        *
        * We first sort the bounds in the same way as they would be sorted
        * by set_for_node_expressions so that we can try and remove the last
        * bounds first.
        *
        * For a lower bound to be effective, there needs to be at least
        * one domain element for which it is larger than all other lower bounds.
        * For each lower bound we therefore intersect the domain with
        * the conditions that it is larger than all other bounds and
        * check whether the result is empty.  If so, the bound can be removed.
        */
       static __isl_give isl_pw_aff_list *remove_redundant_lower_bounds(
       	__isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
   12  {
   12  	int i, j, n;
   12  	isl_set *domain;

   12  	list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL);
   12  	if (!list)
    0  		return NULL;

   12  	n = isl_pw_aff_list_n_pw_aff(list);
   12  	if (n <= 1)
   12  		return list;

    0  	domain = isl_ast_build_get_domain(build);

    0  	for (i = n - 1; i >= 0; --i) {
    0  		isl_pw_aff *pa_i;
    0  		isl_set *domain_i;
    0  		int empty;

    0  		domain_i = isl_set_copy(domain);
    0  		pa_i = isl_pw_aff_list_get_pw_aff(list, i);

    0  		for (j = 0; j < n; ++j) {
    0  			isl_pw_aff *pa_j;
    0  			isl_set *better;

    0  			if (j == i)
    0  				continue;

    0  			pa_j = isl_pw_aff_list_get_pw_aff(list, j);
    0  			better = isl_pw_aff_gt_set(isl_pw_aff_copy(pa_i), pa_j);
    0  			domain_i = isl_set_intersect(domain_i, better);
    0  		}

    0  		empty = isl_set_is_empty(domain_i);

    0  		isl_set_free(domain_i);
    0  		isl_pw_aff_free(pa_i);

    0  		if (empty < 0)
    0  			goto error;
    0  		if (!empty)
    0  			continue;
    0  		list = isl_pw_aff_list_drop(list, i, 1);
    0  		n--;
    0  	}

    0  	isl_set_free(domain);

    0  	return list;
    0  error:
    0  	isl_set_free(domain);
    0  	return isl_pw_aff_list_free(list);
    0  }
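
The emptiness test used above can be reproduced with public operations. For instance, with two candidate lower bounds 0 and -2 (invented values), the bound -2 is redundant because the set where it exceeds the bound 0 is empty:

	#include <assert.h>
	#include <isl/ctx.h>
	#include <isl/aff.h>
	#include <isl/set.h>

	int main(void)
	{
		isl_ctx *ctx = isl_ctx_alloc();
		/* Two candidate lower bounds, as constants over a
		 * parameter n. */
		isl_pw_aff *l1 = isl_pw_aff_read_from_str(ctx,
			"[n] -> { [(0)] }");
		isl_pw_aff *l2 = isl_pw_aff_read_from_str(ctx,
			"[n] -> { [(-2)] }");
		/* l2 is redundant iff the set where l2 > l1 is empty,
		 * the same condition tested in the loop above. */
		isl_set *better = isl_pw_aff_gt_set(l2, l1);
		assert(isl_set_is_empty(better));
		isl_set_free(better);
		isl_ctx_free(ctx);
		return 0;
	}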

       /* Extract a lower bound on dimension "pos" from each constraint
        * in "constraints" and return the list of lower bounds.
        * If "constraints" has zero elements, then we extract a lower bound
        * from "domain" instead.
        *
        * If the current dimension is strided, then the lower bound
        * is adjusted by lower_bound to match the stride information.
        * This modification may make one or more lower bounds redundant
        * with respect to the other lower bounds.  We therefore check
        * for this condition and remove the redundant lower bounds.
        */
       static __isl_give isl_pw_aff_list *lower_bounds(
       	__isl_keep isl_constraint_list *constraints, int pos,
       	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
  749  {
  749  	isl_ctx *ctx;
  749  	isl_pw_aff_list *list;
  749  	int i, n;

  749  	if (!build)
    0  		return NULL;

  749  	n = isl_constraint_list_n_constraint(constraints);
  749  	if (n == 0) {
    0  		isl_pw_aff *pa;
    0  		pa = exact_bound(domain, build, 0);
    0  		return isl_pw_aff_list_from_pw_aff(pa);
    0  	}

  749  	ctx = isl_ast_build_get_ctx(build);
  749  	list = isl_pw_aff_list_alloc(ctx, n);

1.51k  	for (i = 0; i < n; ++i) {
  764  		isl_aff *aff;
  764  		isl_constraint *c;

  764  		c = isl_constraint_list_get_constraint(constraints, i);
  764  		aff = lower_bound(c, pos, build);
  764  		isl_constraint_free(c);
  764  		list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
  764  	}

  749  	if (isl_ast_build_has_stride(build, pos))
   12  		list = remove_redundant_lower_bounds(list, build);

  749  	return list;
  749  }

       /* Extract an upper bound on dimension "pos" from each constraint
        * in "constraints" and return the list of upper bounds.
        * If "constraints" has zero elements, then we extract an upper bound
        * from "domain" instead.
        */
       static __isl_give isl_pw_aff_list *upper_bounds(
       	__isl_keep isl_constraint_list *constraints, int pos,
       	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
  749  {
  749  	isl_ctx *ctx;
  749  	isl_pw_aff_list *list;
  749  	int i, n;

  749  	n = isl_constraint_list_n_constraint(constraints);
  749  	if (n == 0) {
    0  		isl_pw_aff *pa;
    0  		pa = exact_bound(domain, build, 1);
    0  		return isl_pw_aff_list_from_pw_aff(pa);
    0  	}

  749  	ctx = isl_ast_build_get_ctx(build);
  749  	list = isl_pw_aff_list_alloc(ctx, n);

1.58k  	for (i = 0; i < n; ++i) {
  832  		isl_aff *aff;
  832  		isl_constraint *c;

  832  		c = isl_constraint_list_get_constraint(constraints, i);
  832  		aff = isl_constraint_get_bound(c, isl_dim_set, pos);
  832  		isl_constraint_free(c);
  832  		aff = isl_aff_floor(aff);
  832  		list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
  832  	}

  749  	return list;
  749  }

       /* Return an isl_ast_expr that performs the reduction of type "type"
        * on AST expressions corresponding to the elements in "list".
        *
        * The list is assumed to contain at least one element.
        * If the list contains exactly one element, then the returned isl_ast_expr
        * simply computes that affine expression.
        * If the list contains more than one element, then we sort it
        * using a fairly arbitrary but hopefully reasonably stable order.
        */
       static __isl_give isl_ast_expr *reduce_list(enum isl_ast_op_type type,
       	__isl_keep isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
1.49k  {
1.49k  	int i, n;
1.49k  	isl_ctx *ctx;
1.49k  	isl_ast_expr *expr;

1.49k  	if (!list)
    0  		return NULL;

1.49k  	n = isl_pw_aff_list_n_pw_aff(list);

1.49k  	if (n == 1)
1.40k  		return isl_ast_build_expr_from_pw_aff_internal(build,
1.40k  				isl_pw_aff_list_get_pw_aff(list, 0));

   95  	ctx = isl_pw_aff_list_get_ctx(list);
   95  	expr = isl_ast_expr_alloc_op(ctx, type, n);
   95  	if (!expr)
    0  		return NULL;

   95  	list = isl_pw_aff_list_copy(list);
   95  	list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL);
   95  	if (!list)
    0  		return isl_ast_expr_free(expr);

  288  	for (i = 0; i < n; ++i) {
  193  		isl_ast_expr *expr_i;

  193  		expr_i = isl_ast_build_expr_from_pw_aff_internal(build,
  193  				isl_pw_aff_list_get_pw_aff(list, i));
  193  		if (!expr_i)
    0  			goto error;
  193  		expr->u.op.args[i] = expr_i;
  193  	}

   95  	isl_pw_aff_list_free(list);
   95  	return expr;
    0  error:
    0  	isl_pw_aff_list_free(list);
    0  	isl_ast_expr_free(expr);
    0  	return NULL;
   95  }
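
reduce_list itself is internal, but its effect for n > 1 can be approximated through the public API: folding a two-element list with isl_ast_op_min corresponds to building the expression for the pointwise minimum. A sketch with invented bounds n and 10:

	#include <stdio.h>
	#include <stdlib.h>
	#include <isl/ctx.h>
	#include <isl/aff.h>
	#include <isl/set.h>
	#include <isl/ast_build.h>

	int main(void)
	{
		isl_ctx *ctx = isl_ctx_alloc();
		isl_set *context = isl_set_read_from_str(ctx, "[n] -> { : }");
		isl_ast_build *build = isl_ast_build_from_context(context);
		isl_pw_aff *a = isl_pw_aff_read_from_str(ctx,
			"[n] -> { [(n)] }");
		isl_pw_aff *b = isl_pw_aff_read_from_str(ctx,
			"[n] -> { [(10)] }");
		/* Pointwise minimum of the two bounds. */
		isl_ast_expr *expr = isl_ast_build_expr_from_pw_aff(build,
						isl_pw_aff_min(a, b));
		char *s = isl_ast_expr_to_C_str(expr);
		printf("%s\n", s);	/* expected to print: min(n, 10) */
		free(s);
		isl_ast_expr_free(expr);
		isl_ast_build_free(build);
		isl_ctx_free(ctx);
		return 0;
	}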

       /* Add guards implied by the "generated constraints",
        * but not (necessarily) enforced by the generated AST to "guard".
        * In particular, if there are any stride constraints,
        * then add the guard implied by those constraints.
        * If we have generated a degenerate loop, then add the guard
        * implied by "bounds" on the outer dimensions, i.e., the guard
        * that ensures that the single value actually exists.
        * Since there may also be guards implied by a combination
        * of these constraints, we first combine them before
        * deriving the implied constraints.
        */
       static __isl_give isl_set *add_implied_guards(__isl_take isl_set *guard,
       	int degenerate, __isl_keep isl_basic_set *bounds,
       	__isl_keep isl_ast_build *build)
  749  {
  749  	int depth, has_stride;
  749  	isl_space *space;
  749  	isl_set *dom, *set;

  749  	depth = isl_ast_build_get_depth(build);
  749  	has_stride = isl_ast_build_has_stride(build, depth);
  749  	if (!has_stride && !degenerate)
  737  		return guard;

   12  	space = isl_basic_set_get_space(bounds);
   12  	dom = isl_set_universe(space);

   12  	if (degenerate) {
    0  		bounds = isl_basic_set_copy(bounds);
    0  		bounds = isl_basic_set_drop_constraints_not_involving_dims(
    0  					bounds, isl_dim_set, depth, 1);
    0  		set = isl_set_from_basic_set(bounds);
    0  		dom = isl_set_intersect(dom, set);
    0  	}

   12  	if (has_stride) {
   12  		set = isl_ast_build_get_stride_constraint(build);
   12  		dom = isl_set_intersect(dom, set);
   12  	}

   12  	dom = isl_set_eliminate(dom, isl_dim_set, depth, 1);
   12  	dom = isl_ast_build_compute_gist(build, dom);
   12  	guard = isl_set_intersect(guard, dom);

   12  	return guard;
   12  }

       /* Update "graft" based on "sub_build" for the degenerate case.
        *
        * "build" is the build in which graft->node was created
        * "sub_build" contains information about the current level itself,
        * including the single value attained.
        *
        * We set the initialization part of the for loop to the single
        * value attained by the current dimension.
        * The increment and condition are not strictly needed as they are known
        * to be "1" and "iterator <= value" respectively.
        */
       static __isl_give isl_ast_graft *refine_degenerate(
       	__isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build,
       	__isl_keep isl_ast_build *sub_build)
    0  {
    0  	isl_pw_aff *value;

    0  	if (!graft || !sub_build)
    0  		return isl_ast_graft_free(graft);

    0  	value = isl_pw_aff_copy(sub_build->value);

    0  	graft->node->u.f.init = isl_ast_build_expr_from_pw_aff_internal(build,
    0  						value);
    0  	if (!graft->node->u.f.init)
    0  		return isl_ast_graft_free(graft);

    0  	return graft;
    0  }

       /* Return the intersection of constraints in "list" as a set.
        */
       static __isl_give isl_set *intersect_constraints(
       	__isl_keep isl_constraint_list *list)
    0  {
    0  	int i, n;
    0  	isl_basic_set *bset;

    0  	n = isl_constraint_list_n_constraint(list);
    0  	if (n < 1)
    0  		isl_die(isl_constraint_list_get_ctx(list), isl_error_internal,
    0  			"expecting at least one constraint", return NULL);

    0  	bset = isl_basic_set_from_constraint(
    0  				isl_constraint_list_get_constraint(list, 0));
    0  	for (i = 1; i < n; ++i) {
    0  		isl_basic_set *bset_i;

    0  		bset_i = isl_basic_set_from_constraint(
    0  				isl_constraint_list_get_constraint(list, i));
    0  		bset = isl_basic_set_intersect(bset, bset_i);
    0  	}

    0  	return isl_set_from_basic_set(bset);
    0  }

       /* Compute the constraints on the outer dimensions enforced by
        * graft->node and add those constraints to graft->enforced,
        * in case the upper bound is expressed as a set "upper".
        *
        * In particular, if l(...) is a lower bound in "lower", and
        *
        *	-a i + f(...) >= 0		or	a i <= f(...)
        *
        * is an upper bound constraint on the current dimension i,
        * then the for loop enforces the constraint
        *
        *	-a l(...) + f(...) >= 0		or	a l(...) <= f(...)
        *
        * We therefore simply take each lower bound in turn, plug it into
        * the upper bounds and compute the intersection over all lower bounds.
        *
        * If a lower bound is a rational expression, then
        * isl_basic_set_preimage_multi_aff will force this rational
        * expression to have only integer values.  However, the loop
        * itself does not enforce this integrality constraint.  We therefore
        * use the ceil of the lower bounds instead of the lower bounds themselves.
        * Other constraints will make sure that the for loop is only executed
        * when each of the lower bounds attains an integral value.
        * In particular, potentially rational values only occur in
        * lower_bound if the offset is a (seemingly) rational expression,
        * but then outer conditions will make sure that this rational expression
        * only attains integer values.
        */
       static __isl_give isl_ast_graft *set_enforced_from_set(
       	__isl_take isl_ast_graft *graft,
       	__isl_keep isl_pw_aff_list *lower, int pos, __isl_keep isl_set *upper)
    0  {
    0  	isl_space *space;
    0  	isl_basic_set *enforced;
    0  	isl_pw_multi_aff *pma;
    0  	int i, n;

    0  	if (!graft || !lower)
    0  		return isl_ast_graft_free(graft);

    0  	space = isl_set_get_space(upper);
    0  	enforced = isl_basic_set_universe(isl_space_copy(space));

    0  	space = isl_space_map_from_set(space);
    0  	pma = isl_pw_multi_aff_identity(space);

    0  	n = isl_pw_aff_list_n_pw_aff(lower);
    0  	for (i = 0; i < n; ++i) {
    0  		isl_pw_aff *pa;
    0  		isl_set *enforced_i;
    0  		isl_basic_set *hull;
    0  		isl_pw_multi_aff *pma_i;

    0  		pa = isl_pw_aff_list_get_pw_aff(lower, i);
    0  		pa = isl_pw_aff_ceil(pa);
    0  		pma_i = isl_pw_multi_aff_copy(pma);
    0  		pma_i = isl_pw_multi_aff_set_pw_aff(pma_i, pos, pa);
    0  		enforced_i = isl_set_copy(upper);
    0  		enforced_i = isl_set_preimage_pw_multi_aff(enforced_i, pma_i);
    0  		hull = isl_set_simple_hull(enforced_i);
    0  		enforced = isl_basic_set_intersect(enforced, hull);
    0  	}

    0  	isl_pw_multi_aff_free(pma);

    0  	graft = isl_ast_graft_enforce(graft, enforced);

    0  	return graft;
    0  }
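
For a concrete instance of the substitution described above (numbers invented): with the single lower bound l(n) = 0 and the upper bound constraint

	-2 i + n - 1 >= 0		or	2 i <= n - 1

the preimage step replaces i by ceil(l(n)) = 0, so the loop enforces

	n - 1 >= 0			or	n >= 1

on the outer/parameter dimensions.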

       /* Compute the constraints on the outer dimensions enforced by
        * graft->node and add those constraints to graft->enforced,
        * in case the upper bound is expressed as
        * a list of affine expressions "upper".
        *
        * The enforced condition is that each lower bound expression is less
        * than or equal to each upper bound expression.
        */
       static __isl_give isl_ast_graft *set_enforced_from_list(
       	__isl_take isl_ast_graft *graft,
       	__isl_keep isl_pw_aff_list *lower, __isl_keep isl_pw_aff_list *upper)
  749  {
  749  	isl_set *cond;
  749  	isl_basic_set *enforced;

  749  	lower = isl_pw_aff_list_copy(lower);
  749  	upper = isl_pw_aff_list_copy(upper);
  749  	cond = isl_pw_aff_list_le_set(lower, upper);
  749  	enforced = isl_set_simple_hull(cond);
  749  	graft = isl_ast_graft_enforce(graft, enforced);

  749  	return graft;
  749  }

       /* Does "aff" have a negative constant term?
        */
       static isl_stat aff_constant_is_negative(__isl_take isl_set *set,
       	__isl_take isl_aff *aff, void *user)
  764  {
  764  	int *neg = user;
  764  	isl_val *v;

  764  	v = isl_aff_get_constant_val(aff);
  764  	*neg = isl_val_is_neg(v);
  764  	isl_val_free(v);
  764  	isl_set_free(set);
  764  	isl_aff_free(aff);

  764  	return *neg ? isl_stat_ok : isl_stat_error;
  764  }

       /* Does "pa" have a negative constant term over its entire domain?
        */
       static isl_stat pw_aff_constant_is_negative(__isl_take isl_pw_aff *pa,
       	void *user)
  764  {
  764  	isl_stat r;
  764  	int *neg = user;

  764  	r = isl_pw_aff_foreach_piece(pa, &aff_constant_is_negative, user);
  764  	isl_pw_aff_free(pa);

  764  	return (*neg && r >= 0) ? isl_stat_ok : isl_stat_error;
  764  }

       /* Does each element in "list" have a negative constant term?
        *
        * The callback terminates the iteration as soon as an element has been
        * found that does not have a negative constant term.
        */
       static int list_constant_is_negative(__isl_keep isl_pw_aff_list *list)
  749  {
  749  	int neg = 1;

  749  	if (isl_pw_aff_list_foreach(list,
  749  			&pw_aff_constant_is_negative, &neg) < 0 && neg)
    0  		return -1;

  749  	return neg;
  749  }

       /* Add 1 to each of the elements in "list", where each of these elements
        * is defined over the internal schedule space of "build".
        */
       static __isl_give isl_pw_aff_list *list_add_one(
       	__isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
  172  {
  172  	int i, n;
  172  	isl_space *space;
  172  	isl_aff *aff;
  172  	isl_pw_aff *one;

  172  	space = isl_ast_build_get_space(build, 1);
  172  	aff = isl_aff_zero_on_domain(isl_local_space_from_space(space));
  172  	aff = isl_aff_add_constant_si(aff, 1);
  172  	one = isl_pw_aff_from_aff(aff);

  172  	n = isl_pw_aff_list_n_pw_aff(list);
  344  	for (i = 0; i < n; ++i) {
  172  		isl_pw_aff *pa;
  172  		pa = isl_pw_aff_list_get_pw_aff(list, i);
  172  		pa = isl_pw_aff_add(pa, isl_pw_aff_copy(one));
  172  		list = isl_pw_aff_list_set_pw_aff(list, i, pa);
  172  	}

  172  	isl_pw_aff_free(one);

  172  	return list;
  172  }

       /* Set the condition part of the for node graft->node in case
        * the upper bound is represented as a list of piecewise affine expressions.
        *
        * In particular, set the condition to
        *
        *	iterator <= min(list of upper bounds)
        *
        * If each of the upper bounds has a negative constant term, then
        * set the condition to
        *
        *	iterator < min(list of (upper bound + 1)s)
        *
        */
       static __isl_give isl_ast_graft *set_for_cond_from_list(
       	__isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *list,
       	__isl_keep isl_ast_build *build)
  749  {
  749  	int neg;
  749  	isl_ast_expr *bound, *iterator, *cond;
  749  	enum isl_ast_op_type type = isl_ast_op_le;

  749  	if (!graft || !list)
    0  		return isl_ast_graft_free(graft);

  749  	neg = list_constant_is_negative(list);
  749  	if (neg < 0)
    0  		return isl_ast_graft_free(graft);
  749  	list = isl_pw_aff_list_copy(list);
  749  	if (neg) {
  172  		list = list_add_one(list, build);
  172  		type = isl_ast_op_lt;
  172  	}

  749  	bound = reduce_list(isl_ast_op_min, list, build);
  749  	iterator = isl_ast_expr_copy(graft->node->u.f.iterator);
  749  	cond = isl_ast_expr_alloc_binary(type, iterator, bound);
  749  	graft->node->u.f.cond = cond;

  749  	isl_pw_aff_list_free(list);
  749  	if (!graft->node->u.f.cond)
    0  		return isl_ast_graft_free(graft);
  749  	return graft;
  749  }
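
As a concrete instance of the rewrite above (bound invented): a single upper bound n - 1 has constant term -1, so instead of

	iterator <= n - 1

the condition is generated as

	iterator < n

which is the more idiomatic C form.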

       /* Set the condition part of the for node graft->node in case
        * the upper bound is represented as a set.
        */
       static __isl_give isl_ast_graft *set_for_cond_from_set(
       	__isl_take isl_ast_graft *graft, __isl_keep isl_set *set,
       	__isl_keep isl_ast_build *build)
    0  {
    0  	isl_ast_expr *cond;

    0  	if (!graft)
    0  		return NULL;

    0  	cond = isl_ast_build_expr_from_set_internal(build, isl_set_copy(set));
    0  	graft->node->u.f.cond = cond;
    0  	if (!graft->node->u.f.cond)
    0  		return isl_ast_graft_free(graft);
    0  	return graft;
    0  }

       /* Construct an isl_ast_expr for the increment (i.e., stride) of
        * the current dimension.
        */
       static __isl_give isl_ast_expr *for_inc(__isl_keep isl_ast_build *build)
  749  {
  749  	int depth;
  749  	isl_val *v;
  749  	isl_ctx *ctx;

  749  	if (!build)
    0  		return NULL;
  749  	ctx = isl_ast_build_get_ctx(build);
  749  	depth = isl_ast_build_get_depth(build);

  749  	if (!isl_ast_build_has_stride(build, depth))
  737  		return isl_ast_expr_alloc_int_si(ctx, 1);

   12  	v = isl_ast_build_get_stride(build, depth);
   12  	return isl_ast_expr_from_val(v);
   12  }

       /* Should we express the loop condition as
        *
        *	iterator <= min(list of upper bounds)
        *
        * or as a conjunction of constraints?
        *
        * The first is constructed from a list of upper bounds.
        * The second is constructed from a set.
        *
        * If there are no upper bounds in "constraints", then this could mean
        * that "domain" simply doesn't have an upper bound or that we didn't
        * pick any upper bound.  In the first case, we want to generate the
        * loop condition as a(n empty) conjunction of constraints.
        * In the second case, we will compute
        * a single upper bound from "domain" and so we use the list form.
        *
        * If there are upper bounds in "constraints",
        * then we use the list form iff the atomic_upper_bound option is set.
        */
       static int use_upper_bound_list(isl_ctx *ctx, int n_upper,
       	__isl_keep isl_set *domain, int depth)
  749  {
  749  	if (n_upper > 0)
  749  		return isl_options_get_ast_build_atomic_upper_bound(ctx);
    0  	else
    0  		return isl_set_dim_has_upper_bound(domain, isl_dim_set, depth);
  749  }
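
The atomic_upper_bound option is exposed through the public options API; judging by the counts above, the list form was taken in all 749 executions of this run. A minimal sketch of toggling it (the effect described in the comment above: cleared, conditions become a conjunction such as "i <= a && i <= b" instead of "i <= min(a, b)"):

	#include <isl/ctx.h>
	#include <isl/options.h>

	int main(void)
	{
		isl_ctx *ctx = isl_ctx_alloc();

		/* Request the conjunction form for upper bounds. */
		isl_options_set_ast_build_atomic_upper_bound(ctx, 0);

		/* ... build an AST here ... */

		isl_ctx_free(ctx);
		return 0;
	}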

       /* Fill in the expressions of the for node in graft->node.
        *
        * In particular,
        * - set the initialization part of the loop to the maximum of the lower bounds
        * - extract the increment from the stride of the current dimension
        * - construct the for condition either based on a list of upper bounds
        *   or on a set of upper bound constraints.
        */
       static __isl_give isl_ast_graft *set_for_node_expressions(
       	__isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *lower,
       	int use_list, __isl_keep isl_pw_aff_list *upper_list,
       	__isl_keep isl_set *upper_set, __isl_keep isl_ast_build *build)
  749  {
  749  	isl_ast_node *node;

  749  	if (!graft)
    0  		return NULL;

  749  	build = isl_ast_build_copy(build);

  749  	node = graft->node;
  749  	node->u.f.init = reduce_list(isl_ast_op_max, lower, build);
  749  	node->u.f.inc = for_inc(build);

  749  	if (!node->u.f.init || !node->u.f.inc)
    0  		graft = isl_ast_graft_free(graft);

  749  	if (use_list)
  749  		graft = set_for_cond_from_list(graft, upper_list, build);
    0  	else
    0  		graft = set_for_cond_from_set(graft, upper_set, build);

  749  	isl_ast_build_free(build);

  749  	return graft;
  749  }

       /* Update "graft" based on "bounds" and "domain" for the generic,
        * non-degenerate, case.
        *
        * "c_lower" and "c_upper" contain the lower and upper bounds
        * that the loop node should express.
        * "domain" is the subset of the intersection of the constraints
        * for which some code is executed.
        *
        * There may be zero lower bounds or zero upper bounds in "constraints"
        * in case the list of constraints was created
        * based on the atomic option or based on separation with explicit bounds.
        * In that case, we use "domain" to derive lower and/or upper bounds.
        *
        * We first compute a list of one or more lower bounds.
        *
        * Then we decide if we want to express the condition as
        *
        *	iterator <= min(list of upper bounds)
        *
        * or as a conjunction of constraints.
        *
        * The set of enforced constraints is then computed either based on
        * a list of upper bounds or on a set of upper bound constraints.
        * We do not compute any enforced constraints if we were forced
        * to compute a lower or upper bound using exact_bound.  The domains
        * of the resulting expressions may imply some bounds on outer dimensions
        * that we do not want to appear in the enforced constraints since
        * they are not actually enforced by the corresponding code.
        *
        * Finally, we fill in the expressions of the for node.
        */
       static __isl_give isl_ast_graft *refine_generic_bounds(
       	__isl_take isl_ast_graft *graft,
       	__isl_take isl_constraint_list *c_lower,
       	__isl_take isl_constraint_list *c_upper,
       	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
  749  {
  749  	int depth;
  749  	isl_ctx *ctx;
  749  	isl_pw_aff_list *lower;
  749  	int use_list;
  749  	isl_set *upper_set = NULL;
  749  	isl_pw_aff_list *upper_list = NULL;
  749  	int n_lower, n_upper;

  749  	if (!graft || !c_lower || !c_upper || !build)
    0  		goto error;

  749  	depth = isl_ast_build_get_depth(build);
  749  	ctx = isl_ast_graft_get_ctx(graft);

  749  	n_lower = isl_constraint_list_n_constraint(c_lower);
  749  	n_upper = isl_constraint_list_n_constraint(c_upper);

  749  	use_list = use_upper_bound_list(ctx, n_upper, domain, depth);

  749  	lower = lower_bounds(c_lower, depth, domain, build);

  749  	if (use_list)
  749  		upper_list = upper_bounds(c_upper, depth, domain, build);
    0  	else if (n_upper > 0)
    0  		upper_set = intersect_constraints(c_upper);
    0  	else
    0  		upper_set = isl_set_universe(isl_set_get_space(domain));

  749  	if (n_lower == 0 || n_upper == 0)
    0  		;
  749  	else if (use_list)
  749  		graft = set_enforced_from_list(graft, lower, upper_list);
    0  	else
    0  		graft = set_enforced_from_set(graft, lower, depth, upper_set);

  749  	graft = set_for_node_expressions(graft, lower, use_list, upper_list,
  749  					upper_set, build);

  749  	isl_pw_aff_list_free(lower);
  749  	isl_pw_aff_list_free(upper_list);
  749  	isl_set_free(upper_set);
  749  	isl_constraint_list_free(c_lower);
  749  	isl_constraint_list_free(c_upper);

  749  	return graft;
    0  error:
    0  	isl_constraint_list_free(c_lower);
    0  	isl_constraint_list_free(c_upper);
    0  	return isl_ast_graft_free(graft);
  749  }

       /* Internal data structure used inside count_constraints to keep
        * track of the number of constraints that are independent of dimension "pos",
        * the lower bounds in "pos" and the upper bounds in "pos".
        */
       struct isl_ast_count_constraints_data {
       	int pos;

       	int n_indep;
       	int n_lower;
       	int n_upper;
       };

       /* Increment data->n_indep, data->n_lower or data->n_upper depending
        * on whether "c" is independent of dimension data->pos,
        * a lower bound or an upper bound.
        */
       static isl_stat count_constraints(__isl_take isl_constraint *c, void *user)
1.65k  {
1.65k  	struct isl_ast_count_constraints_data *data = user;

1.65k  	if (isl_constraint_is_lower_bound(c, isl_dim_set, data->pos))
  764  		data->n_lower++;
  888  	else if (isl_constraint_is_upper_bound(c, isl_dim_set, data->pos))
  832  		data->n_upper++;
   56  	else
   56  		data->n_indep++;

1.65k  	isl_constraint_free(c);

1.65k  	return isl_stat_ok;
1.65k  }

       /* Update "graft" based on "bounds" and "domain" for the generic,
        * non-degenerate, case.
        *
        * "list" represents the list of bounds that need to be encoded by
        * the for loop.  Only the constraints that involve the iterator
        * are relevant here.  The other constraints are taken care of by
        * the caller and are included in the generated constraints of "build".
        * "domain" is the subset of the intersection of the constraints
        * for which some code is executed.
        * "build" is the build in which graft->node was created.
        *
        * We separate lower bounds, upper bounds and constraints that
        * are independent of the loop iterator.
        *
        * The actual for loop bounds are generated in refine_generic_bounds.
        */
       static __isl_give isl_ast_graft *refine_generic_split(
       	__isl_take isl_ast_graft *graft, __isl_take isl_constraint_list *list,
       	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
  749  {
  749  	struct isl_ast_count_constraints_data data;
  749  	isl_constraint_list *lower;
  749  	isl_constraint_list *upper;

  749  	if (!list)
    0  		return isl_ast_graft_free(graft);

  749  	data.pos = isl_ast_build_get_depth(build);

  749  	list = isl_constraint_list_sort(list, &cmp_constraint, &data.pos);
  749  	if (!list)
    0  		return isl_ast_graft_free(graft);

  749  	data.n_indep = data.n_lower = data.n_upper = 0;
  749  	if (isl_constraint_list_foreach(list, &count_constraints, &data) < 0) {
    0  		isl_constraint_list_free(list);
    0  		return isl_ast_graft_free(graft);
    0  	}

  749  	lower = isl_constraint_list_drop(list, 0, data.n_indep);
  749  	upper = isl_constraint_list_copy(lower);
  749  	lower = isl_constraint_list_drop(lower, data.n_lower, data.n_upper);
  749  	upper = isl_constraint_list_drop(upper, 0, data.n_lower);

  749  	return refine_generic_bounds(graft, lower, upper, domain, build);
  749  }
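
To see the two isl_constraint_list_drop calls at work (counts invented): after sorting, a list of 6 constraints might be laid out as [indep0, low0, low1, up0, up1, up2] with n_indep = 1, n_lower = 2, n_upper = 3. Dropping the first n_indep element leaves [low0, low1, up0, up1, up2]; "lower" then drops the n_upper entries starting at position n_lower, keeping [low0, low1], while "upper" drops the first n_lower entries, keeping [up0, up1, up2].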

       /* Update "graft" based on "bounds" and "domain" for the generic,
        * non-degenerate, case.
        *
        * "bounds" represents the bounds that need to be encoded by
        * the for loop (or a guard around the for loop).
        * "domain" is the subset of "bounds" for which some code is executed.
        * "build" is the build in which graft->node was created.
        *
        * We break up "bounds" into a list of constraints and continue with
        * refine_generic_split.
        */
       static __isl_give isl_ast_graft *refine_generic(
       	__isl_take isl_ast_graft *graft,
       	__isl_keep isl_basic_set *bounds, __isl_keep isl_set *domain,
       	__isl_keep isl_ast_build *build)
  749  {
  749  	isl_constraint_list *list;

  749  	if (!build || !graft)
    0  		return isl_ast_graft_free(graft);

  749  	list = isl_basic_set_get_constraint_list(bounds);

  749  	graft = refine_generic_split(graft, list, domain, build);

  749  	return graft;
  749  }

       /* Create a for node for the current level.
        *
        * Mark the for node degenerate if "degenerate" is set.
        */
       static __isl_give isl_ast_node *create_for(__isl_keep isl_ast_build *build,
       	int degenerate)
  749  {
  749  	int depth;
  749  	isl_id *id;
  749  	isl_ast_node *node;

  749  	if (!build)
    0  		return NULL;

  749  	depth = isl_ast_build_get_depth(build);
  749  	id = isl_ast_build_get_iterator_id(build, depth);
  749  	node = isl_ast_node_alloc_for(id);
  749  	if (degenerate)
    0  		node = isl_ast_node_for_mark_degenerate(node);

  749  	return node;
  749  }

       /* If the ast_build_exploit_nested_bounds option is set, then return
        * the constraints enforced by all elements in "list".
        * Otherwise, return the universe.
        */
       static __isl_give isl_basic_set *extract_shared_enforced(
       	__isl_keep isl_ast_graft_list *list, __isl_keep isl_ast_build *build)
4.06k  {
4.06k  	isl_ctx *ctx;
4.06k  	isl_space *space;

4.06k  	if (!list)
    0  		return NULL;

4.06k  	ctx = isl_ast_graft_list_get_ctx(list);
4.06k  	if (isl_options_get_ast_build_exploit_nested_bounds(ctx))
4.06k  		return isl_ast_graft_list_extract_shared_enforced(list, build);

    2  	space = isl_ast_build_get_space(build, 1);
    2  	return isl_basic_set_universe(space);
    2  }

       /* Return the pending constraints of "build" that are not already taken
        * care of (by a combination of "enforced" and the generated constraints
        * of "build").
        */
       static __isl_give isl_set *extract_pending(__isl_keep isl_ast_build *build,
       	__isl_keep isl_basic_set *enforced)
4.06k  {
4.06k  	isl_set *guard, *context;

4.06k  	guard = isl_ast_build_get_pending(build);
4.06k  	context = isl_set_from_basic_set(isl_basic_set_copy(enforced));
4.06k  	context = isl_set_intersect(context,
4.06k  				isl_ast_build_get_generated(build));
4.06k  	return isl_set_gist(guard, context);
4.06k  }

       /* Create an AST node for the current dimension based on
        * the schedule domain "bounds" and return the node encapsulated
        * in an isl_ast_graft.
        *
        * "executed" is the current inverse schedule, taking into account
        * the bounds in "bounds".
        * "domain" is the domain of "executed", with inner dimensions projected out.
        * It may be a strict subset of "bounds" in case "bounds" was created
        * based on the atomic option or based on separation with explicit bounds.
        *
        * "domain" may satisfy additional equalities that result
        * from intersecting "executed" with "bounds" in add_node.
        * It may also satisfy some global constraints that were dropped out because
        * we performed separation with explicit bounds.
        * The very first step is then to copy these constraints to "bounds".
        *
        * Since we may be calling before_each_for and after_each_for
        * callbacks, we record the current inverse schedule in the build.
        *
        * We consider three builds,
        * "build" is the one in which the current level is created,
        * "body_build" is the build in which the next level is created,
        * "sub_build" is essentially the same as "body_build", except that
        * the depth has not been increased yet.
        *
        * "build" already contains information (in strides and offsets)
        * about the strides at the current level, but this information is not
        * reflected in the build->domain.
        * We first add this information and the "bounds" to the sub_build->domain.
        * isl_ast_build_set_loop_bounds adds the stride information and
        * checks whether the current dimension attains
        * only a single value and whether this single value can be represented using
        * a single affine expression.
        * In the first case, the current level is considered "degenerate".
        * In the second sub-case, the current level is considered "eliminated".
        * Eliminated levels don't need to be reflected in the AST since we can
        * simply plug in the affine expression.  For degenerate, but non-eliminated,
        * levels, we do introduce a for node, but mark it as degenerate so that
        * it can be printed as an assignment of the single value to the loop
        * "iterator".
        *
        * If the current level is eliminated, we explicitly plug in the value
        * for the current level found by isl_ast_build_set_loop_bounds in the
        * inverse schedule.  This ensures that if we are working on a slice
        * of the domain based on information available in the inverse schedule
        * and the build domain, that then this information is also reflected
        * in the inverse schedule.  This operation also eliminates the current
        * dimension from the inverse schedule making sure no inner dimensions depend
        * on the current dimension.  Otherwise, we create a for node, marking
        * it degenerate if appropriate.  The initial for node is still incomplete
        * and will be completed in either refine_degenerate or refine_generic.
        *
        * We then generate a sequence of grafts for the next level,
        * create a surrounding graft for the current level and insert
        * the for node we created (if the current level is not eliminated).
        * Before creating a graft for the current level, we first extract
        * hoistable constraints from the child guards and combine them
        * with the pending constraints in the build.  These constraints
        * are used to simplify the child guards and then added to the guard
        * of the current graft to ensure that they will be generated.
        * If the hoisted guard is a disjunction, then we use it directly
        * to gist the guards on the children before intersecting it with the
        * pending constraints.  We do so because this disjunction is typically
        * identical to the guards on the children such that these guards
        * can be effectively removed completely.  After the intersection,
        * the gist operation would have a harder time figuring this out.
        *
        * Finally, we set the bounds of the for loop in either
        * refine_degenerate or refine_generic.
        * We do so in a context where the pending constraints of the build
        * have been replaced by the guard of the current graft.
        */
       static __isl_give isl_ast_graft *create_node_scaled(
       	__isl_take isl_union_map *executed,
       	__isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
       	__isl_take isl_ast_build *build)
4.06k  {
4.06k  	int depth;
4.06k  	int degenerate;
4.06k  	isl_bool eliminated;
4.06k  	isl_basic_set *hull;
4.06k  	isl_basic_set *enforced;
4.06k  	isl_set *guard, *hoisted;
4.06k  	isl_ast_node *node = NULL;
4.06k  	isl_ast_graft *graft;
4.06k  	isl_ast_graft_list *children;
4.06k  	isl_ast_build *sub_build;
4.06k  	isl_ast_build *body_build;

4.06k  	domain = isl_ast_build_eliminate_divs(build, domain);
4.06k  	domain = isl_set_detect_equalities(domain);
4.06k  	hull = isl_set_unshifted_simple_hull(isl_set_copy(domain));
4.06k  	bounds = isl_basic_set_intersect(bounds, hull);
4.06k  	build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));

4.06k  	depth = isl_ast_build_get_depth(build);
4.06k  	sub_build = isl_ast_build_copy(build);
4.06k  	bounds = isl_basic_set_remove_redundancies(bounds);
4.06k  	bounds = isl_ast_build_specialize_basic_set(sub_build, bounds);
4.06k  	sub_build = isl_ast_build_set_loop_bounds(sub_build,
4.06k  						isl_basic_set_copy(bounds));
4.06k  	degenerate = isl_ast_build_has_value(sub_build);
4.06k  	eliminated = isl_ast_build_has_affine_value(sub_build, depth);
4.06k  	if (degenerate < 0 || eliminated < 0)
    0  		executed = isl_union_map_free(executed);
4.06k  	if (!degenerate)
  749  		bounds = isl_ast_build_compute_gist_basic_set(build, bounds);
4.06k  	sub_build = isl_ast_build_set_pending_generated(sub_build,
4.06k  						isl_basic_set_copy(bounds));
4.06k  	if (eliminated)
3.31k  		executed = plug_in_values(executed, sub_build);
  749  	else
  749  		node = create_for(build, degenerate);

4.06k  	body_build = isl_ast_build_copy(sub_build);
4.06k  	body_build = isl_ast_build_increase_depth(body_build);
4.06k  	if (!eliminated)
  749  		node = before_each_for(node, body_build);
4.06k  	children = generate_next_level(executed,
4.06k  				isl_ast_build_copy(body_build));

4.06k  	enforced = extract_shared_enforced(children, build);
4.06k  	guard = extract_pending(sub_build, enforced);
4.06k  	hoisted = isl_ast_graft_list_extract_hoistable_guard(children, build);
4.06k  	if (isl_set_n_basic_set(hoisted) > 1)
    7  		children = isl_ast_graft_list_gist_guards(children,
    7  						isl_set_copy(hoisted));
4.06k  	guard = isl_set_intersect(guard, hoisted);
4.06k  	if (!eliminated)
  749  		guard = add_implied_guards(guard, degenerate, bounds, build);

4.06k  	graft = isl_ast_graft_alloc_from_children(children,
4.06k  			isl_set_copy(guard), enforced, build, sub_build);

4.06k  	if (!eliminated) {
  749  		isl_ast_build *for_build;

  749  		graft = isl_ast_graft_insert_for(graft, node);
  749  		for_build = isl_ast_build_copy(build);
  749  		for_build = isl_ast_build_replace_pending_by_guard(for_build,
  749  						isl_set_copy(guard));
  749  		if (degenerate)
    0  			graft = refine_degenerate(graft, for_build, sub_build);
  749  		else
  749  			graft = refine_generic(graft, bounds,
  749  					domain, for_build);
  749  		isl_ast_build_free(for_build);
  749  	}
4.06k  	isl_set_free(guard);
4.06k  	if (!eliminated)
  749  		graft = after_each_for(graft, body_build);

4.06k  	isl_ast_build_free(body_build);
4.06k  	isl_ast_build_free(sub_build);
4.06k  	isl_ast_build_free(build);
4.06k  	isl_basic_set_free(bounds);
4.06k  	isl_set_free(domain);

4.06k  	return graft;
4.06k  }
1561
1562
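An illustration of the "eliminated" case described in the comment above create_node_scaled, as a minimal sketch against the public isl API (this example is not part of the file; the schedule and the statement name S are assumptions): the first schedule dimension only attains the single affine value 0, so no for node is generated for that level.

    #include <stdio.h>
    #include <stdlib.h>
    #include <isl/ctx.h>
    #include <isl/set.h>
    #include <isl/union_map.h>
    #include <isl/ast.h>
    #include <isl/ast_build.h>

    int main(void)
    {
        isl_ctx *ctx = isl_ctx_alloc();
        /* Hypothetical schedule: the first dimension is the constant 0,
         * so it is "eliminated" and only the loop over i is printed. */
        isl_union_map *schedule = isl_union_map_read_from_str(ctx,
            "{ S[i] -> [0, i] : 0 <= i < 10 }");
        isl_ast_build *build = isl_ast_build_from_context(
            isl_set_read_from_str(ctx, "{ : }"));
        isl_ast_node *tree = isl_ast_build_node_from_schedule_map(build,
            schedule);
        char *str = isl_ast_node_to_C_str(tree);
        printf("%s", str);
        free(str);
        isl_ast_node_free(tree);
        isl_ast_build_free(build);
        isl_ctx_free(ctx);
        return 0;
    }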
/* Internal data structure for checking if all constraints involving
1563
 * the input dimension "depth" are such that the other coefficients
1564
 * are multiples of "m", reducing "m" if they are not.
1565
 * If "m" is reduced all the way down to "1", then the check has failed
1566
 * and we break out of the iteration.
1567
 */
1568
struct isl_check_scaled_data {
1569
  int depth;
1570
  isl_val *m;
1571
};
1572
1573
/* If constraint "c" involves the input dimension data->depth,
1574
 * then make sure that all the other coefficients are multiples of data->m,
1575
 * reducing data->m if needed.
1576
 * Break out of the iteration if data->m has become equal to "1".
1577
 */
1578
static isl_stat constraint_check_scaled(__isl_take isl_constraint *c,
1579
  void *user)
1580
26
{
1581
26
  struct isl_check_scaled_data *data = user;
1582
26
  int i, j, n;
1583
26
  enum isl_dim_type t[] = { isl_dim_param, isl_dim_in, isl_dim_out,
1584
26
            isl_dim_div };
1585
26
1586
26
  if (!isl_constraint_involves_dims(c, isl_dim_in, data->depth, 1)) {
1587
12
    isl_constraint_free(c);
1588
12
    return isl_stat_ok;
1589
12
  }
1590
14
1591
52
  for (i = 0; i < 4; ++i) {
1592
45
    n = isl_constraint_dim(c, t[i]);
1593
91
    for (j = 0; j < n; ++j) {
1594
53
      isl_val *d;
1595
53
1596
53
      if (t[i] == isl_dim_in && j == data->depth)
1597
14
        continue;
1598
39
      if (!isl_constraint_involves_dims(c, t[i], j, 1))
1599
25
        continue;
1600
14
      d = isl_constraint_get_coefficient_val(c, t[i], j);
1601
14
      data->m = isl_val_gcd(data->m, d);
1602
14
      if (isl_val_is_one(data->m))
1603
7
        break;
1604
14
    }
1605
45
    if (j < n)
1606
7
      break;
1607
45
  }
1608
14
1609
14
  isl_constraint_free(c);
1610
14
1611
14
  return i < 4 ? isl_stat_error : isl_stat_ok;
1612
14
}
1613
1614
/* For each constraint of "bmap" that involves the input dimension data->depth,
1615
 * make sure that all the other coefficients are multiples of data->m,
1616
 * reducing data->m if needed.
1617
 * Break out of the iteration if data->m has become equal to "1".
1618
 */
1619
static isl_stat basic_map_check_scaled(__isl_take isl_basic_map *bmap,
1620
  void *user)
1621
7
{
1622
7
  isl_stat r;
1623
7
1624
7
  r = isl_basic_map_foreach_constraint(bmap,
1625
7
            &constraint_check_scaled, user);
1626
7
  isl_basic_map_free(bmap);
1627
7
1628
7
  return r;
1629
7
}
1630
1631
/* For each constraint of "map" that involves the input dimension data->depth,
1632
 * make sure that all the other coefficients are multiples of data->m,
1633
 * reducing data->m if needed.
1634
 * Break out of the iteration if data->m has become equal to "1".
1635
 */
1636
static isl_stat map_check_scaled(__isl_take isl_map *map, void *user)
1637
7
{
1638
7
  isl_stat r;
1639
7
1640
7
  r = isl_map_foreach_basic_map(map, &basic_map_check_scaled, user);
1641
7
  isl_map_free(map);
1642
7
1643
7
  return r;
1644
7
}
1645
1646
/* Create an AST node for the current dimension based on
1647
 * the schedule domain "bounds" and return the node encapsulated
1648
 * in an isl_ast_graft.
1649
 *
1650
 * "executed" is the current inverse schedule, taking into account
1651
 * the bounds in "bounds"
1652
 * "domain" is the domain of "executed", with inner dimensions projected out.
1653
 *
1654
 *
1655
 * Before moving on to the actual AST node construction in create_node_scaled,
1656
 * we first check if the current dimension is strided and if we can scale
1657
 * down this stride.  Note that we only do this if the ast_build_scale_strides
1658
 * option is set.
1659
 *
1660
 * In particular, let the current dimension take on values
1661
 *
1662
 *  f + s a
1663
 *
1664
 * with "a" an integer.  We check if we can find an integer m that (obviously)
1665
 * divides both f and s.
1666
 *
1667
 * If so, we check if the current dimension only appears in constraints
1668
 * where the coefficients of the other variables are multiples of m.
1669
 * We perform this extra check to avoid the risk of introducing
1670
 * divisions by scaling down the current dimension.
1671
 *
1672
 * If so, we scale the current dimension down by a factor of m.
1673
 * That is, we plug in
1674
 *
1675
 *  i = m i'              (1)
1676
 *
1677
 * Note that in principle we could always scale down strided loops
1678
 * by plugging in
1679
 *
1680
 *  i = f + s i'
1681
 *
1682
 * but this may result in i' taking on larger values than the original i,
1683
 * due to the shift by "f".
1684
 * By contrast, the scaling in (1) can only reduce the (absolute) value of "i".
1685
 */
1686
static __isl_give isl_ast_graft *create_node(__isl_take isl_union_map *executed,
1687
  __isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
1688
  __isl_take isl_ast_build *build)
1689
4.06k
{
1690
4.06k
  struct isl_check_scaled_data data;
1691
4.06k
  isl_ctx *ctx;
1692
4.06k
  isl_aff *offset;
1693
4.06k
  isl_val *d;
1694
4.06k
1695
4.06k
  ctx = isl_ast_build_get_ctx(build);
1696
4.06k
  if (!isl_options_get_ast_build_scale_strides(ctx))
1697
0
    return create_node_scaled(executed, bounds, domain, build);
1698
4.06k
1699
4.06k
  data.depth = isl_ast_build_get_depth(build);
1700
4.06k
  if (!isl_ast_build_has_stride(build, data.depth))
1701
4.05k
    return create_node_scaled(executed, bounds, domain, build);
1702
15
1703
15
  offset = isl_ast_build_get_offset(build, data.depth);
1704
15
  data.m = isl_ast_build_get_stride(build, data.depth);
1705
15
  if (!data.m)
1706
0
    offset = isl_aff_free(offset);
1707
15
  offset = isl_aff_scale_down_val(offset, isl_val_copy(data.m));
1708
15
  d = isl_aff_get_denominator_val(offset);
1709
15
  if (!d)
1710
0
    executed = isl_union_map_free(executed);
1711
15
1712
15
  if (executed && isl_val_is_divisible_by(data.m, d))
1713
13
    data.m = isl_val_div(data.m, d);
1714
2
  else {
1715
2
    data.m = isl_val_set_si(data.m, 1);
1716
2
    isl_val_free(d);
1717
2
  }
1718
15
1719
15
  if (!isl_val_is_one(data.m)) {
1720
7
    if (isl_union_map_foreach_map(executed, &map_check_scaled,
1721
7
            &data) < 0 &&
1722
7
        !isl_val_is_one(data.m))
1723
0
      executed = isl_union_map_free(executed);
1724
7
  }
1725
15
1726
15
  if (!isl_val_is_one(data.m)) {
1727
0
    isl_space *space;
1728
0
    isl_multi_aff *ma;
1729
0
    isl_aff *aff;
1730
0
    isl_map *map;
1731
0
    isl_union_map *umap;
1732
0
1733
0
    space = isl_ast_build_get_space(build, 1);
1734
0
    space = isl_space_map_from_set(space);
1735
0
    ma = isl_multi_aff_identity(space);
1736
0
    aff = isl_multi_aff_get_aff(ma, data.depth);
1737
0
    aff = isl_aff_scale_val(aff, isl_val_copy(data.m));
1738
0
    ma = isl_multi_aff_set_aff(ma, data.depth, aff);
1739
0
1740
0
    bounds = isl_basic_set_preimage_multi_aff(bounds,
1741
0
            isl_multi_aff_copy(ma));
1742
0
    domain = isl_set_preimage_multi_aff(domain,
1743
0
            isl_multi_aff_copy(ma));
1744
0
    map = isl_map_reverse(isl_map_from_multi_aff(ma));
1745
0
    umap = isl_union_map_from_map(map);
1746
0
    executed = isl_union_map_apply_domain(executed,
1747
0
            isl_union_map_copy(umap));
1748
0
    build = isl_ast_build_scale_down(build, isl_val_copy(data.m),
1749
0
            umap);
1750
0
  }
1751
15
  isl_aff_free(offset);
1752
15
  isl_val_free(data.m);
1753
15
1754
15
  return create_node_scaled(executed, bounds, domain, build);
1755
15
}
1756
1757
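The scaling described above can be observed from the public API; a sketch under the assumption that the ast_build_scale_strides option setter is exposed in isl/options.h (the schedule below is hypothetical): with the option on, the loop over the strided dimension 4 * i is generated with step 1 instead of step 4.

    #include <stdio.h>
    #include <stdlib.h>
    #include <isl/options.h>
    #include <isl/set.h>
    #include <isl/union_map.h>
    #include <isl/ast.h>
    #include <isl/ast_build.h>

    int main(void)
    {
        isl_ctx *ctx = isl_ctx_alloc();
        /* The schedule dimension takes the values 4 * i, i.e.
         * f = 0 and s = 4, so m = 4 can be divided out. */
        isl_union_map *schedule = isl_union_map_read_from_str(ctx,
            "{ S[i] -> [4 * i] : 0 <= i < 100 }");
        /* Assumed setter for the option read by create_node above. */
        isl_options_set_ast_build_scale_strides(ctx, 1);
        isl_ast_build *build = isl_ast_build_from_context(
            isl_set_read_from_str(ctx, "{ : }"));
        isl_ast_node *tree = isl_ast_build_node_from_schedule_map(build,
            schedule);
        char *str = isl_ast_node_to_C_str(tree);
        printf("%s", str);    /* compare with the option set to 0 */
        free(str);
        isl_ast_node_free(tree);
        isl_ast_build_free(build);
        isl_ctx_free(ctx);
        return 0;
    }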
/* Add the basic set to the list that "user" points to.
1758
 */
1759
static isl_stat collect_basic_set(__isl_take isl_basic_set *bset, void *user)
1760
1.02k
{
1761
1.02k
  isl_basic_set_list **list = user;
1762
1.02k
1763
1.02k
  *list = isl_basic_set_list_add(*list, bset);
1764
1.02k
1765
1.02k
  return isl_stat_ok;
1766
1.02k
}
1767
1768
/* Extract the basic sets of "set" and collect them in an isl_basic_set_list.
1769
 */
1770
static __isl_give isl_basic_set_list *isl_basic_set_list_from_set(
1771
  __isl_take isl_set *set)
1772
965
{
1773
965
  int n;
1774
965
  isl_ctx *ctx;
1775
965
  isl_basic_set_list *list;
1776
965
1777
965
  if (!set)
1778
0
    return NULL;
1779
965
1780
965
  ctx = isl_set_get_ctx(set);
1781
965
1782
965
  n = isl_set_n_basic_set(set);
1783
965
  list = isl_basic_set_list_alloc(ctx, n);
1784
965
  if (isl_set_foreach_basic_set(set, &collect_basic_set, &list) < 0)
1785
0
    list = isl_basic_set_list_free(list);
1786
965
1787
965
  isl_set_free(set);
1788
965
  return list;
1789
965
}
1790
1791
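The same collect pattern is available to library users; a minimal sketch using only public calls (the set and the callback name are made up for this example) that counts the disjuncts of a set:

    #include <stdio.h>
    #include <isl/ctx.h>
    #include <isl/set.h>

    /* Count one disjunct and release it. */
    static isl_stat count_disjunct(__isl_take isl_basic_set *bset, void *user)
    {
        int *n = user;

        ++*n;
        isl_basic_set_free(bset);
        return isl_stat_ok;
    }

    int main(void)
    {
        isl_ctx *ctx = isl_ctx_alloc();
        isl_set *set = isl_set_read_from_str(ctx,
            "{ [i] : 0 <= i <= 3 or 10 <= i <= 13 }");
        int n = 0;

        if (isl_set_foreach_basic_set(set, &count_disjunct, &n) < 0)
            n = -1;
        printf("%d disjuncts\n", n);    /* expect 2 */
        isl_set_free(set);
        isl_ctx_free(ctx);
        return 0;
    }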
/* Generate code for the schedule domain "bounds"
1792
 * and add the result to "list".
1793
 *
1794
 * We mainly detect strides here and check if the bounds do not
1795
 * conflict with the current build domain
1796
 * and then pass over control to create_node.
1797
 *
1798
 * "bounds" reflects the bounds on the current dimension and possibly
1799
 * some extra conditions on outer dimensions.
1800
 * It does not, however, include any divs involving the current dimension,
1801
 * so it does not capture any stride constraints.
1802
 * We therefore need to compute that part of the schedule domain that
1803
 * intersects with "bounds" and derive the strides from the result.
1804
 */
1805
static __isl_give isl_ast_graft_list *add_node(
1806
  __isl_take isl_ast_graft_list *list, __isl_take isl_union_map *executed,
1807
  __isl_take isl_basic_set *bounds, __isl_take isl_ast_build *build)
1808
5.56k
{
1809
5.56k
  isl_ast_graft *graft;
1810
5.56k
  isl_set *domain = NULL;
1811
5.56k
  isl_union_set *uset;
1812
5.56k
  int empty, disjoint;
1813
5.56k
1814
5.56k
  uset = isl_union_set_from_basic_set(isl_basic_set_copy(bounds));
1815
5.56k
  executed = isl_union_map_intersect_domain(executed, uset);
1816
5.56k
  empty = isl_union_map_is_empty(executed);
1817
5.56k
  if (empty < 0)
1818
0
    goto error;
1819
5.56k
  if (empty)
1820
0
    goto done;
1821
5.56k
1822
5.56k
  uset = isl_union_map_domain(isl_union_map_copy(executed));
1823
5.56k
  domain = isl_set_from_union_set(uset);
1824
5.56k
  domain = isl_ast_build_specialize(build, domain);
1825
5.56k
1826
5.56k
  domain = isl_set_compute_divs(domain);
1827
5.56k
  domain = isl_ast_build_eliminate_inner(build, domain);
1828
5.56k
  disjoint = isl_set_is_disjoint(domain, build->domain);
1829
5.56k
  if (disjoint < 0)
1830
0
    goto error;
1831
5.56k
  if (disjoint)
1832
1.49k
    goto done;
1833
4.06k
1834
4.06k
  build = isl_ast_build_detect_strides(build, isl_set_copy(domain));
1835
4.06k
1836
4.06k
  graft = create_node(executed, bounds, domain,
1837
4.06k
        isl_ast_build_copy(build));
1838
4.06k
  list = isl_ast_graft_list_add(list, graft);
1839
4.06k
  isl_ast_build_free(build);
1840
4.06k
  return list;
1841
0
error:
1842
0
  list = isl_ast_graft_list_free(list);
1843
1.49k
done:
1844
1.49k
  isl_set_free(domain);
1845
1.49k
  isl_basic_set_free(bounds);
1846
1.49k
  isl_union_map_free(executed);
1847
1.49k
  isl_ast_build_free(build);
1848
1.49k
  return list;
1849
0
}
1850
1851
/* Does any element of i follow or coincide with any element of j
1852
 * at the current depth for equal values of the outer dimensions?
1853
 */
1854
static isl_bool domain_follows_at_depth(__isl_keep isl_basic_set *i,
1855
  __isl_keep isl_basic_set *j, void *user)
1856
43
{
1857
43
  int depth = *(int *) user;
1858
43
  isl_basic_map *test;
1859
43
  isl_bool empty;
1860
43
  int l;
1861
43
1862
43
  test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
1863
43
                isl_basic_set_copy(j));
1864
107
  for (l = 0; l < depth; ++l)
1865
64
    test = isl_basic_map_equate(test, isl_dim_in, l,
1866
64
            isl_dim_out, l);
1867
43
  test = isl_basic_map_order_ge(test, isl_dim_in, depth,
1868
43
          isl_dim_out, depth);
1869
43
  empty = isl_basic_map_is_empty(test);
1870
43
  isl_basic_map_free(test);
1871
43
1872
43
  return empty < 0 ? isl_bool_error : !empty;
1873
43
}
1874
1875
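The ordering test above can be reproduced at the map level; a rough sketch (the two sets, the depth and the use of the isl_map_* variants in place of the isl_basic_map_* ones are assumptions of this example):

    #include <stdio.h>
    #include <isl/ctx.h>
    #include <isl/set.h>
    #include <isl/map.h>

    int main(void)
    {
        isl_ctx *ctx = isl_ctx_alloc();
        int depth = 1;
        int l;
        isl_set *i = isl_set_read_from_str(ctx, "{ [a, b] : b = 0 }");
        isl_set *j = isl_set_read_from_str(ctx, "{ [a, b] : b = 1 }");
        isl_map *test;
        isl_bool empty;

        test = isl_map_from_domain_and_range(isl_set_copy(i),
                            isl_set_copy(j));
        for (l = 0; l < depth; ++l)
            test = isl_map_equate(test, isl_dim_in, l, isl_dim_out, l);
        test = isl_map_order_ge(test, isl_dim_in, depth,
                    isl_dim_out, depth);
        empty = isl_map_is_empty(test);
        /* Empty here: no element of "i" follows or coincides with an
         * element of "j" for equal outer dimensions. */
        printf("follows: %d\n", empty == isl_bool_false);
        isl_map_free(test);
        isl_set_free(i);
        isl_set_free(j);
        isl_ctx_free(ctx);
        return 0;
    }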
/* Split up each element of "list" into a part that is related to "bset"
1876
 * according to "gt" and a part that is not.
1877
 * Return a list that consists of "bset" and all the pieces.
1878
 */
1879
static __isl_give isl_basic_set_list *add_split_on(
1880
  __isl_take isl_basic_set_list *list, __isl_take isl_basic_set *bset,
1881
  __isl_keep isl_basic_map *gt)
1882
0
{
1883
0
  int i, n;
1884
0
  isl_basic_set_list *res;
1885
0
1886
0
  if (!list)
1887
0
    bset = isl_basic_set_free(bset);
1888
0
1889
0
  gt = isl_basic_map_copy(gt);
1890
0
  gt = isl_basic_map_intersect_domain(gt, isl_basic_set_copy(bset));
1891
0
  n = isl_basic_set_list_n_basic_set(list);
1892
0
  res = isl_basic_set_list_from_basic_set(bset);
1893
0
  for (i = 0; res && i < n; ++i) {
1894
0
    isl_basic_set *bset;
1895
0
    isl_set *set1, *set2;
1896
0
    isl_basic_map *bmap;
1897
0
    int empty;
1898
0
1899
0
    bset = isl_basic_set_list_get_basic_set(list, i);
1900
0
    bmap = isl_basic_map_copy(gt);
1901
0
    bmap = isl_basic_map_intersect_range(bmap, bset);
1902
0
    bset = isl_basic_map_range(bmap);
1903
0
    empty = isl_basic_set_is_empty(bset);
1904
0
    if (empty < 0)
1905
0
      res = isl_basic_set_list_free(res);
1906
0
    if (empty)  {
1907
0
      isl_basic_set_free(bset);
1908
0
      bset = isl_basic_set_list_get_basic_set(list, i);
1909
0
      res = isl_basic_set_list_add(res, bset);
1910
0
      continue;
1911
0
    }
1912
0
1913
0
    res = isl_basic_set_list_add(res, isl_basic_set_copy(bset));
1914
0
    set1 = isl_set_from_basic_set(bset);
1915
0
    bset = isl_basic_set_list_get_basic_set(list, i);
1916
0
    set2 = isl_set_from_basic_set(bset);
1917
0
    set1 = isl_set_subtract(set2, set1);
1918
0
    set1 = isl_set_make_disjoint(set1);
1919
0
1920
0
    res = isl_basic_set_list_concat(res,
1921
0
              isl_basic_set_list_from_set(set1));
1922
0
  }
1923
0
  isl_basic_map_free(gt);
1924
0
  isl_basic_set_list_free(list);
1925
0
  return res;
1926
0
}
1927
1928
static __isl_give isl_ast_graft_list *generate_sorted_domains(
1929
  __isl_keep isl_basic_set_list *domain_list,
1930
  __isl_keep isl_union_map *executed,
1931
  __isl_keep isl_ast_build *build);
1932
1933
/* Internal data structure for add_nodes.
1934
 *
1935
 * "executed" and "build" are extra arguments to be passed to add_node.
1936
 * "list" collects the results.
1937
 */
1938
struct isl_add_nodes_data {
1939
  isl_union_map *executed;
1940
  isl_ast_build *build;
1941
1942
  isl_ast_graft_list *list;
1943
};
1944
1945
/* Generate code for the schedule domains in "scc"
1946
 * and add the results to "list".
1947
 *
1948
 * The domains in "scc" form a strongly connected component in the ordering.
1949
 * If the number of domains in "scc" is larger than 1, then this means
1950
 * that we cannot determine a valid ordering for the domains in the component.
1951
 * This should be fairly rare because the individual domains
1952
 * have been made disjoint first.
1953
 * The problem is that the domains may be integrally disjoint but not
1954
 * rationally disjoint.  For example, we may have domains
1955
 *
1956
 *  { [i,i] : 0 <= i <= 1 }   and { [i,1-i] : 0 <= i <= 1 }
1957
 *
1958
 * These two domains have an empty intersection, but their rational
1959
 * relaxations do intersect.  It is impossible to order these domains
1960
 * in the second dimension because the first should be ordered before
1961
 * the second for outer dimension equal to 0, while it should be ordered
1962
 * after for outer dimension equal to 1.
1963
 *
1964
 * This may happen in particular in case of unrolling since the domain
1965
 * of each slice is replaced by its simple hull.
1966
 *
1967
 * For each basic set i in "scc" and for each of the following basic sets j,
1968
 * we split off that part of the basic set i that shares the outer dimensions
1969
 * with j and lies before j in the current dimension.
1970
 * We collect all the pieces in a new list that replaces "scc".
1971
 *
1972
 * While the elements in "scc" should be disjoint, we double-check
1973
 * this property to avoid running into an infinite recursion in case
1974
 * they intersect due to some internal error.
1975
 */
1976
static isl_stat add_nodes(__isl_take isl_basic_set_list *scc, void *user)
1977
48
{
1978
48
  struct isl_add_nodes_data *data = user;
1979
48
  int i, n, depth;
1980
48
  isl_basic_set *bset, *first;
1981
48
  isl_basic_set_list *list;
1982
48
  isl_space *space;
1983
48
  isl_basic_map *gt;
1984
48
1985
48
  n = isl_basic_set_list_n_basic_set(scc);
1986
48
  bset = isl_basic_set_list_get_basic_set(scc, 0);
1987
48
  if (n == 1) {
1988
48
    isl_basic_set_list_free(scc);
1989
48
    data->list = add_node(data->list,
1990
48
        isl_union_map_copy(data->executed), bset,
1991
48
        isl_ast_build_copy(data->build));
1992
48
    return data->list ? isl_stat_ok : isl_stat_error;
1993
48
  }
1994
0
1995
0
  depth = isl_ast_build_get_depth(data->build);
1996
0
  space = isl_basic_set_get_space(bset);
1997
0
  space = isl_space_map_from_set(space);
1998
0
  gt = isl_basic_map_universe(space);
1999
0
  for (i = 0; i < depth; ++i)
2000
0
    gt = isl_basic_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
2001
0
  gt = isl_basic_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);
2002
0
2003
0
  first = isl_basic_set_copy(bset);
2004
0
  list = isl_basic_set_list_from_basic_set(bset);
2005
0
  for (i = 1; i < n; ++i) {
2006
0
    int disjoint;
2007
0
2008
0
    bset = isl_basic_set_list_get_basic_set(scc, i);
2009
0
2010
0
    disjoint = isl_basic_set_is_disjoint(bset, first);
2011
0
    if (disjoint < 0)
2012
0
      list = isl_basic_set_list_free(list);
2013
0
    else if (!disjoint)
2014
0
      isl_die(isl_basic_set_list_get_ctx(scc),
2015
0
        isl_error_internal,
2016
0
        "basic sets in scc are assumed to be disjoint",
2017
0
        list = isl_basic_set_list_free(list));
2018
0
2019
0
    list = add_split_on(list, bset, gt);
2020
0
  }
2021
0
  isl_basic_set_free(first);
2022
0
  isl_basic_map_free(gt);
2023
0
  isl_basic_set_list_free(scc);
2024
0
  scc = list;
2025
0
  data->list = isl_ast_graft_list_concat(data->list,
2026
0
        generate_sorted_domains(scc, data->executed, data->build));
2027
0
  isl_basic_set_list_free(scc);
2028
0
2029
0
  return data->list ? isl_stat_ok : isl_stat_error;
2030
0
}
2031
2032
/* Sort the domains in "domain_list" according to the execution order
2033
 * at the current depth (for equal values of the outer dimensions),
2034
 * generate code for each of them, collecting the results in a list.
2035
 * If no code is generated (because the intersection of the inverse schedule
2036
 * with the domains turns out to be empty), then an empty list is returned.
2037
 *
2038
 * The caller is responsible for ensuring that the basic sets in "domain_list"
2039
 * are pair-wise disjoint.  It can, however, in principle happen that
2040
 * two basic sets should be ordered one way for one value of the outer
2041
 * dimensions and the other way for some other value of the outer dimensions.
2042
 * We therefore play safe and look for strongly connected components.
2043
 * The function add_nodes takes care of handling non-trivial components.
2044
 */
2045
static __isl_give isl_ast_graft_list *generate_sorted_domains(
2046
  __isl_keep isl_basic_set_list *domain_list,
2047
  __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
2048
1.02k
{
2049
1.02k
  isl_ctx *ctx;
2050
1.02k
  struct isl_add_nodes_data data;
2051
1.02k
  int depth;
2052
1.02k
  int n;
2053
1.02k
2054
1.02k
  if (!domain_list)
2055
0
    return NULL;
2056
1.02k
2057
1.02k
  ctx = isl_basic_set_list_get_ctx(domain_list);
2058
1.02k
  n = isl_basic_set_list_n_basic_set(domain_list);
2059
1.02k
  data.list = isl_ast_graft_list_alloc(ctx, n);
2060
1.02k
  if (n == 0)
2061
0
    return data.list;
2062
1.02k
  if (n == 1)
2063
999
    return add_node(data.list, isl_union_map_copy(executed),
2064
999
      isl_basic_set_list_get_basic_set(domain_list, 0),
2065
999
      isl_ast_build_copy(build));
2066
21
2067
21
  depth = isl_ast_build_get_depth(build);
2068
21
  data.executed = executed;
2069
21
  data.build = build;
2070
21
  if (isl_basic_set_list_foreach_scc(domain_list,
2071
21
          &domain_follows_at_depth, &depth,
2072
21
          &add_nodes, &data) < 0)
2073
0
    data.list = isl_ast_graft_list_free(data.list);
2074
21
2075
21
  return data.list;
2076
21
}
2077
2078
/* Do i and j share any values for the outer dimensions?
2079
 */
2080
static isl_bool shared_outer(__isl_keep isl_basic_set *i,
2081
  __isl_keep isl_basic_set *j, void *user)
2082
109
{
2083
109
  int depth = *(int *) user;
2084
109
  isl_basic_map *test;
2085
109
  isl_bool empty;
2086
109
  int l;
2087
109
2088
109
  test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
2089
109
                isl_basic_set_copy(j));
2090
297
  for (l = 0; l < depth; ++l)
2091
188
    test = isl_basic_map_equate(test, isl_dim_in, l,
2092
188
            isl_dim_out, l);
2093
109
  empty = isl_basic_map_is_empty(test);
2094
109
  isl_basic_map_free(test);
2095
109
2096
109
  return empty < 0 ? isl_bool_error : !empty;
2097
109
}
2098
2099
/* Internal data structure for generate_sorted_domains_wrap.
2100
 *
2101
 * "n" is the total number of basic sets
2102
 * "executed" and "build" are extra arguments to be passed
2103
 *  to generate_sorted_domains.
2104
 *
2105
 * "single" is set to 1 by generate_sorted_domains_wrap if there
2106
 * is only a single component.
2107
 * "list" collects the results.
2108
 */
2109
struct isl_ast_generate_parallel_domains_data {
2110
  int n;
2111
  isl_union_map *executed;
2112
  isl_ast_build *build;
2113
2114
  int single;
2115
  isl_ast_graft_list *list;
2116
};
2117
2118
/* Call generate_sorted_domains on "scc", fuse the result into a list
2119
 * with either zero or one graft and collect the these single element
2120
 * lists into data->list.
2121
 *
2122
 * If there is only one component, i.e., if the number of basic sets
2123
 * in the current component is equal to the total number of basic sets,
2124
 * then data->single is set to 1 and the result of generate_sorted_domains
2125
 * is not fused.
2126
 */
2127
static isl_stat generate_sorted_domains_wrap(__isl_take isl_basic_set_list *scc,
2128
  void *user)
2129
96
{
2130
96
  struct isl_ast_generate_parallel_domains_data *data = user;
2131
96
  isl_ast_graft_list *list;
2132
96
2133
96
  list = generate_sorted_domains(scc, data->executed, data->build);
2134
96
  data->single = isl_basic_set_list_n_basic_set(scc) == data->n;
2135
96
  if (!data->single)
2136
75
    list = isl_ast_graft_list_fuse(list, data->build);
2137
96
  if (!data->list)
2138
57
    data->list = list;
2139
39
  else
2140
39
    data->list = isl_ast_graft_list_concat(data->list, list);
2141
96
2142
96
  isl_basic_set_list_free(scc);
2143
96
  if (!data->list)
2144
0
    return isl_stat_error;
2145
96
2146
96
  return isl_stat_ok;
2147
96
}
2148
2149
/* Look for any (weakly connected) components in the "domain_list"
2150
 * of domains that share some values of the outer dimensions.
2151
 * That is, domains in different components do not share any values
2152
 * of the outer dimensions.  This means that these components
2153
 * can be freely reordered.
2154
 * Within each of the components, we sort the domains according
2155
 * to the execution order at the current depth.
2156
 *
2157
 * If there is more than one component, then generate_sorted_domains_wrap
2158
 * fuses the result of each call to generate_sorted_domains
2159
 * into a list with either zero or one graft and collects these (at most)
2160
 * single element lists into a bigger list. This means that the elements of the
2161
 * final list can be freely reordered.  In particular, we sort them
2162
 * according to an arbitrary but fixed ordering to ease merging of
2163
 * graft lists from different components.
2164
 */
2165
static __isl_give isl_ast_graft_list *generate_parallel_domains(
2166
  __isl_keep isl_basic_set_list *domain_list,
2167
  __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
2168
981
{
2169
981
  int depth;
2170
981
  struct isl_ast_generate_parallel_domains_data data;
2171
981
2172
981
  if (!domain_list)
2173
0
    return NULL;
2174
981
2175
981
  data.n = isl_basic_set_list_n_basic_set(domain_list);
2176
981
  if (data.n <= 1)
2177
924
    return generate_sorted_domains(domain_list, executed, build);
2178
57
2179
57
  depth = isl_ast_build_get_depth(build);
2180
57
  data.list = NULL;
2181
57
  data.executed = executed;
2182
57
  data.build = build;
2183
57
  data.single = 0;
2184
57
  if (isl_basic_set_list_foreach_scc(domain_list, &shared_outer, &depth,
2185
57
              &generate_sorted_domains_wrap,
2186
57
              &data) < 0)
2187
0
    data.list = isl_ast_graft_list_free(data.list);
2188
57
2189
57
  if (!data.single)
2190
36
    data.list = isl_ast_graft_list_sort_guard(data.list);
2191
57
2192
57
  return data.list;
2193
57
}
2194
2195
/* Internal data for separate_domain.
2196
 *
2197
 * "explicit" is set if we only want to use explicit bounds.
2198
 *
2199
 * "domain" collects the separated domains.
2200
 */
2201
struct isl_separate_domain_data {
2202
  isl_ast_build *build;
2203
  int explicit;
2204
  isl_set *domain;
2205
};
2206
2207
/* Extract implicit bounds on the current dimension for the executed "map".
2208
 *
2209
 * The domain of "map" may involve inner dimensions, so we
2210
 * need to eliminate them.
2211
 */
2212
static __isl_give isl_set *implicit_bounds(__isl_take isl_map *map,
2213
  __isl_keep isl_ast_build *build)
2214
0
{
2215
0
  isl_set *domain;
2216
0
2217
0
  domain = isl_map_domain(map);
2218
0
  domain = isl_ast_build_eliminate(build, domain);
2219
0
2220
0
  return domain;
2221
0
}
2222
2223
/* Extract explicit bounds on the current dimension for the executed "map".
2224
 *
2225
 * Rather than eliminating the inner dimensions as in implicit_bounds,
2226
 * we simply drop any constraints involving those inner dimensions.
2227
 * The idea is that most bounds that are implied by constraints on the
2228
 * inner dimensions will be enforced by for loops and not by explicit guards.
2229
 * There is then no need to separate along those bounds.
2230
 */
2231
static __isl_give isl_set *explicit_bounds(__isl_take isl_map *map,
2232
  __isl_keep isl_ast_build *build)
2233
27
{
2234
27
  isl_set *domain;
2235
27
  int depth, dim;
2236
27
2237
27
  dim = isl_map_dim(map, isl_dim_out);
2238
27
  map = isl_map_drop_constraints_involving_dims(map, isl_dim_out, 0, dim);
2239
27
2240
27
  domain = isl_map_domain(map);
2241
27
  depth = isl_ast_build_get_depth(build);
2242
27
  dim = isl_set_dim(domain, isl_dim_set);
2243
27
  domain = isl_set_detect_equalities(domain);
2244
27
  domain = isl_set_drop_constraints_involving_dims(domain,
2245
27
        isl_dim_set, depth + 1, dim - (depth + 1));
2246
27
  domain = isl_set_remove_divs_involving_dims(domain,
2247
27
        isl_dim_set, depth, 1);
2248
27
  domain = isl_set_remove_unknown_divs(domain);
2249
27
2250
27
  return domain;
2251
27
}
2252
2253
/* Split data->domain into pieces that intersect with the range of "map"
2254
 * and pieces that do not intersect with the range of "map"
2255
 * and then add that part of the range of "map" that does not intersect
2256
 * with data->domain.
2257
 */
2258
static isl_stat separate_domain(__isl_take isl_map *map, void *user)
2259
27
{
2260
27
  struct isl_separate_domain_data *data = user;
2261
27
  isl_set *domain;
2262
27
  isl_set *d1, *d2;
2263
27
2264
27
  if (data->explicit)
2265
27
    domain = explicit_bounds(map, data->build);
2266
0
  else
2267
0
    domain = implicit_bounds(map, data->build);
2268
27
2269
27
  domain = isl_set_coalesce(domain);
2270
27
  domain = isl_set_make_disjoint(domain);
2271
27
  d1 = isl_set_subtract(isl_set_copy(domain), isl_set_copy(data->domain));
2272
27
  d2 = isl_set_subtract(isl_set_copy(data->domain), isl_set_copy(domain));
2273
27
  data->domain = isl_set_intersect(data->domain, domain);
2274
27
  data->domain = isl_set_union(data->domain, d1);
2275
27
  data->domain = isl_set_union(data->domain, d2);
2276
27
2277
27
  return isl_stat_ok;
2278
27
}
2279
2280
/* Separate the schedule domains of "executed".
2281
 *
2282
 * That is, break up the domain of "executed" into basic sets,
2283
 * such that for each basic set S, every element in S is associated with
2284
 * the same domain spaces.
2285
 *
2286
 * "space" is the (single) domain space of "executed".
2287
 */
2288
static __isl_give isl_set *separate_schedule_domains(
2289
  __isl_take isl_space *space, __isl_take isl_union_map *executed,
2290
  __isl_keep isl_ast_build *build)
2291
27
{
2292
27
  struct isl_separate_domain_data data = { build };
2293
27
  isl_ctx *ctx;
2294
27
2295
27
  ctx = isl_ast_build_get_ctx(build);
2296
27
  data.explicit = isl_options_get_ast_build_separation_bounds(ctx) ==
2297
27
            ISL_AST_BUILD_SEPARATION_BOUNDS_EXPLICIT;
2298
27
  data.domain = isl_set_empty(space);
2299
27
  if (isl_union_map_foreach_map(executed, &separate_domain, &data) < 0)
2300
0
    data.domain = isl_set_free(data.domain);
2301
27
2302
27
  isl_union_map_free(executed);
2303
27
  return data.domain;
2304
27
}
2305
2306
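A sketch of how the separate option reaches this code from the public API (the schedule, the statement names and the option-map encoding are assumptions of this example; the encoding follows the documented convention of mapping schedule elements to a space named after the option):

    #include <stdio.h>
    #include <stdlib.h>
    #include <isl/set.h>
    #include <isl/union_map.h>
    #include <isl/ast.h>
    #include <isl/ast_build.h>

    int main(void)
    {
        isl_ctx *ctx = isl_ctx_alloc();
        isl_union_map *schedule = isl_union_map_read_from_str(ctx,
            "[n] -> { A[i] -> [i] : 0 <= i < n; "
            "B[i] -> [i] : 0 <= i < 10 }");
        /* Request separation at every schedule dimension x. */
        isl_union_map *options = isl_union_map_read_from_str(ctx,
            "{ [i] -> separate[x] }");
        isl_ast_build *build = isl_ast_build_from_context(
            isl_set_read_from_str(ctx, "[n] -> { : n >= 0 }"));
        build = isl_ast_build_set_options(build, options);
        isl_ast_node *tree = isl_ast_build_node_from_schedule_map(build,
            schedule);
        char *str = isl_ast_node_to_C_str(tree);
        /* Expect separate loops for the parts where only A,
         * only B, or both statements are active. */
        printf("%s", str);
        free(str);
        isl_ast_node_free(tree);
        isl_ast_build_free(build);
        isl_ctx_free(ctx);
        return 0;
    }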
/* Temporary data used during the search for a lower bound for unrolling.
2307
 *
2308
 * "build" is the build in which the unrolling will be performed
2309
 * "domain" is the original set for which to find a lower bound
2310
 * "depth" is the dimension for which to find a lower boudn
2311
 * "expansion" is the expansion that needs to be applied to "domain"
2312
 * in the unrolling that will be performed
2313
 *
2314
 * "lower" is the best lower bound found so far.  It is NULL if we have not
2315
 * found any yet.
2316
 * "n" is the corresponding size.  If lower is NULL, then the value of n
2317
 * is undefined.
2318
 * "n_div" is the maximal number of integer divisions in the first
2319
 * unrolled iteration (after expansion).  It is set to -1 if it hasn't
2320
 * been computed yet.
2321
 */
2322
struct isl_find_unroll_data {
2323
  isl_ast_build *build;
2324
  isl_set *domain;
2325
  int depth;
2326
  isl_basic_map *expansion;
2327
2328
  isl_aff *lower;
2329
  int *n;
2330
  int n_div;
2331
};
2332
2333
/* Return the constraint
2334
 *
2335
 *  i_"depth" = aff + offset
2336
 */
2337
static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff,
2338
  int offset)
2339
4.52k
{
2340
4.52k
  aff = isl_aff_copy(aff);
2341
4.52k
  aff = isl_aff_add_coefficient_si(aff, isl_dim_in, depth, -1);
2342
4.52k
  aff = isl_aff_add_constant_si(aff, offset);
2343
4.52k
  return isl_equality_from_aff(aff);
2344
4.52k
}
2345
2346
/* Update *user to the number of integer divisions in the first element
2347
 * of "ma", if it is larger than the current value.
2348
 */
2349
static isl_stat update_n_div(__isl_take isl_set *set,
2350
  __isl_take isl_multi_aff *ma, void *user)
2351
2
{
2352
2
  isl_aff *aff;
2353
2
  int *n = user;
2354
2
  int n_div;
2355
2
2356
2
  aff = isl_multi_aff_get_aff(ma, 0);
2357
2
  n_div = isl_aff_dim(aff, isl_dim_div);
2358
2
  isl_aff_free(aff);
2359
2
  isl_multi_aff_free(ma);
2360
2
  isl_set_free(set);
2361
2
2362
2
  if (n_div > *n)
2363
2
    *n = n_div;
2364
2
2365
2
  return aff ? isl_stat_ok : isl_stat_error;
2366
2
}
2367
2368
/* Get the number of integer divisions in the expression for the iterator
2369
 * value at the first slice in the unrolling based on lower bound "lower",
2370
 * taking into account the expansion that needs to be performed on this slice.
2371
 */
2372
static int get_expanded_n_div(struct isl_find_unroll_data *data,
2373
  __isl_keep isl_aff *lower)
2374
2
{
2375
2
  isl_constraint *c;
2376
2
  isl_set *set;
2377
2
  isl_map *it_map, *expansion;
2378
2
  isl_pw_multi_aff *pma;
2379
2
  int n;
2380
2
2381
2
  c = at_offset(data->depth, lower, 0);
2382
2
  set = isl_set_copy(data->domain);
2383
2
  set = isl_set_add_constraint(set, c);
2384
2
  expansion = isl_map_from_basic_map(isl_basic_map_copy(data->expansion));
2385
2
  set = isl_set_apply(set, expansion);
2386
2
  it_map = isl_ast_build_map_to_iterator(data->build, set);
2387
2
  pma = isl_pw_multi_aff_from_map(it_map);
2388
2
  n = 0;
2389
2
  if (isl_pw_multi_aff_foreach_piece(pma, &update_n_div, &n) < 0)
2390
0
    n = -1;
2391
2
  isl_pw_multi_aff_free(pma);
2392
2
2393
2
  return n;
2394
2
}
2395
2396
/* Is the lower bound "lower" with corresponding iteration count "n"
2397
 * better than the one stored in "data"?
2398
 * If there is no upper bound on the iteration count ("n" is infinity) or
2399
 * if the count is too large, then we cannot use this lower bound.
2400
 * Otherwise, if there was no previous lower bound or
2401
 * if the iteration count of the new lower bound is smaller than
2402
 * the iteration count of the previous lower bound, then we consider
2403
 * the new lower bound to be better.
2404
 * If the iteration count is the same, then compare the number
2405
 * of integer divisions that would be needed to express
2406
 * the iterator value at the first slice in the unrolling
2407
 * according to the lower bound.  If we end up computing this
2408
 * number, then store the lowest value in data->n_div.
2409
 */
2410
static int is_better_lower_bound(struct isl_find_unroll_data *data,
2411
  __isl_keep isl_aff *lower, __isl_keep isl_val *n)
2412
2.38k
{
2413
2.38k
  int cmp;
2414
2.38k
  int n_div;
2415
2.38k
2416
2.38k
  if (!n)
2417
0
    return -1;
2418
2.38k
  if (isl_val_is_infty(n))
2419
0
    return 0;
2420
2.38k
  if (isl_val_cmp_si(n, INT_MAX) > 0)
2421
0
    return 0;
2422
2.38k
  if (!data->lower)
2423
2.37k
    return 1;
2424
7
  cmp = isl_val_cmp_si(n, *data->n);
2425
7
  if (cmp < 0)
2426
2
    return 1;
2427
5
  if (cmp > 0)
2428
4
    return 0;
2429
1
  if (data->n_div < 0)
2430
1
    data->n_div = get_expanded_n_div(data, data->lower);
2431
1
  if (data->n_div < 0)
2432
0
    return -1;
2433
1
  if (data->n_div == 0)
2434
0
    return 0;
2435
1
  n_div = get_expanded_n_div(data, lower);
2436
1
  if (n_div < 0)
2437
0
    return -1;
2438
1
  if (n_div >= data->n_div)
2439
0
    return 0;
2440
1
  data->n_div = n_div;
2441
1
2442
1
  return 1;
2443
1
}
2444
2445
/* Check if we can use "c" as a lower bound and if it is better than
2446
 * any previously found lower bound.
2447
 *
2448
 * If "c" does not involve the dimension at the current depth,
2449
 * then we cannot use it.
2450
 * Otherwise, let "c" be of the form
2451
 *
2452
 *  i >= f(j)/a
2453
 *
2454
 * We compute the maximal value of
2455
 *
2456
 *  -ceil(f(j)/a) + i + 1
2457
 *
2458
 * over the domain.  If there is such a value "n", then we know
2459
 *
2460
 *  -ceil(f(j)/a) + i + 1 <= n
2461
 *
2462
 * or
2463
 *
2464
 *  i < ceil(f(j)/a) + n
2465
 *
2466
 * meaning that we can use ceil(f(j)/a) as a lower bound for unrolling.
2467
 * We just need to check if we have found any lower bound before and
2468
 * if the new lower bound is better (smaller n or fewer integer divisions)
2469
 * than the previously found lower bounds.
2470
 */
2471
static isl_stat update_unrolling_lower_bound(struct isl_find_unroll_data *data,
2472
  __isl_keep isl_constraint *c)
2473
26.4k
{
2474
26.4k
  isl_aff *aff, *lower;
2475
26.4k
  isl_val *max;
2476
26.4k
  int better;
2477
26.4k
2478
26.4k
  if (!isl_constraint_is_lower_bound(c, isl_dim_set, data->depth))
2479
24.0k
    return isl_stat_ok;
2480
2.38k
2481
2.38k
  lower = isl_constraint_get_bound(c, isl_dim_set, data->depth);
2482
2.38k
  lower = isl_aff_ceil(lower);
2483
2.38k
  aff = isl_aff_copy(lower);
2484
2.38k
  aff = isl_aff_neg(aff);
2485
2.38k
  aff = isl_aff_add_coefficient_si(aff, isl_dim_in, data->depth, 1);
2486
2.38k
  aff = isl_aff_add_constant_si(aff, 1);
2487
2.38k
  max = isl_set_max_val(data->domain, aff);
2488
2.38k
  isl_aff_free(aff);
2489
2.38k
2490
2.38k
  better = is_better_lower_bound(data, lower, max);
2491
2.38k
  if (better < 0 || !better) {
2492
4
    isl_val_free(max);
2493
4
    isl_aff_free(lower);
2494
4
    return better < 0 ? isl_stat_error : isl_stat_ok;
2495
4
  }
2496
2.37k
2497
2.37k
  isl_aff_free(data->lower);
2498
2.37k
  data->lower = lower;
2499
2.37k
  *data->n = isl_val_get_num_si(max);
2500
2.37k
  isl_val_free(max);
2501
2.37k
2502
2.37k
  return isl_stat_ok;
2503
2.37k
}
2504
2505
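A small numeric check of the derivation above (the domain and the candidate lower bound l = 0 are made up for this sketch; the objective is written in isl notation): the maximum of -l + i + 1 over { [i] : 0 <= i <= 7 } is 8, so 8 unrolled slices cover the domain.

    #include <stdio.h>
    #include <isl/ctx.h>
    #include <isl/set.h>
    #include <isl/aff.h>
    #include <isl/ilp.h>
    #include <isl/val.h>

    int main(void)
    {
        isl_ctx *ctx = isl_ctx_alloc();
        isl_set *domain = isl_set_read_from_str(ctx,
            "{ [i] : 0 <= i <= 7 }");
        /* -ceil(f(j)/a) + i + 1 with the candidate lower bound 0. */
        isl_aff *obj = isl_aff_read_from_str(ctx,
            "{ [i] -> [(i + 1)] }");
        isl_val *n = isl_set_max_val(domain, obj);
        printf("n = %ld\n", isl_val_get_num_si(n));    /* n = 8 */
        isl_val_free(n);
        isl_aff_free(obj);
        isl_set_free(domain);
        isl_ctx_free(ctx);
        return 0;
    }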
/* Check if we can use "c" as a lower bound and if it is better than
2506
 * any previously found lower bound.
2507
 */
2508
static isl_stat constraint_find_unroll(__isl_take isl_constraint *c, void *user)
2509
26.4k
{
2510
26.4k
  struct isl_find_unroll_data *data;
2511
26.4k
  isl_stat r;
2512
26.4k
2513
26.4k
  data = (struct isl_find_unroll_data *) user;
2514
26.4k
  r = update_unrolling_lower_bound(data, c);
2515
26.4k
  isl_constraint_free(c);
2516
26.4k
2517
26.4k
  return r;
2518
26.4k
}
2519
2520
/* Look for a lower bound l(i) on the dimension at "depth"
2521
 * and a size n such that "domain" is a subset of
2522
 *
2523
 *  { [i] : l(i) <= i_d < l(i) + n }
2524
 *
2525
 * where d is "depth" and l(i) depends only on earlier dimensions.
2526
 * Furthermore, try and find a lower bound such that n is as small as possible.
2527
 * In particular, "n" needs to be finite.
2528
 * "build" is the build in which the unrolling will be performed.
2529
 * "expansion" is the expansion that needs to be applied to "domain"
2530
 * in the unrolling that will be performed.
2531
 *
2532
 * Inner dimensions have been eliminated from "domain" by the caller.
2533
 *
2534
 * We first construct a collection of lower bounds on the input set
2535
 * by computing its simple hull.  We then iterate through them,
2536
 * discarding those that we cannot use (either because they do not
2537
 * involve the dimension at "depth" or because they have no corresponding
2538
 * upper bound, meaning that "n" would be unbounded) and pick out the
2539
 * best from the remaining ones.
2540
 *
2541
 * If we cannot find a suitable lower bound, then we consider that
2542
 * to be an error.
2543
 */
2544
static __isl_give isl_aff *find_unroll_lower_bound(
2545
  __isl_keep isl_ast_build *build, __isl_keep isl_set *domain,
2546
  int depth, __isl_keep isl_basic_map *expansion, int *n)
2547
2.37k
{
2548
2.37k
  struct isl_find_unroll_data data =
2549
2.37k
      { build, domain, depth, expansion, NULL, n, -1 };
2550
2.37k
  isl_basic_set *hull;
2551
2.37k
2552
2.37k
  hull = isl_set_simple_hull(isl_set_copy(domain));
2553
2.37k
2554
2.37k
  if (isl_basic_set_foreach_constraint(hull,
2555
2.37k
              &constraint_find_unroll, &data) < 0)
2556
0
    goto error;
2557
2.37k
2558
2.37k
  isl_basic_set_free(hull);
2559
2.37k
2560
2.37k
  if (!data.lower)
2561
2.37k
    isl_die(isl_set_get_ctx(domain), isl_error_invalid,
2562
2.37k
      "cannot find lower bound for unrolling", return NULL);
2563
2.37k
2564
2.37k
  return data.lower;
2565
0
error:
2566
0
  isl_basic_set_free(hull);
2567
0
  return isl_aff_free(data.lower);
2568
2.37k
}
2569
2570
/* Call "fn" on each iteration of the current dimension of "domain".
2571
 * If "init" is not NULL, then it is called with the number of
2572
 * iterations before any call to "fn".
2573
 * Return -1 on failure.
2574
 *
2575
 * Since we are going to be iterating over the individual values,
2576
 * we first check if there are any strides on the current dimension.
2577
 * If there is, we rewrite the current dimension i as
2578
 *
2579
 *    i = stride i' + offset
2580
 *
2581
 * and then iterate over individual values of i' instead.
2582
 *
2583
 * We then look for a lower bound on i' and a size such that the domain
2584
 * is a subset of
2585
 *
2586
 *  { [j,i'] : l(j) <= i' < l(j) + n }
2587
 *
2588
 * and then take slices of the domain at values of i'
2589
 * between l(j) and l(j) + n - 1.
2590
 *
2591
 * We compute the unshifted simple hull of each slice to ensure that
2592
 * we have a single basic set per offset.  The slicing constraint
2593
 * may get simplified away before the unshifted simple hull is taken
2594
 * and may therefore in some rare cases disappear from the result.
2595
 * We therefore explicitly add the constraint back after computing
2596
 * the unshifted simple hull to ensure that the basic sets
2597
 * remain disjoint.  The constraints that are dropped by taking the hull
2598
 * will be taken into account at the next level, as in the case of the
2599
 * atomic option.
2600
 *
2601
 * Finally, we map i' back to i and call "fn".
2602
 */
2603
static int foreach_iteration(__isl_take isl_set *domain,
2604
  __isl_keep isl_ast_build *build, int (*init)(int n, void *user),
2605
  int (*fn)(__isl_take isl_basic_set *bset, void *user), void *user)
2606
2.37k
{
2607
2.37k
  int i, n;
2608
2.37k
  int empty;
2609
2.37k
  int depth;
2610
2.37k
  isl_multi_aff *expansion;
2611
2.37k
  isl_basic_map *bmap;
2612
2.37k
  isl_aff *lower = NULL;
2613
2.37k
  isl_ast_build *stride_build;
2614
2.37k
2615
2.37k
  depth = isl_ast_build_get_depth(build);
2616
2.37k
2617
2.37k
  domain = isl_ast_build_eliminate_inner(build, domain);
2618
2.37k
  domain = isl_set_intersect(domain, isl_ast_build_get_domain(build));
2619
2.37k
  stride_build = isl_ast_build_copy(build);
2620
2.37k
  stride_build = isl_ast_build_detect_strides(stride_build,
2621
2.37k
              isl_set_copy(domain));
2622
2.37k
  expansion = isl_ast_build_get_stride_expansion(stride_build);
2623
2.37k
2624
2.37k
  domain = isl_set_preimage_multi_aff(domain,
2625
2.37k
              isl_multi_aff_copy(expansion));
2626
2.37k
  domain = isl_ast_build_eliminate_divs(stride_build, domain);
2627
2.37k
  isl_ast_build_free(stride_build);
2628
2.37k
2629
2.37k
  bmap = isl_basic_map_from_multi_aff(expansion);
2630
2.37k
2631
2.37k
  empty = isl_set_is_empty(domain);
2632
2.37k
  if (empty < 0) {
2633
0
    n = -1;
2634
2.37k
  } else if (empty) {
2635
0
    n = 0;
2636
2.37k
  } else {
2637
2.37k
    lower = find_unroll_lower_bound(build, domain, depth, bmap, &n);
2638
2.37k
    if (!lower)
2639
0
      n = -1;
2640
2.37k
  }
2641
2.37k
  if (n >= 0 && init && init(n, user) < 0)
2642
0
    n = -1;
2643
6.89k
  for (i = 0; i < n; ++i) {
2644
4.52k
    isl_set *set;
2645
4.52k
    isl_basic_set *bset;
2646
4.52k
    isl_constraint *slice;
2647
4.52k
2648
4.52k
    slice = at_offset(depth, lower, i);
2649
4.52k
    set = isl_set_copy(domain);
2650
4.52k
    set = isl_set_add_constraint(set, isl_constraint_copy(slice));
2651
4.52k
    bset = isl_set_unshifted_simple_hull(set);
2652
4.52k
    bset = isl_basic_set_add_constraint(bset, slice);
2653
4.52k
    bset = isl_basic_set_apply(bset, isl_basic_map_copy(bmap));
2654
4.52k
2655
4.52k
    if (fn(bset, user) < 0)
2656
0
      break;
2657
4.52k
  }
2658
2.37k
2659
2.37k
  isl_aff_free(lower);
2660
2.37k
  isl_set_free(domain);
2661
2.37k
  isl_basic_map_free(bmap);
2662
2.37k
2663
2.37k
  return n < 0 || i < n ? -1 : 0;
2664
2.37k
}
2665
2666
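foreach_iteration is what ultimately implements the unroll option; a sketch of triggering it from the public API (the schedule and the option-map encoding are assumptions of this example): each iteration becomes a separate call in the AST.

    #include <stdio.h>
    #include <stdlib.h>
    #include <isl/set.h>
    #include <isl/union_map.h>
    #include <isl/ast.h>
    #include <isl/ast_build.h>

    int main(void)
    {
        isl_ctx *ctx = isl_ctx_alloc();
        isl_union_map *schedule = isl_union_map_read_from_str(ctx,
            "{ S[i] -> [i] : 0 <= i < 4 }");
        /* Fully unroll the (single) schedule dimension. */
        isl_union_map *options = isl_union_map_read_from_str(ctx,
            "{ [i] -> unroll[x] }");
        isl_ast_build *build = isl_ast_build_from_context(
            isl_set_read_from_str(ctx, "{ : }"));
        build = isl_ast_build_set_options(build, options);
        isl_ast_node *tree = isl_ast_build_node_from_schedule_map(build,
            schedule);
        char *str = isl_ast_node_to_C_str(tree);
        printf("%s", str);    /* expect S(0); ... S(3); and no loop */
        free(str);
        isl_ast_node_free(tree);
        isl_ast_build_free(build);
        isl_ctx_free(ctx);
        return 0;
    }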
/* Data structure for storing the results and the intermediate objects
2667
 * of compute_domains.
2668
 *
2669
 * "list" is the main result of the function and contains a list
2670
 * of disjoint basic sets for which code should be generated.
2671
 *
2672
 * "executed" and "build" are inputs to compute_domains.
2673
 * "schedule_domain" is the domain of "executed".
2674
 *
2675
 * "option" contains the domains at the current depth that should by
2676
 * atomic, separated or unrolled.  These domains are as specified by
2677
 * the user, except that inner dimensions have been eliminated and
2678
 * that they have been made pair-wise disjoint.
2679
 *
2680
 * "sep_class" contains the user-specified split into separation classes
2681
 * specialized to the current depth.
2682
 * "done" contains the union of the separation domains that have already
2683
 * been handled.
2684
 */
2685
struct isl_codegen_domains {
2686
  isl_basic_set_list *list;
2687
2688
  isl_union_map *executed;
2689
  isl_ast_build *build;
2690
  isl_set *schedule_domain;
2691
2692
  isl_set *option[4];
2693
2694
  isl_map *sep_class;
2695
  isl_set *done;
2696
};
2697
2698
/* Internal data structure for do_unroll.
2699
 *
2700
 * "domains" stores the results of compute_domains.
2701
 * "class_domain" is the original class domain passed to do_unroll.
2702
 * "unroll_domain" collects the unrolled iterations.
2703
 */
2704
struct isl_ast_unroll_data {
2705
  struct isl_codegen_domains *domains;
2706
  isl_set *class_domain;
2707
  isl_set *unroll_domain;
2708
};
2709
2710
/* Given an iteration of an unrolled domain represented by "bset",
2711
 * add it to data->domains->list.
2712
 * Since we may have dropped some constraints, we intersect with
2713
 * the class domain again to ensure that each element in the list
2714
 * is disjoint from the other class domains.
2715
 */
2716
static int do_unroll_iteration(__isl_take isl_basic_set *bset, void *user)
2717
8
{
2718
8
  struct isl_ast_unroll_data *data = user;
2719
8
  isl_set *set;
2720
8
  isl_basic_set_list *list;
2721
8
2722
8
  set = isl_set_from_basic_set(bset);
2723
8
  data->unroll_domain = isl_set_union(data->unroll_domain,
2724
8
              isl_set_copy(set));
2725
8
  set = isl_set_intersect(set, isl_set_copy(data->class_domain));
2726
8
  set = isl_set_make_disjoint(set);
2727
8
  list = isl_basic_set_list_from_set(set);
2728
8
  data->domains->list = isl_basic_set_list_concat(data->domains->list,
2729
8
              list);
2730
8
2731
8
  return 0;
2732
8
}
2733
2734
/* Extend domains->list with a list of basic sets, one for each value
2735
 * of the current dimension in "domain" and remove the corresponding
2736
 * sets from the class domain.  Return the updated class domain.
2737
 * The divs that involve the current dimension have not been projected out
2738
 * from this domain.
2739
 *
2740
 * We call foreach_iteration to iterate over the individual values and
2741
 * in do_unroll_iteration we collect the individual basic sets in
2742
 * domains->list and their union in data->unroll_domain, which is then
2743
 * used to update the class domain.
2744
 */
2745
static __isl_give isl_set *do_unroll(struct isl_codegen_domains *domains,
2746
  __isl_take isl_set *domain, __isl_take isl_set *class_domain)
2747
3
{
2748
3
  struct isl_ast_unroll_data data;
2749
3
2750
3
  if (!domain)
2751
0
    return isl_set_free(class_domain);
2752
3
  if (!class_domain)
2753
0
    return isl_set_free(domain);
2754
3
2755
3
  data.domains = domains;
2756
3
  data.class_domain = class_domain;
2757
3
  data.unroll_domain = isl_set_empty(isl_set_get_space(domain));
2758
3
2759
3
  if (foreach_iteration(domain, domains->build, NULL,
2760
3
        &do_unroll_iteration, &data) < 0)
2761
0
    data.unroll_domain = isl_set_free(data.unroll_domain);
2762
3
2763
3
  class_domain = isl_set_subtract(class_domain, data.unroll_domain);
2764
3
2765
3
  return class_domain;
2766
3
}
2767
2768
/* Add domains to domains->list for each individual value of the current
2769
 * dimension, for that part of the schedule domain that lies in the
2770
 * intersection of the option domain and the class domain.
2771
 * Remove the corresponding sets from the class domain and
2772
 * return the updated class domain.
2773
 *
2774
 * We first break up the unroll option domain into individual pieces
2775
 * and then handle each of them separately.  The unroll option domain
2776
 * has been made disjoint in compute_domains_init_options,
2777
 *
2778
 * Note that we actively want to combine different pieces of the
2779
 * schedule domain that have the same value at the current dimension.
2780
 * We therefore need to break up the unroll option domain before
2781
 * intersecting with class and schedule domain, hoping that the
2782
 * unroll option domain specified by the user is relatively simple.
2783
 */
2784
static __isl_give isl_set *compute_unroll_domains(
2785
  struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
2786
84
{
2787
84
  isl_set *unroll_domain;
2788
84
  isl_basic_set_list *unroll_list;
2789
84
  int i, n;
2790
84
  int empty;
2791
84
2792
84
  empty = isl_set_is_empty(domains->option[isl_ast_loop_unroll]);
2793
84
  if (empty < 0)
2794
0
    return isl_set_free(class_domain);
2795
84
  if (empty)
2796
81
    return class_domain;
2797
3
2798
3
  unroll_domain = isl_set_copy(domains->option[isl_ast_loop_unroll]);
2799
3
  unroll_list = isl_basic_set_list_from_set(unroll_domain);
2800
3
2801
3
  n = isl_basic_set_list_n_basic_set(unroll_list);
2802
6
  for (i = 0; i < n; ++i) {
2803
3
    isl_basic_set *bset;
2804
3
2805
3
    bset = isl_basic_set_list_get_basic_set(unroll_list, i);
2806
3
    unroll_domain = isl_set_from_basic_set(bset);
2807
3
    unroll_domain = isl_set_intersect(unroll_domain,
2808
3
                isl_set_copy(class_domain));
2809
3
    unroll_domain = isl_set_intersect(unroll_domain,
2810
3
          isl_set_copy(domains->schedule_domain));
2811
3
2812
3
    empty = isl_set_is_empty(unroll_domain);
2813
3
    if (empty >= 0 && empty) {
2814
0
      isl_set_free(unroll_domain);
2815
0
      continue;
2816
0
    }
2817
3
2818
3
    class_domain = do_unroll(domains, unroll_domain, class_domain);
2819
3
  }
2820
3
2821
3
  isl_basic_set_list_free(unroll_list);
2822
3
2823
3
  return class_domain;
2824
3
}
2825
2826
/* Try and construct a single basic set that includes the intersection of
2827
 * the schedule domain, the atomic option domain and the class domain.
2828
 * Add the resulting basic set(s) to domains->list and remove them
2829
 * from class_domain.  Return the updated class domain.
2830
 *
2831
 * We construct a single domain rather than trying to combine
2832
 * the schedule domains of individual domains because we are working
2833
 * within a single component so that non-overlapping schedule domains
2834
 * should already have been separated.
2835
 * We do however need to make sure that this single domain is a subset
2836
 * of the class domain so that it would not intersect with any other
2837
 * class domains.  This means that we may end up splitting up the atomic
2838
 * domain in case separation classes are being used.
2839
 *
2840
 * "domain" is the intersection of the schedule domain and the class domain,
2841
 * with inner dimensions projected out.
2842
 */
2843
static __isl_give isl_set *compute_atomic_domain(
2844
  struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
2845
84
{
2846
84
  isl_basic_set *bset;
2847
84
  isl_basic_set_list *list;
2848
84
  isl_set *domain, *atomic_domain;
2849
84
  int empty;
2850
84
2851
84
  domain = isl_set_copy(domains->option[isl_ast_loop_atomic]);
2852
84
  domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2853
84
  domain = isl_set_intersect(domain,
2854
84
        isl_set_copy(domains->schedule_domain));
2855
84
  empty = isl_set_is_empty(domain);
2856
84
  if (empty < 0)
2857
0
    class_domain = isl_set_free(class_domain);
2858
84
  if (empty) {
2859
83
    isl_set_free(domain);
2860
83
    return class_domain;
2861
83
  }
2862
1
2863
1
  domain = isl_ast_build_eliminate(domains->build, domain);
2864
1
  domain = isl_set_coalesce_preserve(domain);
2865
1
  bset = isl_set_unshifted_simple_hull(domain);
2866
1
  domain = isl_set_from_basic_set(bset);
2867
1
  atomic_domain = isl_set_copy(domain);
2868
1
  domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2869
1
  class_domain = isl_set_subtract(class_domain, atomic_domain);
2870
1
  domain = isl_set_make_disjoint(domain);
2871
1
  list = isl_basic_set_list_from_set(domain);
2872
1
  domains->list = isl_basic_set_list_concat(domains->list, list);
2873
1
2874
1
  return class_domain;
2875
1
}
2876
2877
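The possible enlargement mentioned above comes from isl_set_unshifted_simple_hull; a tiny sketch (the set is made up, and the availability of isl_basic_set_to_str is assumed) of how a two-disjunct atomic domain grows to a single basic set that also covers the gap:

    #include <stdio.h>
    #include <stdlib.h>
    #include <isl/ctx.h>
    #include <isl/set.h>

    int main(void)
    {
        isl_ctx *ctx = isl_ctx_alloc();
        isl_set *set = isl_set_read_from_str(ctx,
            "{ [i] : 0 <= i <= 3 or 6 <= i <= 9 }");
        /* The hull also covers 4 and 5, i.e., more than requested. */
        isl_basic_set *hull = isl_set_unshifted_simple_hull(set);
        char *str = isl_basic_set_to_str(hull);
        printf("%s\n", str);    /* expect { [i] : 0 <= i <= 9 } */
        free(str);
        isl_basic_set_free(hull);
        isl_ctx_free(ctx);
        return 0;
    }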
/* Split up the schedule domain into uniform basic sets,
2878
 * in the sense that each element in a basic set is associated to
2879
 * elements of the same domains, and add the result to domains->list.
2880
 * Do this for that part of the schedule domain that lies in the
2881
 * intersection of "class_domain" and the separate option domain.
2882
 *
2883
 * "class_domain" may or may not include the constraints
2884
 * of the schedule domain, but this does not make a difference
2885
 * since we are going to intersect it with the domain of the inverse schedule.
2886
 * If it includes schedule domain constraints, then they may involve
2887
 * inner dimensions, but we will eliminate them in separation_domain.
2888
 */
2889
static int compute_separate_domain(struct isl_codegen_domains *domains,
2890
  __isl_keep isl_set *class_domain)
2891
84
{
2892
84
  isl_space *space;
2893
84
  isl_set *domain;
2894
84
  isl_union_map *executed;
2895
84
  isl_basic_set_list *list;
2896
84
  int empty;
2897
84
2898
84
  domain = isl_set_copy(domains->option[isl_ast_loop_separate]);
2899
84
  domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2900
84
  executed = isl_union_map_copy(domains->executed);
2901
84
  executed = isl_union_map_intersect_domain(executed,
2902
84
            isl_union_set_from_set(domain));
2903
84
  empty = isl_union_map_is_empty(executed);
2904
84
  if (empty < 0 || empty) {
2905
84
    isl_union_map_free(executed);
2906
84
    return empty < 0 ? -1 : 0;
2907
84
  }
2908
0
2909
0
  space = isl_set_get_space(class_domain);
2910
0
  domain = separate_schedule_domains(space, executed, domains->build);
2911
0
2912
0
  list = isl_basic_set_list_from_set(domain);
2913
0
  domains->list = isl_basic_set_list_concat(domains->list, list);
2914
0
2915
0
  return 0;
2916
0
}
2917
2918
/* Split up the domain at the current depth into disjoint
2919
 * basic sets for which code should be generated separately
2920
 * for the given separation class domain.
2921
 *
2922
 * If any separation classes have been defined, then "class_domain"
2923
 * is the domain of the current class and does not refer to inner dimensions.
2924
 * Otherwise, "class_domain" is the universe domain.
2925
 *
2926
 * We first make sure that the class domain is disjoint from
2927
 * previously considered class domains.
2928
 *
2929
 * The separate domains can be computed directly from the "class_domain".
2930
 *
2931
 * The unroll, atomic and remainder domains need the constraints
2932
 * from the schedule domain.
2933
 *
2934
 * For unrolling, the actual schedule domain is needed (with divs that
2935
 * may refer to the current dimension) so that stride detection can be
2936
 * performed.
2937
 *
2938
 * For atomic and remainder domains, inner dimensions and divs involving
2939
 * the current dimensions should be eliminated.
2940
 * In case we are working within a separation class, we need to intersect
2941
 * the result with the current "class_domain" to ensure that the domains
2942
 * are disjoint from those generated from other class domains.
2943
 *
2944
 * The domain that has been made atomic may be larger than specified
2945
 * by the user since it needs to be representable as a single basic set.
2946
 * This possibly larger domain is removed from class_domain by
2947
 * compute_atomic_domain.  It is computed first so that the extended domain
2948
 * would not overlap with any domains computed before.
2949
 * Similarly, the unrolled domains may have some constraints removed and
2950
 * may therefore also be larger than specified by the user.
2951
 *
2952
 * If anything is left after handling separate, unroll and atomic,
2953
 * we split it up into basic sets and append the basic sets to domains->list.
2954
 */
2955
static isl_stat compute_partial_domains(struct isl_codegen_domains *domains,
2956
  __isl_take isl_set *class_domain)
2957
84
{
2958
84
  isl_basic_set_list *list;
2959
84
  isl_set *domain;
2960
84
2961
84
  class_domain = isl_set_subtract(class_domain,
2962
84
          isl_set_copy(domains->done));
2963
84
  domains->done = isl_set_union(domains->done,
2964
84
          isl_set_copy(class_domain));
2965
84
2966
84
  class_domain = compute_atomic_domain(domains, class_domain);
2967
84
  class_domain = compute_unroll_domains(domains, class_domain);
2968
84
2969
84
  domain = isl_set_copy(class_domain);
2970
84
2971
84
  if (compute_separate_domain(domains, domain) < 0)
2972
0
    goto error;
2973
84
  domain = isl_set_subtract(domain,
2974
84
      isl_set_copy(domains->option[isl_ast_loop_separate]));
2975
84
2976
84
  domain = isl_set_intersect(domain,
2977
84
        isl_set_copy(domains->schedule_domain));
2978
84
2979
84
  domain = isl_ast_build_eliminate(domains->build, domain);
2980
84
  domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2981
84
2982
84
  domain = isl_set_coalesce_preserve(domain);
2983
84
  domain = isl_set_make_disjoint(domain);
2984
84
2985
84
  list = isl_basic_set_list_from_set(domain);
2986
84
  domains->list = isl_basic_set_list_concat(domains->list, list);
2987
84
2988
84
  isl_set_free(class_domain);
2989
84
2990
84
  return isl_stat_ok;
2991
0
error:
2992
0
  isl_set_free(domain);
2993
0
  isl_set_free(class_domain);
2994
0
  return isl_stat_error;
2995
84
}
2996
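The tail of compute_partial_domains relies on isl_set_make_disjoint followed by isl_basic_set_list_from_set to split the remainder into disjoint basic sets. A minimal sketch of that pair on made-up input, assuming isl's public API:

#include <isl/ctx.h>
#include <isl/set.h>

int main(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  /* Two overlapping disjuncts. */
  isl_set *domain = isl_set_read_from_str(ctx,
    "{ [i] : 0 <= i <= 6 or 4 <= i <= 9 }");
  /* Cut the disjuncts apart so that each basic set is disjoint
   * from the others, e.g. [0, 6] and [7, 9]. */
  domain = isl_set_make_disjoint(domain);
  isl_basic_set_list *list = isl_basic_set_list_from_set(domain);
  isl_basic_set_list_dump(list);
  isl_basic_set_list_free(list);
  isl_ctx_free(ctx);
  return 0;
}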
2997
/* Split up the domain at the current depth into disjoint
2998
 * basic sets for which code should be generated separately
2999
 * for the separation class identified by "pnt".
3000
 *
3001
 * We extract the corresponding class domain from domains->sep_class,
3002
 * eliminate inner dimensions and pass control to compute_partial_domains.
3003
 */
3004
static isl_stat compute_class_domains(__isl_take isl_point *pnt, void *user)
3005
0
{
3006
0
  struct isl_codegen_domains *domains = user;
3007
0
  isl_set *class_set;
3008
0
  isl_set *domain;
3009
0
  int disjoint;
3010
0
3011
0
  class_set = isl_set_from_point(pnt);
3012
0
  domain = isl_map_domain(isl_map_intersect_range(
3013
0
        isl_map_copy(domains->sep_class), class_set));
3014
0
  domain = isl_ast_build_compute_gist(domains->build, domain);
3015
0
  domain = isl_ast_build_eliminate(domains->build, domain);
3016
0
3017
0
  disjoint = isl_set_plain_is_disjoint(domain, domains->schedule_domain);
3018
0
  if (disjoint < 0)
3019
0
    return isl_stat_error;
3020
0
  if (disjoint) {
3021
0
    isl_set_free(domain);
3022
0
    return isl_stat_ok;
3023
0
  }
3024
0
3025
0
  return compute_partial_domains(domains, domain);
3026
0
}
3027
3028
/* Extract the domains at the current depth that should be atomic,
3029
 * separated or unrolled and store them in option.
3030
 *
3031
 * The domains specified by the user might overlap, so we make
3032
 * them disjoint by subtracting earlier domains from later domains.
3033
 */
3034
static void compute_domains_init_options(isl_set *option[4],
3035
  __isl_keep isl_ast_build *build)
3036
84
{
3037
84
  enum isl_ast_loop_type type, type2;
3038
84
  isl_set *unroll;
3039
84
3040
84
  for (type = isl_ast_loop_atomic;
3041
336
      type <= isl_ast_loop_separate; ++type) {
3042
252
    option[type] = isl_ast_build_get_option_domain(build, type);
3043
504
    for (type2 = isl_ast_loop_atomic; type2 < type; ++type2)
3044
252
      option[type] = isl_set_subtract(option[type],
3045
252
            isl_set_copy(option[type2]));
3046
252
  }
3047
84
3048
84
  unroll = option[isl_ast_loop_unroll];
3049
84
  unroll = isl_set_coalesce(unroll);
3050
84
  unroll = isl_set_make_disjoint(unroll);
3051
84
  option[isl_ast_loop_unroll] = unroll;
3052
84
}
3053
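The disjointness step in compute_domains_init_options simply subtracts every earlier option domain from the later ones. A minimal sketch with two hypothetical overlapping option domains, assuming isl's public API:

#include <isl/ctx.h>
#include <isl/set.h>

int main(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  isl_set *atomic = isl_set_read_from_str(ctx, "{ [i] : 0 <= i <= 7 }");
  isl_set *unroll = isl_set_read_from_str(ctx, "{ [i] : 4 <= i <= 9 }");
  /* As in the loop above, a later option loses the part already
   * claimed by an earlier option: only [8, 9] remains. */
  unroll = isl_set_subtract(unroll, isl_set_copy(atomic));
  isl_set_dump(unroll);
  isl_set_free(atomic);
  isl_set_free(unroll);
  isl_ctx_free(ctx);
  return 0;
}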
3054
/* Split up the domain at the current depth into disjoint
3055
 * basic sets for which code should be generated separately,
3056
 * based on the user-specified options.
3057
 * Return the list of disjoint basic sets.
3058
 *
3059
 * There are three kinds of domains that we need to keep track of.
3060
 * - the "schedule domain" is the domain of "executed"
3061
 * - the "class domain" is the domain corresponding to the currrent
3062
 *  separation class
3063
 * - the "option domain" is the domain corresponding to one of the options
3064
 *  atomic, unroll or separate
3065
 *
3066
 * We first consider the individual values of the separation classes
3067
 * and split up the domain for each of them separately.
3068
 * Finally, we consider the remainder.  If no separation classes were
3069
 * specified, then we call compute_partial_domains with the universe
3070
 * "class_domain".  Otherwise, we take the "schedule_domain" as "class_domain",
3071
 * with inner dimensions removed.  We do this because we want to
3072
 * avoid computing the complement of the class domains (i.e., the difference
3073
 * between the universe and domains->done).
3074
 */
3075
static __isl_give isl_basic_set_list *compute_domains(
3076
  __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
3077
84
{
3078
84
  struct isl_codegen_domains domains;
3079
84
  isl_ctx *ctx;
3080
84
  isl_set *domain;
3081
84
  isl_union_set *schedule_domain;
3082
84
  isl_set *classes;
3083
84
  isl_space *space;
3084
84
  int n_param;
3085
84
  enum isl_ast_loop_type type;
3086
84
  int empty;
3087
84
3088
84
  if (!executed)
3089
0
    return NULL;
3090
84
3091
84
  ctx = isl_union_map_get_ctx(executed);
3092
84
  domains.list = isl_basic_set_list_alloc(ctx, 0);
3093
84
3094
84
  schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
3095
84
  domain = isl_set_from_union_set(schedule_domain);
3096
84
3097
84
  compute_domains_init_options(domains.option, build);
3098
84
3099
84
  domains.sep_class = isl_ast_build_get_separation_class(build);
3100
84
  classes = isl_map_range(isl_map_copy(domains.sep_class));
3101
84
  n_param = isl_set_dim(classes, isl_dim_param);
3102
84
  classes = isl_set_project_out(classes, isl_dim_param, 0, n_param);
3103
84
3104
84
  space = isl_set_get_space(domain);
3105
84
  domains.build = build;
3106
84
  domains.schedule_domain = isl_set_copy(domain);
3107
84
  domains.executed = executed;
3108
84
  domains.done = isl_set_empty(space);
3109
84
3110
84
  if (isl_set_foreach_point(classes, &compute_class_domains, &domains) < 0)
3111
0
    domains.list = isl_basic_set_list_free(domains.list);
3112
84
  isl_set_free(classes);
3113
84
3114
84
  empty = isl_set_is_empty(domains.done);
3115
84
  if (empty < 0) {
3116
0
    domains.list = isl_basic_set_list_free(domains.list);
3117
0
    domain = isl_set_free(domain);
3118
84
  } else if (empty) {
3119
84
    isl_set_free(domain);
3120
84
    domain = isl_set_universe(isl_set_get_space(domains.done));
3121
84
  } else {
3122
0
    domain = isl_ast_build_eliminate(build, domain);
3123
0
  }
3124
84
  if (compute_partial_domains(&domains, domain) < 0)
3125
0
    domains.list = isl_basic_set_list_free(domains.list);
3126
84
3127
84
  isl_set_free(domains.schedule_domain);
3128
84
  isl_set_free(domains.done);
3129
84
  isl_map_free(domains.sep_class);
3130
336
  for (type = isl_ast_loop_atomic; type <= isl_ast_loop_separate; ++type)
3131
252
    isl_set_free(domains.option[type]);
3132
84
3133
84
  return domains.list;
3134
84
}
3135
3136
/* Generate code for a single component, after shifting (if any)
3137
 * has been applied, in case the schedule was specified as a union map.
3138
 *
3139
 * We first split up the domain at the current depth into disjoint
3140
 * basic sets based on the user-specified options.
3141
 * Then we generate code for each of them and concatenate the results.
3142
 */
3143
static __isl_give isl_ast_graft_list *generate_shifted_component_flat(
3144
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3145
84
{
3146
84
  isl_basic_set_list *domain_list;
3147
84
  isl_ast_graft_list *list = NULL;
3148
84
3149
84
  domain_list = compute_domains(executed, build);
3150
84
  list = generate_parallel_domains(domain_list, executed, build);
3151
84
3152
84
  isl_basic_set_list_free(domain_list);
3153
84
  isl_union_map_free(executed);
3154
84
  isl_ast_build_free(build);
3155
84
3156
84
  return list;
3157
84
}
3158
3159
/* Generate code for a single component, after shifting (if any)
3160
 * has been applied, in case the schedule was specified as a schedule tree
3161
 * and the separate option was specified.
3162
 *
3163
 * We perform separation on the domain of "executed" and then generate
3164
 * an AST for each of the resulting disjoint basic sets.
3165
 */
3166
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_separate(
3167
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3168
27
{
3169
27
  isl_space *space;
3170
27
  isl_set *domain;
3171
27
  isl_basic_set_list *domain_list;
3172
27
  isl_ast_graft_list *list;
3173
27
3174
27
  space = isl_ast_build_get_space(build, 1);
3175
27
  domain = separate_schedule_domains(space,
3176
27
          isl_union_map_copy(executed), build);
3177
27
  domain_list = isl_basic_set_list_from_set(domain);
3178
27
3179
27
  list = generate_parallel_domains(domain_list, executed, build);
3180
27
3181
27
  isl_basic_set_list_free(domain_list);
3182
27
  isl_union_map_free(executed);
3183
27
  isl_ast_build_free(build);
3184
27
3185
27
  return list;
3186
27
}
3187
3188
/* Internal data structure for generate_shifted_component_tree_unroll.
3189
 *
3190
 * "executed" and "build" are inputs to generate_shifted_component_tree_unroll.
3191
 * "list" collects the constructs grafts.
3192
 */
3193
struct isl_ast_unroll_tree_data {
3194
  isl_union_map *executed;
3195
  isl_ast_build *build;
3196
  isl_ast_graft_list *list;
3197
};
3198
3199
/* Initialize data->list to a list of "n" elements.
3200
 */
3201
static int init_unroll_tree(int n, void *user)
3202
2.37k
{
3203
2.37k
  struct isl_ast_unroll_tree_data *data = user;
3204
2.37k
  isl_ctx *ctx;
3205
2.37k
3206
2.37k
  ctx = isl_ast_build_get_ctx(data->build);
3207
2.37k
  data->list = isl_ast_graft_list_alloc(ctx, n);
3208
2.37k
3209
2.37k
  return 0;
3210
2.37k
}
3211
3212
/* Given an iteration of an unrolled domain represented by "bset",
3213
 * generate the corresponding AST and add the result to data->list.
3214
 */
3215
static int do_unroll_tree_iteration(__isl_take isl_basic_set *bset, void *user)
3216
4.51k
{
3217
4.51k
  struct isl_ast_unroll_tree_data *data = user;
3218
4.51k
3219
4.51k
  data->list = add_node(data->list, isl_union_map_copy(data->executed),
3220
4.51k
        bset, isl_ast_build_copy(data->build));
3221
4.51k
3222
4.51k
  return 0;
3223
4.51k
}
3224
3225
/* Generate code for a single component, after shifting (if any)
3226
 * has been applied, in case the schedule was specified as a schedule tree
3227
 * and the unroll option was specified.
3228
 *
3229
 * We call foreach_iteration to iterate over the individual values and
3230
 * construct and collect the corresponding grafts in do_unroll_tree_iteration.
3231
 */
3232
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_unroll(
3233
  __isl_take isl_union_map *executed, __isl_take isl_set *domain,
3234
  __isl_take isl_ast_build *build)
3235
2.37k
{
3236
2.37k
  struct isl_ast_unroll_tree_data data = { executed, build, NULL };
3237
2.37k
3238
2.37k
  if (foreach_iteration(domain, build, &init_unroll_tree,
3239
2.37k
        &do_unroll_tree_iteration, &data) < 0)
3240
0
    data.list = isl_ast_graft_list_free(data.list);
3241
2.37k
3242
2.37k
  isl_union_map_free(executed);
3243
2.37k
  isl_ast_build_free(build);
3244
2.37k
3245
2.37k
  return data.list;
3246
2.37k
}
3247
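foreach_iteration is internal to this file, but the effect of unrolling a bounded domain can be approximated with the public isl_set_foreach_point, which visits every iteration as a point. A minimal sketch (the real code grafts an AST node per iteration instead of printing):

#include <isl/ctx.h>
#include <isl/set.h>
#include <isl/point.h>

static isl_stat print_point(isl_point *pnt, void *user)
{
  isl_point_dump(pnt);
  isl_point_free(pnt);
  return isl_stat_ok;
}

int main(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  isl_set *domain = isl_set_read_from_str(ctx, "{ [i] : 0 <= i < 4 }");
  /* Visits [0], [1], [2] and [3] in turn. */
  isl_set_foreach_point(domain, &print_point, NULL);
  isl_set_free(domain);
  isl_ctx_free(ctx);
  return 0;
}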
3248
/* Does "domain" involve a disjunction that is purely based on
3249
 * constraints involving only outer dimensions?
3250
 *
3251
 * In particular, is there a disjunction such that the constraints
3252
 * involving the current and later dimensions are the same over
3253
 * all the disjuncts?
3254
 */
3255
static isl_bool has_pure_outer_disjunction(__isl_keep isl_set *domain,
3256
  __isl_keep isl_ast_build *build)
3257
870
{
3258
870
  isl_basic_set *hull;
3259
870
  isl_set *shared, *inner;
3260
870
  isl_bool equal;
3261
870
  int depth, dim;
3262
870
3263
870
  if (isl_set_n_basic_set(domain) <= 1)
3264
804
    return isl_bool_false;
3265
66
3266
66
  inner = isl_set_copy(domain);
3267
66
  depth = isl_ast_build_get_depth(build);
3268
66
  dim = isl_set_dim(inner, isl_dim_set);
3269
66
  inner = isl_set_drop_constraints_not_involving_dims(inner,
3270
66
              isl_dim_set, depth, dim - depth);
3271
66
  hull = isl_set_plain_unshifted_simple_hull(isl_set_copy(inner));
3272
66
  shared = isl_set_from_basic_set(hull);
3273
66
  equal = isl_set_plain_is_equal(inner, shared);
3274
66
  isl_set_free(inner);
3275
66
  isl_set_free(shared);
3276
66
3277
66
  return equal;
3278
66
}
3279
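The test above can be reproduced standalone on a set whose disjunction only constrains the outer dimension. A minimal sketch at depth 1, assuming isl's public API:

#include <stdio.h>
#include <isl/ctx.h>
#include <isl/set.h>

int main(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  /* The disjunction only constrains the outer dimension i. */
  isl_set *domain = isl_set_read_from_str(ctx,
    "{ [i, j] : (i <= 3 or i >= 7) and 0 <= j <= 9 }");
  isl_set *inner = isl_set_copy(domain);
  /* Keep only constraints involving the inner dimension j. */
  inner = isl_set_drop_constraints_not_involving_dims(inner,
              isl_dim_set, 1, 1);
  isl_basic_set *hull_bset =
    isl_set_plain_unshifted_simple_hull(isl_set_copy(inner));
  isl_set *shared = isl_set_from_basic_set(hull_bset);
  /* Prints 1: the inner constraints agree over all disjuncts. */
  printf("%d\n", isl_set_plain_is_equal(inner, shared));
  isl_set_free(inner);
  isl_set_free(shared);
  isl_set_free(domain);
  isl_ctx_free(ctx);
  return 0;
}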
3280
/* Generate code for a single component, after shifting (if any)
3281
 * has been applied, in case the schedule was specified as a schedule tree.
3282
 * In particular, handle the base case where there is either no isolated
3284
 * set, or we are inside the isolated set (in which case "isolated" is set),
3285
 * or we are handling the iterations that precede or follow the isolated set.
3285
 *
3286
 * The schedule domain is broken up or combined into basic sets
3287
 * according to the AST generation option specified in the current
3288
 * schedule node, which may be either atomic, separate, unroll or
3289
 * unspecified.  If the option is unspecified, then we currently simply
3290
 * split the schedule domain into disjoint basic sets.
3291
 *
3292
 * In case the separate option is specified, the AST generation is
3293
 * handled by generate_shifted_component_tree_separate.
3294
 * In the other cases, we need the global schedule domain.
3295
 * In the unroll case, the AST generation is then handled by
3296
 * generate_shifted_component_tree_unroll which needs the actual
3297
 * schedule domain (with divs that may refer to the current dimension)
3298
 * so that stride detection can be performed.
3299
 * In the atomic or unspecified case, inner dimensions and divs involving
3300
 * the current dimensions should be eliminated.
3301
 * The result is then either combined into a single basic set or
3302
 * split up into disjoint basic sets.
3303
 * Finally an AST is generated for each basic set and the results are
3304
 * concatenated.
3305
 *
3306
 * If the schedule domain involves a disjunction that is purely based on
3307
 * constraints involving only outer dimensions, then it is treated as
3308
 * if atomic was specified.  This ensures that only a single loop
3309
 * is generated instead of a sequence of identical loops with
3310
 * different guards.
3311
 */
3312
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_base(
3313
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
3314
  int isolated)
3315
3.26k
{
3316
3.26k
  isl_bool outer_disjunction;
3317
3.26k
  isl_union_set *schedule_domain;
3318
3.26k
  isl_set *domain;
3319
3.26k
  isl_basic_set_list *domain_list;
3320
3.26k
  isl_ast_graft_list *list;
3321
3.26k
  enum isl_ast_loop_type type;
3322
3.26k
3323
3.26k
  type = isl_ast_build_get_loop_type(build, isolated);
3324
3.26k
  if (type < 0)
3325
0
    goto error;
3326
3.26k
3327
3.26k
  if (type == isl_ast_loop_separate)
3328
27
    return generate_shifted_component_tree_separate(executed,
3329
27
                build);
3330
3.24k
3331
3.24k
  schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
3332
3.24k
  domain = isl_set_from_union_set(schedule_domain);
3333
3.24k
3334
3.24k
  if (type == isl_ast_loop_unroll)
3335
2.37k
    return generate_shifted_component_tree_unroll(executed, domain,
3336
2.37k
                build);
3337
870
3338
870
  domain = isl_ast_build_eliminate(build, domain);
3339
870
  domain = isl_set_coalesce_preserve(domain);
3340
870
3341
870
  outer_disjunction = has_pure_outer_disjunction(domain, build);
3342
870
  if (outer_disjunction < 0)
3343
0
    domain = isl_set_free(domain);
3344
870
3345
870
  if (outer_disjunction || type == isl_ast_loop_atomic) {
3346
28
    isl_basic_set *hull;
3347
28
    hull = isl_set_unshifted_simple_hull(domain);
3348
28
    domain_list = isl_basic_set_list_from_basic_set(hull);
3349
842
  } else {
3350
842
    domain = isl_set_make_disjoint(domain);
3351
842
    domain_list = isl_basic_set_list_from_set(domain);
3352
842
  }
3353
870
3354
870
  list = generate_parallel_domains(domain_list, executed, build);
3355
870
3356
870
  isl_basic_set_list_free(domain_list);
3357
870
  isl_union_map_free(executed);
3358
870
  isl_ast_build_free(build);
3359
870
3360
870
  return list;
3361
0
error:
3362
0
  isl_union_map_free(executed);
3363
0
  isl_ast_build_free(build);
3364
0
  return NULL;
3365
870
}
3366
3367
/* Extract out the disjunction imposed by "domain" on the outer
3368
 * schedule dimensions.
3369
 *
3370
 * In particular, remove all inner dimensions from "domain" (including
3371
 * the current dimension) and then remove the constraints that are shared
3372
 * by all disjuncts in the result.
3373
 */
3374
static __isl_give isl_set *extract_disjunction(__isl_take isl_set *domain,
3375
  __isl_keep isl_ast_build *build)
3376
800
{
3377
800
  isl_set *hull;
3378
800
  int depth, dim;
3379
800
3380
800
  domain = isl_ast_build_specialize(build, domain);
3381
800
  depth = isl_ast_build_get_depth(build);
3382
800
  dim = isl_set_dim(domain, isl_dim_set);
3383
800
  domain = isl_set_eliminate(domain, isl_dim_set, depth, dim - depth);
3384
800
  domain = isl_set_remove_unknown_divs(domain);
3385
800
  hull = isl_set_copy(domain);
3386
800
  hull = isl_set_from_basic_set(isl_set_unshifted_simple_hull(hull));
3387
800
  domain = isl_set_gist(domain, hull);
3388
800
3389
800
  return domain;
3390
800
}
3391
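The gist against the simple hull is what removes the shared constraints and leaves only the disjunction. A minimal sketch without inner dimensions (so the isl_set_eliminate step would be a no-op and is skipped), assuming isl's public API:

#include <isl/ctx.h>
#include <isl/set.h>

int main(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  isl_set *domain = isl_set_read_from_str(ctx,
    "{ [i] : 0 <= i <= 9 and (i <= 3 or i >= 7) }");
  isl_set *hull = isl_set_from_basic_set(
    isl_set_unshifted_simple_hull(isl_set_copy(domain)));
  /* The hull 0 <= i <= 9 holds for all disjuncts; the gist keeps
   * roughly only the disjunction i <= 3 or i >= 7. */
  domain = isl_set_gist(domain, hull);
  isl_set_dump(domain);
  isl_set_free(domain);
  isl_ctx_free(ctx);
  return 0;
}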
3392
/* Add "guard" to the grafts in "list".
3393
 * "build" is the outer AST build, while "sub_build" includes "guard"
3394
 * in its generated domain.
3395
 *
3396
 * First combine the grafts into a single graft and then add the guard.
3397
 * If the list is empty, or if some error occurred, then simply return
3398
 * the list.
3399
 */
3400
static __isl_give isl_ast_graft_list *list_add_guard(
3401
  __isl_take isl_ast_graft_list *list, __isl_keep isl_set *guard,
3402
  __isl_keep isl_ast_build *build, __isl_keep isl_ast_build *sub_build)
3403
800
{
3404
800
  isl_ast_graft *graft;
3405
800
3406
800
  list = isl_ast_graft_list_fuse(list, sub_build);
3407
800
3408
800
  if (isl_ast_graft_list_n_ast_graft(list) != 1)
3409
789
    return list;
3410
11
3411
11
  graft = isl_ast_graft_list_get_ast_graft(list, 0);
3412
11
  graft = isl_ast_graft_add_guard(graft, isl_set_copy(guard), build);
3413
11
  list = isl_ast_graft_list_set_ast_graft(list, 0, graft);
3414
11
3415
11
  return list;
3416
11
}
3417
3418
/* Generate code for a single component, after shifting (if any)
3419
 * has been applied, in case the schedule was specified as a schedule tree.
3420
 * In particular, do so for the specified subset of the schedule domain.
3421
 *
3422
 * If we are outside of the isolated part, then "domain" may include
3423
 * a disjunction.  Explicitly generate this disjunction at this point
3424
 * instead of relying on the disjunction getting hoisted back up
3425
 * to this level.
3426
 */
3427
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_part(
3428
  __isl_keep isl_union_map *executed, __isl_take isl_set *domain,
3429
  __isl_keep isl_ast_build *build, int isolated)
3430
3.66k
{
3431
3.66k
  isl_union_set *uset;
3432
3.66k
  isl_ast_graft_list *list;
3433
3.66k
  isl_ast_build *sub_build;
3434
3.66k
  int empty;
3435
3.66k
3436
3.66k
  uset = isl_union_set_from_set(isl_set_copy(domain));
3437
3.66k
  executed = isl_union_map_copy(executed);
3438
3.66k
  executed = isl_union_map_intersect_domain(executed, uset);
3439
3.66k
  empty = isl_union_map_is_empty(executed);
3440
3.66k
  if (empty < 0)
3441
0
    goto error;
3442
3.66k
  if (empty) {
3443
1.95k
    isl_ctx *ctx;
3444
1.95k
    isl_union_map_free(executed);
3445
1.95k
    isl_set_free(domain);
3446
1.95k
    ctx = isl_ast_build_get_ctx(build);
3447
1.95k
    return isl_ast_graft_list_alloc(ctx, 0);
3448
1.95k
  }
3449
1.71k
3450
1.71k
  sub_build = isl_ast_build_copy(build);
3451
1.71k
  if (!isolated) {
3452
800
    domain = extract_disjunction(domain, build);
3453
800
    sub_build = isl_ast_build_restrict_generated(sub_build,
3454
800
              isl_set_copy(domain));
3455
800
  }
3456
1.71k
  list = generate_shifted_component_tree_base(executed,
3457
1.71k
        isl_ast_build_copy(sub_build), isolated);
3458
1.71k
  if (!isolated)
3459
800
    list = list_add_guard(list, domain, build, sub_build);
3460
1.71k
  isl_ast_build_free(sub_build);
3461
1.71k
  isl_set_free(domain);
3462
1.71k
  return list;
3463
0
error:
3464
0
  isl_union_map_free(executed);
3465
0
  isl_set_free(domain);
3466
0
  return NULL;
3467
1.71k
}
3468
3469
/* Generate code for a single component, after shifting (if any)
3470
 * has been applied, in case the schedule was specified as a schedule tree.
3471
 * In particular, do so for the specified sequence of subsets
3472
 * of the schedule domain, "before", "isolated", "after" and "other",
3473
 * where only the "isolated" part is considered to be isolated.
3474
 */
3475
static __isl_give isl_ast_graft_list *generate_shifted_component_parts(
3476
  __isl_take isl_union_map *executed, __isl_take isl_set *before,
3477
  __isl_take isl_set *isolated, __isl_take isl_set *after,
3478
  __isl_take isl_set *other, __isl_take isl_ast_build *build)
3479
917
{
3480
917
  isl_ast_graft_list *list, *res;
3481
917
3482
917
  res = generate_shifted_component_tree_part(executed, before, build, 0);
3483
917
  list = generate_shifted_component_tree_part(executed, isolated,
3484
917
                build, 1);
3485
917
  res = isl_ast_graft_list_concat(res, list);
3486
917
  list = generate_shifted_component_tree_part(executed, after, build, 0);
3487
917
  res = isl_ast_graft_list_concat(res, list);
3488
917
  list = generate_shifted_component_tree_part(executed, other, build, 0);
3489
917
  res = isl_ast_graft_list_concat(res, list);
3490
917
3491
917
  isl_union_map_free(executed);
3492
917
  isl_ast_build_free(build);
3493
917
3494
917
  return res;
3495
917
}
3496
3497
/* Does "set" intersect "first", but not "second"?
3498
 */
3499
static isl_bool only_intersects_first(__isl_keep isl_set *set,
3500
  __isl_keep isl_set *first, __isl_keep isl_set *second)
3501
917
{
3502
917
  isl_bool disjoint;
3503
917
3504
917
  disjoint = isl_set_is_disjoint(set, first);
3505
917
  if (disjoint < 0)
3506
0
    return isl_bool_error;
3507
917
  if (disjoint)
3508
906
    return isl_bool_false;
3509
11
3510
11
  return isl_set_is_disjoint(set, second);
3511
11
}
3512
3513
/* Generate code for a single component, after shifting (if any)
3514
 * has been applied, in case the schedule was specified as a schedule tree.
3515
 * In particular, do so in case of isolation where there is
3516
 * only an "isolated" part and an "after" part.
3517
 * "dead1" and "dead2" are freed by this function in order to simplify
3518
 * the caller.
3519
 *
3520
 * The "before" and "other" parts are set to empty sets.
3521
 */
3522
static __isl_give isl_ast_graft_list *generate_shifted_component_only_after(
3523
  __isl_take isl_union_map *executed, __isl_take isl_set *isolated,
3524
  __isl_take isl_set *after, __isl_take isl_ast_build *build,
3525
  __isl_take isl_set *dead1, __isl_take isl_set *dead2)
3526
11
{
3527
11
  isl_set *empty;
3528
11
3529
11
  empty = isl_set_empty(isl_set_get_space(after));
3530
11
  isl_set_free(dead1);
3531
11
  isl_set_free(dead2);
3532
11
  return generate_shifted_component_parts(executed, isl_set_copy(empty),
3533
11
            isolated, after, empty, build);
3534
11
}
3535
3536
/* Generate code for a single component, after shifting (if any)
3537
 * has been applied, in case the schedule was specified as a schedule tree.
3538
 *
3539
 * We first check whether the user has specified an isolated schedule domain
3540
 * and whether we are not already outside of this isolated schedule domain.
3541
 * If so, we break up the schedule domain into iterations that
3542
 * precede the isolated domain, the isolated domain itself,
3543
 * the iterations that follow the isolated domain and
3544
 * the remaining iterations (those that are incomparable
3545
 * to the isolated domain).
3546
 * We generate an AST for each piece and concatenate the results.
3547
 *
3548
 * If the isolated domain is not convex, then it is replaced
3549
 * by a convex superset to ensure that the sets of preceding and
3550
 * following iterations are properly defined and, in particular,
3551
 * that there are no intermediate iterations that do not belong
3552
 * to the isolated domain.
3553
 *
3554
 * In the special case where at least one element of the schedule
3555
 * domain that does not belong to the isolated domain needs
3556
 * to be scheduled after this isolated domain, but none of those
3557
 * elements need to be scheduled before, break up the schedule domain
3558
 * into only two parts: the isolated domain, and a part that will be
3559
 * scheduled after the isolated domain.
3560
 *
3561
 * If no isolated set has been specified, then we generate an
3562
 * AST for the entire inverse schedule.
3563
 */
3564
static __isl_give isl_ast_graft_list *generate_shifted_component_tree(
3565
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3566
2.46k
{
3567
2.46k
  int i, depth;
3568
2.46k
  int empty, has_isolate;
3569
2.46k
  isl_space *space;
3570
2.46k
  isl_union_set *schedule_domain;
3571
2.46k
  isl_set *domain;
3572
2.46k
  isl_basic_set *hull;
3573
2.46k
  isl_set *isolated, *before, *after, *test;
3574
2.46k
  isl_map *gt, *lt;
3575
2.46k
  isl_bool pure;
3576
2.46k
3577
2.46k
  build = isl_ast_build_extract_isolated(build);
3578
2.46k
  has_isolate = isl_ast_build_has_isolated(build);
3579
2.46k
  if (has_isolate < 0)
3580
0
    executed = isl_union_map_free(executed);
3581
2.46k
  else if (!has_isolate)
3582
979
    return generate_shifted_component_tree_base(executed, build, 0);
3583
1.48k
3584
1.48k
  schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
3585
1.48k
  domain = isl_set_from_union_set(schedule_domain);
3586
1.48k
3587
1.48k
  isolated = isl_ast_build_get_isolated(build);
3588
1.48k
  isolated = isl_set_intersect(isolated, isl_set_copy(domain));
3589
1.48k
  test = isl_ast_build_specialize(build, isl_set_copy(isolated));
3590
1.48k
  empty = isl_set_is_empty(test);
3591
1.48k
  isl_set_free(test);
3592
1.48k
  if (empty < 0)
3593
0
    goto error;
3594
1.48k
  if (empty) {
3595
572
    isl_set_free(isolated);
3596
572
    isl_set_free(domain);
3597
572
    return generate_shifted_component_tree_base(executed, build, 0);
3598
572
  }
3599
917
  isolated = isl_ast_build_eliminate(build, isolated);
3600
917
  hull = isl_set_unshifted_simple_hull(isolated);
3601
917
  isolated = isl_set_from_basic_set(hull);
3602
917
3603
917
  depth = isl_ast_build_get_depth(build);
3604
917
  space = isl_space_map_from_set(isl_set_get_space(isolated));
3605
917
  gt = isl_map_universe(space);
3606
7.97k
  for (i = 0; i < depth; ++i)
3607
7.05k
    gt = isl_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
3608
917
  gt = isl_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);
3609
917
  lt = isl_map_reverse(isl_map_copy(gt));
3610
917
  before = isl_set_apply(isl_set_copy(isolated), gt);
3611
917
  after = isl_set_apply(isl_set_copy(isolated), lt);
3612
917
3613
917
  domain = isl_set_subtract(domain, isl_set_copy(isolated));
3614
917
  pure = only_intersects_first(domain, after, before);
3615
917
  if (pure < 0)
3616
0
    executed = isl_union_map_free(executed);
3617
917
  else if (pure)
3618
11
    return generate_shifted_component_only_after(executed, isolated,
3619
11
            domain, build, before, after);
3620
906
  domain = isl_set_subtract(domain, isl_set_copy(before));
3621
906
  domain = isl_set_subtract(domain, isl_set_copy(after));
3622
906
  after = isl_set_subtract(after, isl_set_copy(isolated));
3623
906
  after = isl_set_subtract(after, isl_set_copy(before));
3624
906
  before = isl_set_subtract(before, isl_set_copy(isolated));
3625
906
3626
906
  return generate_shifted_component_parts(executed, before, isolated,
3627
906
            after, domain, build);
3628
0
error:
3629
0
  isl_set_free(domain);
3630
0
  isl_set_free(isolated);
3631
0
  isl_union_map_free(executed);
3632
0
  isl_ast_build_free(build);
3633
0
  return NULL;
3634
906
}
3635
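The "before" and "after" pieces are obtained by applying order relations to the isolated set. A minimal sketch at depth 0 with a hypothetical isolated range, assuming isl's public API; note that both results still overlap the isolated set here, which is why the function subtracts the pieces from each other afterwards:

#include <isl/ctx.h>
#include <isl/space.h>
#include <isl/map.h>
#include <isl/set.h>

int main(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  isl_set *isolated = isl_set_read_from_str(ctx, "{ [i] : 4 <= i <= 6 }");
  isl_space *space = isl_space_map_from_set(isl_set_get_space(isolated));
  isl_map *gt = isl_map_universe(space);
  /* depth == 0: no outer dimensions to equate. */
  gt = isl_map_order_gt(gt, isl_dim_in, 0, isl_dim_out, 0);
  isl_map *lt = isl_map_reverse(isl_map_copy(gt));
  isl_set *before = isl_set_apply(isl_set_copy(isolated), gt);
  isl_set *after = isl_set_apply(isl_set_copy(isolated), lt);
  isl_set_dump(before);  /* { [i] : i <= 5 } */
  isl_set_dump(after);   /* { [i] : i >= 5 } */
  isl_set_free(before);
  isl_set_free(after);
  isl_set_free(isolated);
  isl_ctx_free(ctx);
  return 0;
}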
3636
/* Generate code for a single component, after shifting (if any)
3637
 * has been applied.
3638
 *
3639
 * Call generate_shifted_component_tree or generate_shifted_component_flat
3640
 * depending on whether the schedule was specified as a schedule tree.
3641
 */
3642
static __isl_give isl_ast_graft_list *generate_shifted_component(
3643
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3644
2.55k
{
3645
2.55k
  if (isl_ast_build_has_schedule_node(build))
3646
2.46k
    return generate_shifted_component_tree(executed, build);
3647
84
  else
3648
84
    return generate_shifted_component_flat(executed, build);
3649
2.55k
}
3650
3651
struct isl_set_map_pair {
3652
  isl_set *set;
3653
  isl_map *map;
3654
};
3655
3656
/* Given an array "domain" of isl_set_map_pairs and an array "order"
3657
 * of indices into the "domain" array,
3658
 * return the union of the "map" fields of the elements
3659
 * indexed by the first "n" elements of "order".
3660
 */
3661
static __isl_give isl_union_map *construct_component_executed(
3662
  struct isl_set_map_pair *domain, int *order, int n)
3663
200
{
3664
200
  int i;
3665
200
  isl_map *map;
3666
200
  isl_union_map *executed;
3667
200
3668
200
  map = isl_map_copy(domain[order[0]].map);
3669
200
  executed = isl_union_map_from_map(map);
3670
403
  for (i = 1; i < n; ++i) {
3671
203
    map = isl_map_copy(domain[order[i]].map);
3672
203
    executed = isl_union_map_add_map(executed, map);
3673
203
  }
3674
200
3675
200
  return executed;
3676
200
}
3677
3678
/* Generate code for a single component, after shifting (if any)
3679
 * has been applied.
3680
 *
3681
 * The component inverse schedule is specified as the "map" fields
3682
 * of the elements of "domain" indexed by the first "n" elements of "order".
3683
 */
3684
static __isl_give isl_ast_graft_list *generate_shifted_component_from_list(
3685
  struct isl_set_map_pair *domain, int *order, int n,
3686
  __isl_take isl_ast_build *build)
3687
200
{
3688
200
  isl_union_map *executed;
3689
200
3690
200
  executed = construct_component_executed(domain, order, n);
3691
200
  return generate_shifted_component(executed, build);
3692
200
}
3693
3694
/* Does set dimension "pos" of "set" have an obviously fixed value?
3695
 */
3696
static int dim_is_fixed(__isl_keep isl_set *set, int pos)
3697
486
{
3698
486
  int fixed;
3699
486
  isl_val *v;
3700
486
3701
486
  v = isl_set_plain_get_val_if_fixed(set, isl_dim_set, pos);
3702
486
  if (!v)
3703
0
    return -1;
3704
486
  fixed = !isl_val_is_nan(v);
3705
486
  isl_val_free(v);
3706
486
3707
486
  return fixed;
3708
486
}
3709
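isl_set_plain_get_val_if_fixed returns NaN when the dimension is not obviously fixed, which is what the NaN test above checks. A minimal sketch, assuming isl's public API:

#include <stdio.h>
#include <isl/ctx.h>
#include <isl/set.h>
#include <isl/val.h>

int main(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  isl_set *fixed = isl_set_read_from_str(ctx, "{ [i, 5] }");
  isl_set *range = isl_set_read_from_str(ctx, "{ [i, j] : 0 <= j <= 9 }");
  isl_val *v = isl_set_plain_get_val_if_fixed(fixed, isl_dim_set, 1);
  printf("fixed: %d\n", !isl_val_is_nan(v));  /* 1: dim 1 is always 5 */
  isl_val_free(v);
  v = isl_set_plain_get_val_if_fixed(range, isl_dim_set, 1);
  printf("fixed: %d\n", !isl_val_is_nan(v));  /* 0: dim 1 varies */
  isl_val_free(v);
  isl_set_free(fixed);
  isl_set_free(range);
  isl_ctx_free(ctx);
  return 0;
}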
3710
/* Given an array "domain" of isl_set_map_pairs and an array "order"
3711
 * of indices into the "domain" array,
3712
 * do all (except for at most one) of the "set" fields of the elements
3713
 * indexed by the first "n" elements of "order" have a fixed value
3714
 * at position "depth"?
3715
 */
3716
static int at_most_one_non_fixed(struct isl_set_map_pair *domain,
3717
  int *order, int n, int depth)
3718
119
{
3719
119
  int i;
3720
119
  int non_fixed = -1;
3721
119
3722
269
  for (i = 0; i < n; ++i) {
3723
253
    int f;
3724
253
3725
253
    f = dim_is_fixed(domain[order[i]].set, depth);
3726
253
    if (f < 0)
3727
0
      return -1;
3728
253
    if (f)
3729
42
      continue;
3730
211
    if (non_fixed >= 0)
3731
103
      return 0;
3732
108
    non_fixed = i;
3733
108
  }
3734
119
3735
119
  return 1;
3736
119
}
3737
3738
/* Given an array "domain" of isl_set_map_pairs and an array "order"
3739
 * of indices into the "domain" array,
3740
 * eliminate the inner dimensions from the "set" field of the elements
3741
 * indexed by the first "n" elements of "order", provided the current
3742
 * dimension does not have a fixed value.
3743
 *
3744
 * Return the index of the first element in "order" with a corresponding
3745
 * "set" field that does not have an (obviously) fixed value.
3746
 */
3747
static int eliminate_non_fixed(struct isl_set_map_pair *domain,
3748
  int *order, int n, int depth, __isl_keep isl_ast_build *build)
3749
78
{
3750
78
  int i;
3751
78
  int base = -1;
3752
78
3753
298
  for (i = n - 1; i >= 0; --i) {
3754
220
    int f;
3755
220
    f = dim_is_fixed(domain[order[i]].set, depth);
3756
220
    if (f < 0)
3757
0
      return -1;
3758
220
    if (f)
3759
1
      continue;
3760
219
    domain[order[i]].set = isl_ast_build_eliminate_inner(build,
3761
219
              domain[order[i]].set);
3762
219
    base = i;
3763
219
  }
3764
78
3765
78
  return base;
3766
78
}
3767
3768
/* Given an array "domain" of isl_set_map_pairs and an array "order"
3769
 * of indices into the "domain" array,
3770
 * find the element of "domain" (amongst those indexed by the first "n"
3771
 * elements of "order") with the "set" field that has the smallest
3772
 * value for the current iterator.
3773
 *
3774
 * Note that the domain with the smallest value may depend on the parameters
3775
 * and/or outer loop dimension.  Since the result of this function is only
3776
 * used as a heuristic, we only make a reasonable attempt at finding the best
3777
 * domain, one that should work in case a single domain provides the smallest
3778
 * value for the current dimension over all values of the parameters
3779
 * and outer dimensions.
3780
 *
3781
 * In particular, we compute the smallest value of the first domain
3782
 * and replace it by that of any later domain if that later domain
3783
 * has a smallest value that is smaller for at least some value
3784
 * of the parameters and outer dimensions.
3785
 */
3786
static int first_offset(struct isl_set_map_pair *domain, int *order, int n,
3787
  __isl_keep isl_ast_build *build)
3788
3
{
3789
3
  int i;
3790
3
  isl_map *min_first;
3791
3
  int first = 0;
3792
3
3793
3
  min_first = isl_ast_build_map_to_iterator(build,
3794
3
          isl_set_copy(domain[order[0]].set));
3795
3
  min_first = isl_map_lexmin(min_first);
3796
3
3797
9
  for (i = 1; i < n; ++i) {
3798
6
    isl_map *min, *test;
3799
6
    int empty;
3800
6
3801
6
    min = isl_ast_build_map_to_iterator(build,
3802
6
          isl_set_copy(domain[order[i]].set));
3803
6
    min = isl_map_lexmin(min);
3804
6
    test = isl_map_copy(min);
3805
6
    test = isl_map_apply_domain(isl_map_copy(min_first), test);
3806
6
    test = isl_map_order_lt(test, isl_dim_in, 0, isl_dim_out, 0);
3807
6
    empty = isl_map_is_empty(test);
3808
6
    isl_map_free(test);
3809
6
    if (empty >= 0 && !empty) {
3810
1
      isl_map_free(min_first);
3811
1
      first = i;
3812
1
      min_first = min;
3813
1
    } else
3814
5
      isl_map_free(min);
3815
6
3816
6
    if (empty < 0)
3817
0
      break;
3818
6
  }
3819
3
3820
3
  isl_map_free(min_first);
3821
3
3822
3
  return i < n ? -1 : first;
3823
3
}
3824
3825
/* Construct a shifted inverse schedule based on the original inverse schedule,
3826
 * the stride and the offset.
3827
 *
3828
 * The original inverse schedule is specified as the "map" fields
3829
 * of the elements of "domain" indexed by the first "n" elements of "order".
3830
 *
3831
 * "stride" and "offset" are such that the difference
3832
 * between the values of the current dimension of domain "i"
3833
 * and the values of the current dimension for some reference domain are
3834
 * equal to
3835
 *
3836
 *  stride * integer + offset[i]
3837
 *
3838
 * Moreover, 0 <= offset[i] < stride.
3839
 *
3840
 * For each domain, we create a map
3841
 *
3842
 *  { [..., j, ...] -> [..., j - offset[i], offset[i], ....] }
3843
 *
3844
 * where j refers to the current dimension and the other dimensions are
3845
 * unchanged, and apply this map to the original schedule domain.
3846
 *
3847
 * For example, for the original schedule
3848
 *
3849
 *  { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
3850
 *
3851
 * and assuming the offset is 0 for the A domain and 1 for the B domain,
3852
 * we apply the mapping
3853
 *
3854
 *  { [j] -> [j, 0] }
3855
 *
3856
 * to the schedule of the "A" domain and the mapping
3857
 *
3858
 *  { [j] -> [j - 1, 1] }
3859
 *
3860
 * to the schedule of the "B" domain.
3861
 *
3862
 *
3863
 * Note that after the transformation, the differences between pairs
3864
 * of values of the current dimension over all domains are multiples
3865
 * of stride and that we have therefore exposed the stride.
3866
 *
3867
 *
3868
 * To see that the mapping preserves the lexicographic order,
3869
 * first note that each of the individual maps above preserves the order.
3870
 * If the value of the current iterator is j1 in one domain and j2 in another,
3871
 * then if j1 = j2, we know that the same map is applied to both domains
3872
 * and the order is preserved.
3873
 * Otherwise, let us assume, without loss of generality, that j1 < j2.
3874
 * If c1 >= c2 (with c1 and c2 the corresponding offsets), then
3875
 *
3876
 *  j1 - c1 < j2 - c2
3877
 *
3878
 * and the order is preserved.
3879
 * If c1 < c2, then we know
3880
 *
3881
 *  0 <= c2 - c1 < s
3882
 *
3883
 * We also have
3884
 *
3885
 *  j2 - j1 = n * s + r
3886
 *
3887
 * with n >= 0 and 0 <= r < s.
3888
 * In other words, r = c2 - c1.
3889
 * If n > 0, then
3890
 *
3891
 *  j1 - c1 < j2 - c2
3892
 *
3893
 * If n = 0, then
3894
 *
3895
 *  j1 - c1 = j2 - c2
3896
 *
3897
 * and so
3898
 *
3899
 *  (j1 - c1, c1) << (j2 - c2, c2)
3900
 *
3901
 * with "<<" the lexicographic order, proving that the order is preserved
3902
 * in all cases.
3903
 */
3904
static __isl_give isl_union_map *construct_shifted_executed(
3905
  struct isl_set_map_pair *domain, int *order, int n,
3906
  __isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
3907
  __isl_take isl_ast_build *build)
3908
3
{
3909
3
  int i;
3910
3
  isl_union_map *executed;
3911
3
  isl_space *space;
3912
3
  isl_map *map;
3913
3
  int depth;
3914
3
  isl_constraint *c;
3915
3
3916
3
  depth = isl_ast_build_get_depth(build);
3917
3
  space = isl_ast_build_get_space(build, 1);
3918
3
  executed = isl_union_map_empty(isl_space_copy(space));
3919
3
  space = isl_space_map_from_set(space);
3920
3
  map = isl_map_identity(isl_space_copy(space));
3921
3
  map = isl_map_eliminate(map, isl_dim_out, depth, 1);
3922
3
  map = isl_map_insert_dims(map, isl_dim_out, depth + 1, 1);
3923
3
  space = isl_space_insert_dims(space, isl_dim_out, depth + 1, 1);
3924
3
3925
3
  c = isl_constraint_alloc_equality(isl_local_space_from_space(space));
3926
3
  c = isl_constraint_set_coefficient_si(c, isl_dim_in, depth, 1);
3927
3
  c = isl_constraint_set_coefficient_si(c, isl_dim_out, depth, -1);
3928
3
3929
12
  for (i = 0; i < n; ++i) {
3930
9
    isl_map *map_i;
3931
9
    isl_val *v;
3932
9
3933
9
    v = isl_multi_val_get_val(offset, i);
3934
9
    if (!v)
3935
0
      break;
3936
9
    map_i = isl_map_copy(map);
3937
9
    map_i = isl_map_fix_val(map_i, isl_dim_out, depth + 1,
3938
9
          isl_val_copy(v));
3939
9
    v = isl_val_neg(v);
3940
9
    c = isl_constraint_set_constant_val(c, v);
3941
9
    map_i = isl_map_add_constraint(map_i, isl_constraint_copy(c));
3942
9
3943
9
    map_i = isl_map_apply_domain(isl_map_copy(domain[order[i]].map),
3944
9
            map_i);
3945
9
    executed = isl_union_map_add_map(executed, map_i);
3946
9
  }
3947
3
3948
3
  isl_constraint_free(c);
3949
3
  isl_map_free(map);
3950
3
3951
3
  if (i < n)
3952
0
    executed = isl_union_map_free(executed);
3953
3
3954
3
  return executed;
3955
3
}
3956
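The shift for the "B" domain in the example above can be reproduced directly. The function itself applies the map to the domain of the inverse schedule; for readability this sketch composes it with the forward schedule instead, assuming isl's public API:

#include <isl/ctx.h>
#include <isl/map.h>

int main(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  isl_map *sched = isl_map_read_from_str(ctx,
    "{ B[i] -> [2i + 1] : 0 <= i < 10 }");
  isl_map *shift = isl_map_read_from_str(ctx, "{ [j] -> [j - 1, 1] }");
  /* The schedule values 2i + 1 become [2i, 1]: the first output
   * dimension now has stride 2 with offset 0. */
  sched = isl_map_apply_range(sched, shift);
  isl_map_dump(sched);
  isl_map_free(sched);
  isl_ctx_free(ctx);
  return 0;
}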
3957
/* Generate code for a single component, after exposing the stride,
3958
 * given that the schedule domain is "shifted strided".
3959
 *
3960
 * The component inverse schedule is specified as the "map" fields
3961
 * of the elements of "domain" indexed by the first "n" elements of "order".
3962
 *
3963
 * The schedule domain being "shifted strided" means that the differences
3964
 * between the values of the current dimension of domain "i"
3965
 * and the values of the current dimension for some reference domain are
3966
 * equal to
3967
 *
3968
 *  stride * integer + offset[i]
3969
 *
3970
 * We first look for the domain with the "smallest" value for the current
3971
 * dimension and adjust the offsets such that the offset of the "smallest"
3972
 * domain is equal to zero.  The other offsets are reduced modulo stride.
3973
 *
3974
 * Based on this information, we construct a new inverse schedule in
3975
 * construct_shifted_executed that exposes the stride.
3976
 * Since this involves the introduction of a new schedule dimension,
3977
 * the build needs to be changed accordingly.
3978
 * After computing the AST, the newly introduced dimension needs
3979
 * to be removed again from the list of grafts.  We do this by plugging
3980
 * in a mapping that represents the new schedule domain in terms of the
3981
 * old schedule domain.
3982
 */
3983
static __isl_give isl_ast_graft_list *generate_shift_component(
3984
  struct isl_set_map_pair *domain, int *order, int n,
3985
  __isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
3986
  __isl_take isl_ast_build *build)
3987
3
{
3988
3
  isl_ast_graft_list *list;
3989
3
  int first;
3990
3
  int depth;
3991
3
  isl_val *val;
3992
3
  isl_multi_val *mv;
3993
3
  isl_space *space;
3994
3
  isl_multi_aff *ma, *zero;
3995
3
  isl_union_map *executed;
3996
3
3997
3
  depth = isl_ast_build_get_depth(build);
3998
3
3999
3
  first = first_offset(domain, order, n, build);
4000
3
  if (first < 0)
4001
0
    goto error;
4002
3
4003
3
  mv = isl_multi_val_copy(offset);
4004
3
  val = isl_multi_val_get_val(offset, first);
4005
3
  val = isl_val_neg(val);
4006
3
  mv = isl_multi_val_add_val(mv, val);
4007
3
  mv = isl_multi_val_mod_val(mv, isl_val_copy(stride));
4008
3
4009
3
  executed = construct_shifted_executed(domain, order, n, stride, mv,
4010
3
            build);
4011
3
  space = isl_ast_build_get_space(build, 1);
4012
3
  space = isl_space_map_from_set(space);
4013
3
  ma = isl_multi_aff_identity(isl_space_copy(space));
4014
3
  space = isl_space_from_domain(isl_space_domain(space));
4015
3
  space = isl_space_add_dims(space, isl_dim_out, 1);
4016
3
  zero = isl_multi_aff_zero(space);
4017
3
  ma = isl_multi_aff_range_splice(ma, depth + 1, zero);
4018
3
  build = isl_ast_build_insert_dim(build, depth + 1);
4019
3
  list = generate_shifted_component(executed, build);
4020
3
4021
3
  list = isl_ast_graft_list_preimage_multi_aff(list, ma);
4022
3
4023
3
  isl_multi_val_free(mv);
4024
3
4025
3
  return list;
4026
0
error:
4027
0
  isl_ast_build_free(build);
4028
0
  return NULL;
4029
3
}
4030
4031
/* Does any node in the schedule tree rooted at the current schedule node
4032
 * of "build" depend on outer schedule nodes?
4033
 */
4034
static int has_anchored_subtree(__isl_keep isl_ast_build *build)
4035
102
{
4036
102
  isl_schedule_node *node;
4037
102
  int dependent = 0;
4038
102
4039
102
  node = isl_ast_build_get_schedule_node(build);
4040
102
  dependent = isl_schedule_node_is_subtree_anchored(node);
4041
102
  isl_schedule_node_free(node);
4042
102
4043
102
  return dependent;
4044
102
}
4045
4046
/* Generate code for a single component.
4047
 *
4048
 * The component inverse schedule is specified as the "map" fields
4049
 * of the elements of "domain" indexed by the first "n" elements of "order".
4050
 *
4051
 * This function may modify the "set" fields of "domain".
4052
 *
4053
 * Before proceeding with the actual code generation for the component,
4054
 * we first check if there are any "shifted" strides, meaning that
4055
 * the schedule domains of the individual domains are all strided,
4056
 * but that they have different offsets, resulting in the union
4057
 * of schedule domains not being strided anymore.
4058
 *
4059
 * The simplest example is the schedule
4060
 *
4061
 *  { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
4062
 *
4063
 * Both schedule domains are strided, but their union is not.
4064
 * This function detects such cases and then rewrites the schedule to
4065
 *
4066
 *  { A[i] -> [2i, 0]: 0 <= i < 10; B[i] -> [2i, 1] : 0 <= i < 10 }
4067
 *
4068
 * In the new schedule, the schedule domains have the same offset (modulo
4069
 * the stride), ensuring that the union of schedule domains is also strided.
4070
 *
4071
 *
4072
 * If there is only a single domain in the component, then there is
4073
 * nothing to do.  Similarly, if the current schedule dimension has
4074
 * a fixed value for almost all domains, then there is nothing to be done.
4075
 * In particular, we need at least two domains where the current schedule
4076
 * dimension does not have a fixed value.
4077
 * Finally, in case of a schedule map input,
4078
 * if any of the options refer to the current schedule dimension,
4079
 * then we bail out as well.  It would be possible to reformulate the options
4080
 * in terms of the new schedule domain, but that would introduce constraints
4081
 * that separate the domains in the options and that is something we would
4082
 * like to avoid.
4083
 * In the case of a schedule tree input, we bail out if any of
4084
 * the descendants of the current schedule node refer to outer
4085
 * schedule nodes in any way.
4086
 *
4087
 *
4088
 * To see if there is any shifted stride, we look at the differences
4089
 * between the values of the current dimension in pairs of domains
4090
 * for equal values of outer dimensions.  These differences should be
4091
 * of the form
4092
 *
4093
 *  m x + r
4094
 *
4095
 * with "m" the stride and "r" a constant.  Note that we cannot perform
4096
 * this analysis on individual domains as the lower bound in each domain
4097
 * may depend on parameters or outer dimensions and so the current dimension
4098
 * itself may not have a fixed remainder on division by the stride.
4099
 *
4100
 * In particular, we compare the first domain that does not have an
4101
 * obviously fixed value for the current dimension to itself and all
4102
 * other domains and collect the offsets and the gcd of the strides.
4103
 * If the gcd becomes one, then we failed to find shifted strides.
4104
 * If the gcd is zero, then the differences were all fixed, meaning
4105
 * that some domains had non-obviously fixed values for the current dimension.
4106
 * If all the offsets are the same (for those domains that do not have
4107
 * an obviously fixed value for the current dimension), then we do not
4108
 * apply the transformation.
4109
 * If none of the domains were skipped, then there is nothing to do.
4110
 * If some of them were skipped, then if we apply separation, the schedule
4111
 * domain should get split in pieces with a (non-shifted) stride.
4112
 *
4113
 * Otherwise, we apply a shift to expose the stride in
4114
 * generate_shift_component.
4115
 */
4116
static __isl_give isl_ast_graft_list *generate_component(
4117
  struct isl_set_map_pair *domain, int *order, int n,
4118
  __isl_take isl_ast_build *build)
4119
203
{
4120
203
  int i, d;
4121
203
  int depth;
4122
203
  isl_ctx *ctx;
4123
203
  isl_map *map;
4124
203
  isl_set *deltas;
4125
203
  isl_val *gcd = NULL;
4126
203
  isl_multi_val *mv;
4127
203
  int fixed, skip;
4128
203
  int base;
4129
203
  isl_ast_graft_list *list;
4130
203
  int res = 0;
4131
203
4132
203
  depth = isl_ast_build_get_depth(build);
4133
203
4134
203
  skip = n == 1;
4135
203
  if (skip >= 0 && !skip)
4136
119
    skip = at_most_one_non_fixed(domain, order, n, depth);
4137
203
  if (skip >= 0 && !skip) {
4138
103
    if (isl_ast_build_has_schedule_node(build))
4139
102
      skip = has_anchored_subtree(build);
4140
1
    else
4141
1
      skip = isl_ast_build_options_involve_depth(build);
4142
103
  }
4143
203
  if (skip < 0)
4144
0
    goto error;
4145
203
  if (skip)
4146
125
    return generate_shifted_component_from_list(domain,
4147
125
                  order, n, build);
4148
78
4149
78
  base = eliminate_non_fixed(domain, order, n, depth, build);
4150
78
  if (base < 0)
4151
0
    goto error;
4152
78
4153
78
  ctx = isl_ast_build_get_ctx(build);
4154
78
4155
78
  mv = isl_multi_val_zero(isl_space_set_alloc(ctx, 0, n));
4156
78
4157
78
  fixed = 1;
4158
91
  for (i = 0; i < n; ++i) {
4159
88
    isl_val *r, *m;
4160
88
4161
88
    map = isl_map_from_domain_and_range(
4162
88
          isl_set_copy(domain[order[base]].set),
4163
88
          isl_set_copy(domain[order[i]].set));
4164
111
    for (d = 0; d < depth; ++d)
4165
23
      map = isl_map_equate(map, isl_dim_in, d,
4166
23
                isl_dim_out, d);
4167
88
    deltas = isl_map_deltas(map);
4168
88
    res = isl_set_dim_residue_class_val(deltas, depth, &m, &r);
4169
88
    isl_set_free(deltas);
4170
88
    if (res < 0)
4171
0
      break;
4172
88
4173
88
    if (i == 0)
4174
78
      gcd = m;
4175
10
    else
4176
10
      gcd = isl_val_gcd(gcd, m);
4177
88
    if (isl_val_is_one(gcd)) {
4178
75
      isl_val_free(r);
4179
75
      break;
4180
75
    }
4181
13
    mv = isl_multi_val_set_val(mv, i, r);
4182
13
4183
13
    res = dim_is_fixed(domain[order[i]].set, depth);
4184
13
    if (res < 0)
4185
0
      break;
4186
13
    if (res)
4187
0
      continue;
4188
13
4189
13
    if (fixed && i > base) {
4190
5
      isl_val *a, *b;
4191
5
      a = isl_multi_val_get_val(mv, i);
4192
5
      b = isl_multi_val_get_val(mv, base);
4193
5
      if (isl_val_ne(a, b))
4194
4
        fixed = 0;
4195
5
      isl_val_free(a);
4196
5
      isl_val_free(b);
4197
5
    }
4198
13
  }
4199
78
4200
78
  if (res < 0 || !gcd) {
4201
0
    isl_ast_build_free(build);
4202
0
    list = NULL;
4203
78
  } else if (i < n || fixed || isl_val_is_zero(gcd)) {
4204
75
    list = generate_shifted_component_from_list(domain,
4205
75
                  order, n, build);
4206
75
  } else {
4207
3
    list = generate_shift_component(domain, order, n, gcd, mv,
4208
3
            build);
4209
3
  }
4210
78
4211
78
  isl_val_free(gcd);
4212
78
  isl_multi_val_free(mv);
4213
78
4214
78
  return list;
4215
0
error:
4216
0
  isl_ast_build_free(build);
4217
0
  return NULL;
4218
78
}
4219
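The stride detection described above can be exercised on the example schedule: the differences between the two schedule domains are of the form 2 x + 1. A minimal sketch, assuming isl's public API:

#include <isl/ctx.h>
#include <isl/set.h>
#include <isl/map.h>
#include <isl/val.h>

int main(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  /* The schedule values of A (even) and B (odd) from the example. */
  isl_set *a = isl_set_read_from_str(ctx,
    "{ [i] : exists (e : i = 2e) and 0 <= i <= 18 }");
  isl_set *b = isl_set_read_from_str(ctx,
    "{ [i] : exists (e : i = 2e + 1) and 1 <= i <= 19 }");
  isl_set *deltas = isl_map_deltas(isl_map_from_domain_and_range(a, b));
  isl_val *m, *r;
  /* Differences are of the form 2 x + 1: stride 2, offset 1. */
  if (isl_set_dim_residue_class_val(deltas, 0, &m, &r) >= 0) {
    isl_val_dump(m);  /* 2 */
    isl_val_dump(r);  /* 1 */
    isl_val_free(m);
    isl_val_free(r);
  }
  isl_set_free(deltas);
  isl_ctx_free(ctx);
  return 0;
}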
4220
/* Store both "map" itself and its domain in the
4221
 * structure pointed to by *next and advance to the next array element.
4222
 */
4223
static isl_stat extract_domain(__isl_take isl_map *map, void *user)
4224
412
{
4225
412
  struct isl_set_map_pair **next = user;
4226
412
4227
412
  (*next)->map = isl_map_copy(map);
4228
412
  (*next)->set = isl_map_domain(map);
4229
412
  (*next)++;
4230
412
4231
412
  return isl_stat_ok;
4232
412
}
4233
4234
static int after_in_tree(__isl_keep isl_union_map *umap,
4235
  __isl_keep isl_schedule_node *node);
4236
4237
/* Is any domain element of "umap" scheduled after any of
4238
 * the corresponding image elements by the tree rooted at
4239
 * the child of "node"?
4240
 */
4241
static int after_in_child(__isl_keep isl_union_map *umap,
4242
  __isl_keep isl_schedule_node *node)
4243
41
{
4244
41
  isl_schedule_node *child;
4245
41
  int after;
4246
41
4247
41
  child = isl_schedule_node_get_child(node, 0);
4248
41
  after = after_in_tree(umap, child);
4249
41
  isl_schedule_node_free(child);
4250
41
4251
41
  return after;
4252
41
}
4253
4254
/* Is any domain element of "umap" scheduled after any of
4255
 * the corresponding image elements by the tree rooted at
4256
 * the band node "node"?
4257
 *
4258
 * We first check if any domain element is scheduled after any
4259
 * of the corresponding image elements by the band node itself.
4260
 * If not, we restrict "map" to those pairs of element that
4261
 * are scheduled together by the band node and continue with
4262
 * the child of the band node.
4263
 * If there are no such pairs then the map passed to after_in_child
4264
 * will be empty, causing it to return 0.
4265
 */
4266
static int after_in_band(__isl_keep isl_union_map *umap,
4267
  __isl_keep isl_schedule_node *node)
4268
0
{
4269
0
  isl_multi_union_pw_aff *mupa;
4270
0
  isl_union_map *partial, *test, *gt, *universe, *umap1, *umap2;
4271
0
  isl_union_set *domain, *range;
4272
0
  isl_space *space;
4273
0
  int empty;
4274
0
  int after;
4275
0
4276
0
  if (isl_schedule_node_band_n_member(node) == 0)
4277
0
    return after_in_child(umap, node);
4278
0
4279
0
  mupa = isl_schedule_node_band_get_partial_schedule(node);
4280
0
  space = isl_multi_union_pw_aff_get_space(mupa);
4281
0
  partial = isl_union_map_from_multi_union_pw_aff(mupa);
4282
0
  test = isl_union_map_copy(umap);
4283
0
  test = isl_union_map_apply_domain(test, isl_union_map_copy(partial));
4284
0
  test = isl_union_map_apply_range(test, isl_union_map_copy(partial));
4285
0
  gt = isl_union_map_from_map(isl_map_lex_gt(space));
4286
0
  test = isl_union_map_intersect(test, gt);
4287
0
  empty = isl_union_map_is_empty(test);
4288
0
  isl_union_map_free(test);
4289
0
4290
0
  if (empty < 0 || !empty) {
4291
0
    isl_union_map_free(partial);
4292
0
    return empty < 0 ? -1 : 1;
4293
0
  }
4294
0
4295
0
  universe = isl_union_map_universe(isl_union_map_copy(umap));
4296
0
  domain = isl_union_map_domain(isl_union_map_copy(universe));
4297
0
  range = isl_union_map_range(universe);
4298
0
  umap1 = isl_union_map_copy(partial);
4299
0
  umap1 = isl_union_map_intersect_domain(umap1, domain);
4300
0
  umap2 = isl_union_map_intersect_domain(partial, range);
4301
0
  test = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
4302
0
  test = isl_union_map_intersect(test, isl_union_map_copy(umap));
4303
0
  after = after_in_child(test, node);
4304
0
  isl_union_map_free(test);
4305
0
  return after;
4306
0
}
4307
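The ordering test in after_in_band can be exercised standalone: map both sides of the dependence pairs into the schedule space and intersect with the lexicographic greater-than relation. A minimal sketch with made-up schedules, assuming isl's public API:

#include <stdio.h>
#include <isl/ctx.h>
#include <isl/space.h>
#include <isl/map.h>
#include <isl/union_map.h>

int main(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  /* Pairs that must not be scheduled in reverse order. */
  isl_union_map *umap = isl_union_map_read_from_str(ctx,
    "{ A[i] -> B[i] : 0 <= i < 10 }");
  /* A one-dimensional partial schedule. */
  isl_union_map *partial = isl_union_map_read_from_str(ctx,
    "{ A[i] -> [i]; B[i] -> [i + 1] }");
  isl_union_map *test = isl_union_map_copy(umap);
  test = isl_union_map_apply_domain(test, isl_union_map_copy(partial));
  test = isl_union_map_apply_range(test, partial);
  isl_space *space = isl_space_set_alloc(ctx, 0, 1);
  isl_union_map *gt = isl_union_map_from_map(isl_map_lex_gt(space));
  test = isl_union_map_intersect(test, gt);
  /* Prints 1: no A instance is scheduled after its B instance. */
  printf("%d\n", isl_union_map_is_empty(test));
  isl_union_map_free(test);
  isl_union_map_free(umap);
  isl_ctx_free(ctx);
  return 0;
}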
4308
/* Is any domain element of "umap" scheduled after any of
4309
 * the corresponding image elements by the tree rooted at
4310
 * the context node "node"?
4311
 *
4312
 * The context constraints apply to the schedule domain,
4313
 * so we cannot apply them directly to "umap", which contains
4314
 * pairs of statement instances.  Instead, we add them
4315
 * to the range of the prefix schedule for both domain and
4316
 * range of "umap".
4317
 */
4318
static int after_in_context(__isl_keep isl_union_map *umap,
4319
  __isl_keep isl_schedule_node *node)
4320
0
{
4321
0
  isl_union_map *prefix, *universe, *umap1, *umap2;
4322
0
  isl_union_set *domain, *range;
4323
0
  isl_set *context;
4324
0
  int after;
4325
0
4326
0
  umap = isl_union_map_copy(umap);
4327
0
  context = isl_schedule_node_context_get_context(node);
4328
0
  prefix = isl_schedule_node_get_prefix_schedule_union_map(node);
4329
0
  universe = isl_union_map_universe(isl_union_map_copy(umap));
4330
0
  domain = isl_union_map_domain(isl_union_map_copy(universe));
4331
0
  range = isl_union_map_range(universe);
4332
0
  umap1 = isl_union_map_copy(prefix);
4333
0
  umap1 = isl_union_map_intersect_domain(umap1, domain);
4334
0
  umap2 = isl_union_map_intersect_domain(prefix, range);
4335
0
  umap1 = isl_union_map_intersect_range(umap1,
4336
0
              isl_union_set_from_set(context));
4337
0
  umap1 = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
4338
0
  umap = isl_union_map_intersect(umap, umap1);
4339
0
4340
0
  after = after_in_child(umap, node);
4341
0
4342
0
  isl_union_map_free(umap);
4343
0
4344
0
  return after;
4345
0
}
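/*
 * Illustrative sketch (not part of the covered source): restricting a
 * prefix schedule to a context, as after_in_context does above.  The
 * statement name, the one-dimensional prefix and the constraint t < n
 * are invented; only pairs whose schedule time satisfies the context
 * survive the intersection.
 */
#include <isl/ctx.h>
#include <isl/set.h>
#include <isl/union_set.h>
#include <isl/union_map.h>

static void context_restrict_sketch(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  isl_union_map *prefix = isl_union_map_read_from_str(ctx,
      "[n] -> { S[i] -> [i] : 0 <= i }");
  isl_set *context = isl_set_read_from_str(ctx, "[n] -> { [t] : t < n }");

  /* Keep only schedule values allowed by the context constraints. */
  prefix = isl_union_map_intersect_range(prefix,
      isl_union_set_from_set(context));

  isl_union_map_free(prefix);
  isl_ctx_free(ctx);
}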
4346
4347
/* Is any domain element of "umap" scheduled after any of
4348
 * the corresponding image elements by the tree rooted at
4349
 * the expansion node "node"?
4350
 *
4351
 * We apply the expansion to domain and range of "umap" and
4352
 * continue with its child.
4353
 */
4354
static int after_in_expansion(__isl_keep isl_union_map *umap,
4355
  __isl_keep isl_schedule_node *node)
4356
0
{
4357
0
  isl_union_map *expansion;
4358
0
  int after;
4359
0
4360
0
  expansion = isl_schedule_node_expansion_get_expansion(node);
4361
0
  umap = isl_union_map_copy(umap);
4362
0
  umap = isl_union_map_apply_domain(umap, isl_union_map_copy(expansion));
4363
0
  umap = isl_union_map_apply_range(umap, expansion);
4364
0
4365
0
  after = after_in_child(umap, node);
4366
0
4367
0
  isl_union_map_free(umap);
4368
0
4369
0
  return after;
4370
0
}
4371
4372
/* Is any domain element of "umap" scheduled after any of
4373
 * the corresponding image elements by the tree rooted at
4374
 * the extension node "node"?
4375
 *
4376
 * Since the extension node may add statement instances before or
4377
 * after the pairs of statement instances in "umap", we return 1
4378
 * to ensure that these pairs are not broken up.
4379
 */
4380
static int after_in_extension(__isl_keep isl_union_map *umap,
4381
  __isl_keep isl_schedule_node *node)
4382
0
{
4383
0
  return 1;
4384
0
}
4385
4386
/* Is any domain element of "umap" scheduled after any of
4387
 * the corresponding image elements by the tree rooted at
4388
 * the filter node "node"?
4389
 *
4390
 * We intersect domain and range of "umap" with the filter and
4391
 * continue with its child.
4392
 */
4393
static int after_in_filter(__isl_keep isl_union_map *umap,
4394
  __isl_keep isl_schedule_node *node)
4395
41
{
4396
41
  isl_union_set *filter;
4397
41
  int after;
4398
41
4399
41
  umap = isl_union_map_copy(umap);
4400
41
  filter = isl_schedule_node_filter_get_filter(node);
4401
41
  umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(filter));
4402
41
  umap = isl_union_map_intersect_range(umap, filter);
4403
41
4404
41
  after = after_in_child(umap, node);
4405
41
4406
41
  isl_union_map_free(umap);
4407
41
4408
41
  return after;
4409
41
}
4410
4411
/* Is any domain element of "umap" scheduled after any of
4412
 * the corresponding image elements by the tree rooted at
4413
 * the set node "node"?
4414
 *
4415
 * This is only the case if this condition holds in any
4416
 * of the (filter) children of the set node.
4417
 * In particular, if the domain and the range of "umap"
4418
 * are contained in different children, then the condition
4419
 * does not hold.
4420
 */
4421
static int after_in_set(__isl_keep isl_union_map *umap,
4422
  __isl_keep isl_schedule_node *node)
4423
16
{
4424
16
  int i, n;
4425
16
4426
16
  n = isl_schedule_node_n_children(node);
4427
57
  for (i = 0; i < n; ++i) {
4428
41
    isl_schedule_node *child;
4429
41
    int after;
4430
41
4431
41
    child = isl_schedule_node_get_child(node, i);
4432
41
    after = after_in_tree(umap, child);
4433
41
    isl_schedule_node_free(child);
4434
41
4435
41
    if (after < 0 || after)
4436
0
      return after;
4437
41
  }
4438
16
4439
16
  return 0;
4440
16
}
4441
4442
/* Return the filter of child "i" of "node".
4443
 */
4444
static __isl_give isl_union_set *child_filter(
4445
  __isl_keep isl_schedule_node *node, int i)
4446
36
{
4447
36
  isl_schedule_node *child;
4448
36
  isl_union_set *filter;
4449
36
4450
36
  child = isl_schedule_node_get_child(node, i);
4451
36
  filter = isl_schedule_node_filter_get_filter(child);
4452
36
  isl_schedule_node_free(child);
4453
36
4454
36
  return filter;
4455
36
}
4456
4457
/* Is any domain element of "umap" scheduled after any of
4458
 * the corresponding image elements by the tree rooted at
4459
 * the sequence node "node"?
4460
 *
4461
 * This happens in particular if any domain element is
4462
 * contained in a later child than one containing a range element or
4463
 * if the condition holds within a given child in the sequence.
4464
 * The latter part of the condition is checked by after_in_set.
4465
 */
4466
static int after_in_sequence(__isl_keep isl_union_map *umap,
4467
  __isl_keep isl_schedule_node *node)
4468
19
{
4469
19
  int i, j, n;
4470
19
  isl_union_map *umap_i;
4471
19
  int empty, after = 0;
4472
19
4473
19
  n = isl_schedule_node_n_children(node);
4474
45
  for (i = 1; i < n; ++i) {
4475
29
    isl_union_set *filter_i;
4476
29
4477
29
    umap_i = isl_union_map_copy(umap);
4478
29
    filter_i = child_filter(node, i);
4479
29
    umap_i = isl_union_map_intersect_domain(umap_i, filter_i);
4480
29
    empty = isl_union_map_is_empty(umap_i);
4481
29
    if (empty < 0)
4482
0
      goto error;
4483
29
    if (empty) {
4484
23
      isl_union_map_free(umap_i);
4485
23
      continue;
4486
23
    }
4487
6
4488
10
    for (j = 0; j < i; ++j) {
4489
7
      isl_union_set *filter_j;
4490
7
      isl_union_map *umap_ij;
4491
7
4492
7
      umap_ij = isl_union_map_copy(umap_i);
4493
7
      filter_j = child_filter(node, j);
4494
7
      umap_ij = isl_union_map_intersect_range(umap_ij,
4495
7
                filter_j);
4496
7
      empty = isl_union_map_is_empty(umap_ij);
4497
7
      isl_union_map_free(umap_ij);
4498
7
4499
7
      if (empty < 0)
4500
0
        goto error;
4501
7
      if (!empty)
4502
3
        after = 1;
4503
7
      if (after)
4504
3
        break;
4505
7
    }
4506
6
4507
6
    isl_union_map_free(umap_i);
4508
6
    if (after)
4509
3
      break;
4510
6
  }
4511
19
4512
19
  if (after < 0 || after)
4513
3
    return after;
4514
16
4515
16
  return after_in_set(umap, node);
4516
0
error:
4517
0
  isl_union_map_free(umap_i);
4518
0
  return -1;
4519
16
}
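/*
 * Illustrative sketch (not part of the covered source): the pairwise
 * filter test of after_in_sequence on invented filters.  Child 0 of the
 * sequence contains A and child 1 contains B, so a pair B[i] -> A[i] in
 * "umap" has its domain in a later child than its range and the doubly
 * restricted map stays non-empty.
 */
#include <assert.h>
#include <isl/ctx.h>
#include <isl/union_set.h>
#include <isl/union_map.h>

static void sequence_after_sketch(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  isl_union_map *umap = isl_union_map_read_from_str(ctx,
      "{ B[i] -> A[i] }");
  isl_union_set *filter1 = isl_union_set_read_from_str(ctx, "{ B[i] }");
  isl_union_set *filter0 = isl_union_set_read_from_str(ctx, "{ A[i] }");

  /* Domain in child 1, range in child 0: scheduled after. */
  umap = isl_union_map_intersect_domain(umap, filter1);
  umap = isl_union_map_intersect_range(umap, filter0);
  assert(!isl_union_map_is_empty(umap));

  isl_union_map_free(umap);
  isl_ctx_free(ctx);
}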
4520
4521
/* Is any domain element of "umap" scheduled after any of
4522
 * the corresponding image elements by the tree rooted at "node"?
4523
 *
4524
 * If "umap" is empty, then clearly there is no such element.
4525
 * Otherwise, consider the different types of nodes separately.
4526
 */
4527
static int after_in_tree(__isl_keep isl_union_map *umap,
4528
  __isl_keep isl_schedule_node *node)
4529
101
{
4530
101
  int empty;
4531
101
  enum isl_schedule_node_type type;
4532
101
4533
101
  empty = isl_union_map_is_empty(umap);
4534
101
  if (empty < 0)
4535
0
    return -1;
4536
101
  if (empty)
4537
41
    return 0;
4538
60
  if (!node)
4539
0
    return -1;
4540
60
4541
60
  type = isl_schedule_node_get_type(node);
4542
60
  switch (type) {
4543
60
  case isl_schedule_node_error:
4544
0
    return -1;
4545
60
  case isl_schedule_node_leaf:
4546
0
    return 0;
4547
60
  case isl_schedule_node_band:
4548
0
    return after_in_band(umap, node);
4549
60
  case isl_schedule_node_domain:
4550
0
    isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
4551
0
      "unexpected internal domain node", return -1);
4552
0
  case isl_schedule_node_context:
4553
0
    return after_in_context(umap, node);
4554
0
  case isl_schedule_node_expansion:
4555
0
    return after_in_expansion(umap, node);
4556
0
  case isl_schedule_node_extension:
4557
0
    return after_in_extension(umap, node);
4558
41
  case isl_schedule_node_filter:
4559
41
    return after_in_filter(umap, node);
4560
0
  case isl_schedule_node_guard:
4561
0
  case isl_schedule_node_mark:
4562
0
    return after_in_child(umap, node);
4563
0
  case isl_schedule_node_set:
4564
0
    return after_in_set(umap, node);
4565
19
  case isl_schedule_node_sequence:
4566
19
    return after_in_sequence(umap, node);
4567
0
  }
4568
0
4569
0
  return 1;
4570
0
}
4571
4572
/* Is any domain element of "map1" scheduled after any domain
4573
 * element of "map2" by the subtree underneath the current band node,
4574
 * while at the same time being scheduled together by the current
4575
 * band node, i.e., by "map1" and "map2"?
4576
 *
4577
 * If the child of the current band node is a leaf, then
4578
 * no element can be scheduled after any other element.
4579
 *
4580
 * Otherwise, we construct a relation between domain elements
4581
 * of "map1" and domain elements of "map2" that are scheduled
4582
 * together and then check if the subtree underneath the current
4583
 * band node determines their relative order.
4584
 */
4585
static int after_in_subtree(__isl_keep isl_ast_build *build,
4586
  __isl_keep isl_map *map1, __isl_keep isl_map *map2)
4587
19
{
4588
19
  isl_schedule_node *node;
4589
19
  isl_map *map;
4590
19
  isl_union_map *umap;
4591
19
  int after;
4592
19
4593
19
  node = isl_ast_build_get_schedule_node(build);
4594
19
  if (!node)
4595
0
    return -1;
4596
19
  node = isl_schedule_node_child(node, 0);
4597
19
  if (isl_schedule_node_get_type(node) == isl_schedule_node_leaf) {
4598
0
    isl_schedule_node_free(node);
4599
0
    return 0;
4600
0
  }
4601
19
  map = isl_map_copy(map2);
4602
19
  map = isl_map_apply_domain(map, isl_map_copy(map1));
4603
19
  umap = isl_union_map_from_map(map);
4604
19
  after = after_in_tree(umap, node);
4605
19
  isl_union_map_free(umap);
4606
19
  isl_schedule_node_free(node);
4607
19
  return after;
4608
19
}
4609
4610
/* Internal data for any_scheduled_after.
4611
 *
4612
 * "build" is the build in which the AST is constructed.
4613
 * "depth" is the number of loops that have already been generated
4614
 * "group_coscheduled" is a local copy of options->ast_build_group_coscheduled
4615
 * "domain" is an array of set-map pairs corresponding to the different
4616
 * iteration domains.  The set is the schedule domain, i.e., the domain
4617
 * of the inverse schedule, while the map is the inverse schedule itself.
4618
 */
4619
struct isl_any_scheduled_after_data {
4620
  isl_ast_build *build;
4621
  int depth;
4622
  int group_coscheduled;
4623
  struct isl_set_map_pair *domain;
4624
};
4625
4626
/* Is any element of domain "i" scheduled after any element of domain "j"
4627
 * (for a common iteration of the first data->depth loops)?
4628
 *
4629
 * data->domain[i].set contains the domain of the inverse schedule
4630
 * for domain "i", i.e., elements in the schedule domain.
4631
 *
4632
 * If we are inside a band of a schedule tree and there is a pair
4633
 * of elements in the two domains that is scheduled together by
4634
 * the current band, then we check if any element of "i" may be scheduled
4635
 * after an element of "j" by the descendants of the band node.
4636
 *
4637
 * If data->group_coscheduled is set, then we also return 1 if there
4638
 * is any pair of elements in the two domains that are scheduled together.
4639
 */
4640
static isl_bool any_scheduled_after(int i, int j, void *user)
4641
519
{
4642
519
  struct isl_any_scheduled_after_data *data = user;
4643
519
  int dim = isl_set_dim(data->domain[i].set, isl_dim_set);
4644
519
  int pos;
4645
519
4646
595
  for (pos = data->depth; pos < dim; ++pos) {
4647
576
    int follows;
4648
576
4649
576
    follows = isl_set_follows_at(data->domain[i].set,
4650
576
            data->domain[j].set, pos);
4651
576
4652
576
    if (follows < -1)
4653
0
      return isl_bool_error;
4654
576
    if (follows > 0)
4655
437
      return isl_bool_true;
4656
139
    if (follows < 0)
4657
63
      return isl_bool_false;
4658
139
  }
4659
519
4660
519
  if (isl_ast_build_has_schedule_node(data->build)) {
4661
19
    int after;
4662
19
4663
19
    after = after_in_subtree(data->build, data->domain[i].map,
4664
19
              data->domain[j].map);
4665
19
    if (after < 0 || after)
4666
3
      return after;
4667
16
  }
4668
16
4669
16
  return data->group_coscheduled;
4670
16
}
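/*
 * Illustrative sketch (not part of the covered source): the ordering
 * test behind any_scheduled_after, expressed with the public
 * lexicographic comparison instead of the internal isl_set_follows_at.
 * The two schedule-domain sets are invented; a non-empty lex-gt relation
 * means some element of the first runs after some element of the second.
 */
#include <assert.h>
#include <isl/ctx.h>
#include <isl/set.h>
#include <isl/map.h>

static void follows_sketch(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  isl_set *dom_i = isl_set_read_from_str(ctx, "{ [t] : 10 <= t < 20 }");
  isl_set *dom_j = isl_set_read_from_str(ctx, "{ [t] : 0 <= t < 10 }");
  isl_map *gt = isl_set_lex_gt_set(dom_i, dom_j);

  /* Every element of dom_i follows every element of dom_j here. */
  assert(!isl_map_is_empty(gt));

  isl_map_free(gt);
  isl_ctx_free(ctx);
}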
4671
4672
/* Look for independent components at the current depth and generate code
4673
 * for each component separately.  The resulting lists of grafts are
4674
 * merged in an attempt to combine grafts with identical guards.
4675
 *
4676
 * Code for two domains can be generated separately if all the elements
4677
 * of one domain are scheduled before (or together with) all the elements
4678
 * of the other domain.  We therefore consider the graph with as nodes
4679
 * the domains and an edge between two nodes if any element of the first
4680
 * node is scheduled after any element of the second node.
4681
 * If the ast_build_group_coscheduled option is set, then we also add an edge if
4682
 * there is any pair of elements in the two domains that are scheduled
4683
 * together.
4684
 * Code is then generated (by generate_component)
4685
 * for each of the strongly connected components in this graph
4686
 * in their topological order.
4687
 *
4688
 * Since the test is performed on the domain of the inverse schedules of
4689
 * the different domains, we precompute these domains and store
4690
 * them in data.domain.
4691
 */
4692
static __isl_give isl_ast_graft_list *generate_components(
4693
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
4694
153
{
4695
153
  int i;
4696
153
  isl_ctx *ctx = isl_ast_build_get_ctx(build);
4697
153
  int n = isl_union_map_n_map(executed);
4698
153
  struct isl_any_scheduled_after_data data;
4699
153
  struct isl_set_map_pair *next;
4700
153
  struct isl_tarjan_graph *g = NULL;
4701
153
  isl_ast_graft_list *list = NULL;
4702
153
  int n_domain = 0;
4703
153
4704
153
  data.domain = isl_calloc_array(ctx, struct isl_set_map_pair, n);
4705
153
  if (!data.domain)
4706
0
    goto error;
4707
153
  n_domain = n;
4708
153
4709
153
  next = data.domain;
4710
153
  if (isl_union_map_foreach_map(executed, &extract_domain, &next) < 0)
4711
0
    goto error;
4712
153
4713
153
  if (!build)
4714
0
    goto error;
4715
153
  data.build = build;
4716
153
  data.depth = isl_ast_build_get_depth(build);
4717
153
  data.group_coscheduled = isl_options_get_ast_build_group_coscheduled(ctx);
4718
153
  g = isl_tarjan_graph_init(ctx, n, &any_scheduled_after, &data);
4719
153
  if (!g)
4720
0
    goto error;
4721
153
4722
153
  list = isl_ast_graft_list_alloc(ctx, 0);
4723
153
4724
153
  i = 0;
4725
356
  while (list && n) {
4726
203
    isl_ast_graft_list *list_c;
4727
203
    int first = i;
4728
203
4729
203
    if (g->order[i] == -1)
4730
203
      isl_die(ctx, isl_error_internal, "cannot happen",
4731
203
        goto error);
4732
203
    ++i; --n;
4733
412
    while (g->order[i] != -1) {
4734
209
      ++i; --n;
4735
209
    }
4736
203
4737
203
    list_c = generate_component(data.domain,
4738
203
              g->order + first, i - first,
4739
203
              isl_ast_build_copy(build));
4740
203
    list = isl_ast_graft_list_merge(list, list_c, build);
4741
203
4742
203
    ++i;
4743
203
  }
4744
153
4745
153
  if (0)
4746
0
error:    list = isl_ast_graft_list_free(list);
4747
153
  isl_tarjan_graph_free(g);
4748
565
  for (i = 0; i < n_domain; ++i) {
4749
412
    isl_map_free(data.domain[i].map);
4750
412
    isl_set_free(data.domain[i].set);
4751
412
  }
4752
153
  free(data.domain);
4753
153
  isl_union_map_free(executed);
4754
153
  isl_ast_build_free(build);
4755
153
4756
153
  return list;
4757
153
}
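/*
 * Illustrative sketch (not part of the covered source): a schedule map
 * whose two statements are completely ordered at the outer level.  All S
 * instances are scheduled before all T instances, so the component
 * detection above can generate code for them separately, typically as
 * two consecutive loops.  The schedule string is invented.
 */
#include <stdio.h>
#include <stdlib.h>
#include <isl/ctx.h>
#include <isl/set.h>
#include <isl/union_map.h>
#include <isl/ast_build.h>
#include <isl/ast.h>

static void components_sketch(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  isl_set *params = isl_set_read_from_str(ctx, "[n] -> { : n > 0 }");
  isl_union_map *schedule = isl_union_map_read_from_str(ctx,
      "[n] -> { S[i] -> [i] : 0 <= i < n; T[i] -> [n + i] : 0 <= i < n }");
  isl_ast_build *build = isl_ast_build_from_context(params);
  isl_ast_node *tree = isl_ast_build_node_from_schedule_map(build, schedule);
  char *str = isl_ast_node_to_C_str(tree);

  printf("%s\n", str);  /* expect a loop for S followed by a loop for T */

  free(str);
  isl_ast_node_free(tree);
  isl_ast_build_free(build);
  isl_ctx_free(ctx);
}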
4758
4759
/* Generate code for the next level (and all inner levels).
4760
 *
4761
 * If "executed" is empty, i.e., no code needs to be generated,
4762
 * then we return an empty list.
4763
 *
4764
 * If we have already generated code for all loop levels, then we pass
4765
 * control to generate_inner_level.
4766
 *
4767
 * If "executed" lives in a single space, i.e., if code needs to be
4768
 * generated for a single domain, then there can only be a single
4769
 * component and we go directly to generate_shifted_component.
4770
 * Otherwise, we call generate_components to detect the components
4771
 * and to call generate_component on each of them separately.
4772
 */
4773
static __isl_give isl_ast_graft_list *generate_next_level(
4774
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
4775
4.74k
{
4776
4.74k
  int depth;
4777
4.74k
4778
4.74k
  if (!build || !executed)
4779
0
    goto error;
4780
4.74k
4781
4.74k
  if (isl_union_map_is_empty(executed)) {
4782
0
    isl_ctx *ctx = isl_ast_build_get_ctx(build);
4783
0
    isl_union_map_free(executed);
4784
0
    isl_ast_build_free(build);
4785
0
    return isl_ast_graft_list_alloc(ctx, 0);
4786
0
  }
4787
4.74k
4788
4.74k
  depth = isl_ast_build_get_depth(build);
4789
4.74k
  if (depth >= isl_ast_build_dim(build, isl_dim_set))
4790
2.24k
    return generate_inner_level(executed, build);
4791
2.50k
4792
2.50k
  if (isl_union_map_n_map(executed) == 1)
4793
2.34k
    return generate_shifted_component(executed, build);
4794
153
4795
153
  return generate_components(executed, build);
4796
0
error:
4797
0
  isl_union_map_free(executed);
4798
0
  isl_ast_build_free(build);
4799
0
  return NULL;
4800
153
}
4801
4802
/* Internal data structure used by isl_ast_build_node_from_schedule_map.
4803
 * internal, executed and build are the inputs to generate_code.
4804
 * list collects the output.
4805
 */
4806
struct isl_generate_code_data {
4807
  int internal;
4808
  isl_union_map *executed;
4809
  isl_ast_build *build;
4810
4811
  isl_ast_graft_list *list;
4812
};
4813
4814
/* Given an inverse schedule in terms of the external build schedule, i.e.,
4815
 *
4816
 *  [E -> S] -> D
4817
 *
4818
 * with E the external build schedule and S the additional schedule "space",
4819
 * reformulate the inverse schedule in terms of the internal schedule domain,
4820
 * i.e., return
4821
 *
4822
 *  [I -> S] -> D
4823
 *
4824
 * We first obtain a mapping
4825
 *
4826
 *  I -> E
4827
 *
4828
 * take the inverse and the product with S -> S, resulting in
4829
 *
4830
 *  [I -> S] -> [E -> S]
4831
 *
4832
 * Applying the map to the input produces the desired result.
4833
 */
4834
static __isl_give isl_union_map *internal_executed(
4835
  __isl_take isl_union_map *executed, __isl_keep isl_space *space,
4836
  __isl_keep isl_ast_build *build)
4837
3
{
4838
3
  isl_map *id, *proj;
4839
3
4840
3
  proj = isl_ast_build_get_schedule_map(build);
4841
3
  proj = isl_map_reverse(proj);
4842
3
  space = isl_space_map_from_set(isl_space_copy(space));
4843
3
  id = isl_map_identity(space);
4844
3
  proj = isl_map_product(proj, id);
4845
3
  executed = isl_union_map_apply_domain(executed,
4846
3
            isl_union_map_from_map(proj));
4847
3
  return executed;
4848
3
}
4849
4850
/* Generate an AST that visits the elements in the range of data->executed
4851
 * in the relative order specified by the corresponding domain element(s)
4852
 * for those domain elements that belong to "set".
4853
 * Add the result to data->list.
4854
 *
4855
 * The caller ensures that "set" is a universe domain.
4856
 * "space" is the space of the additional part of the schedule.
4857
 * It is equal to the space of "set" if build->domain is parametric.
4858
 * Otherwise, it is equal to the range of the wrapped space of "set".
4859
 *
4860
 * If the build space is not parametric and
4861
 * if isl_ast_build_node_from_schedule_map
4862
 * was called from an outside user (data->internal not set), then
4863
 * the (inverse) schedule refers to the external build domain and needs to
4864
 * be transformed to refer to the internal build domain.
4865
 *
4866
 * If the build space is parametric, then we add some of the parameter
4867
 * constraints to the executed relation.  Adding these constraints
4868
 * allows for an earlier detection of conflicts in some cases.
4869
 * However, we do not want to divide the executed relation into
4870
 * more disjuncts than necessary.  We therefore approximate
4871
 * the constraints on the parameters by a single disjunct set.
4872
 *
4873
 * The build is extended to include the additional part of the schedule.
4874
 * If the original build space was not parametric, then the options
4875
 * in data->build refer only to the additional part of the schedule
4876
 * and they need to be adjusted to refer to the complete AST build
4877
 * domain.
4878
 *
4879
 * After having adjusted inverse schedule and build, we start generating
4880
 * code with the outer loop of the current code generation
4881
 * in generate_next_level.
4882
 *
4883
 * If the original build space was not parametric, we undo the embedding
4884
 * on the resulting isl_ast_node_list so that it can be used within
4885
 * the outer AST build.
4886
 */
4887
static isl_stat generate_code_in_space(struct isl_generate_code_data *data,
4888
  __isl_take isl_set *set, __isl_take isl_space *space)
4889
31
{
4890
31
  isl_union_map *executed;
4891
31
  isl_ast_build *build;
4892
31
  isl_ast_graft_list *list;
4893
31
  int embed;
4894
31
4895
31
  executed = isl_union_map_copy(data->executed);
4896
31
  executed = isl_union_map_intersect_domain(executed,
4897
31
             isl_union_set_from_set(set));
4898
31
4899
31
  embed = !isl_set_is_params(data->build->domain);
4900
31
  if (embed && !data->internal)
4901
3
    executed = internal_executed(executed, space, data->build);
4902
31
  if (!embed) {
4903
6
    isl_set *domain;
4904
6
    domain = isl_ast_build_get_domain(data->build);
4905
6
    domain = isl_set_from_basic_set(isl_set_simple_hull(domain));
4906
6
    executed = isl_union_map_intersect_params(executed, domain);
4907
6
  }
4908
31
4909
31
  build = isl_ast_build_copy(data->build);
4910
31
  build = isl_ast_build_product(build, space);
4911
31
4912
31
  list = generate_next_level(executed, build);
4913
31
4914
31
  list = isl_ast_graft_list_unembed(list, embed);
4915
31
4916
31
  data->list = isl_ast_graft_list_concat(data->list, list);
4917
31
4918
31
  return isl_stat_ok;
4919
31
}
4920
4921
/* Generate an AST that visits the elements in the range of data->executed
4922
 * in the relative order specified by the corresponding domain element(s)
4923
 * for those domain elements that belong to "set".
4924
 * Add the result to data->list.
4925
 *
4926
 * The caller ensures that "set" is a universe domain.
4927
 *
4928
 * If the build space S is not parametric, then the space of "set"
4929
 * needs to be a wrapped relation with S as domain.  That is, it needs
4930
 * to be of the form
4931
 *
4932
 *  [S -> T]
4933
 *
4934
 * Check this property and pass control to generate_code_in_space
4935
 * passing along T.
4936
 * If the build space is parametric, then T is the space of "set".
4937
 */
4938
static isl_stat generate_code_set(__isl_take isl_set *set, void *user)
4939
31
{
4940
31
  struct isl_generate_code_data *data = user;
4941
31
  isl_space *space, *build_space;
4942
31
  int is_domain;
4943
31
4944
31
  space = isl_set_get_space(set);
4945
31
4946
31
  if (isl_set_is_params(data->build->domain))
4947
6
    return generate_code_in_space(data, set, space);
4948
25
4949
25
  build_space = isl_ast_build_get_space(data->build, data->internal);
4950
25
  space = isl_space_unwrap(space);
4951
25
  is_domain = isl_space_is_domain(build_space, space);
4952
25
  isl_space_free(build_space);
4953
25
  space = isl_space_range(space);
4954
25
4955
25
  if (is_domain < 0)
4956
0
    goto error;
4957
25
  if (!is_domain)
4958
25
    isl_die(isl_set_get_ctx(set), isl_error_invalid,
4959
25
      "invalid nested schedule space", goto error);
4960
25
4961
25
  return generate_code_in_space(data, set, space);
4962
0
error:
4963
0
  isl_set_free(set);
4964
0
  isl_space_free(space);
4965
0
  return isl_stat_error;
4966
25
}
4967
4968
/* Generate an AST that visits the elements in the range of "executed"
4969
 * in the relative order specified by the corresponding domain element(s).
4970
 *
4971
 * "build" is an isl_ast_build that has either been constructed by
4972
 * isl_ast_build_from_context or passed to a callback set by
4973
 * isl_ast_build_set_create_leaf.
4974
 * In the first case, the space of the isl_ast_build is typically
4975
 * a parametric space, although this is currently not enforced.
4976
 * In the second case, the space is never a parametric space.
4977
 * If the space S is not parametric, then the domain space(s) of "executed"
4978
 * need to be wrapped relations with S as domain.
4979
 *
4980
 * If the domain of "executed" consists of several spaces, then an AST
4981
 * is generated for each of them (in arbitrary order) and the results
4982
 * are concatenated.
4983
 *
4984
 * If "internal" is set, then the domain "S" above refers to the internal
4985
 * schedule domain representation.  Otherwise, it refers to the external
4986
 * representation, as returned by isl_ast_build_get_schedule_space.
4987
 *
4988
 * We essentially run over all the spaces in the domain of "executed"
4989
 * and call generate_code_set on each of them.
4990
 */
4991
static __isl_give isl_ast_graft_list *generate_code(
4992
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
4993
  int internal)
4994
31
{
4995
31
  isl_ctx *ctx;
4996
31
  struct isl_generate_code_data data = { 0 };
4997
31
  isl_space *space;
4998
31
  isl_union_set *schedule_domain;
4999
31
  isl_union_map *universe;
5000
31
5001
31
  if (!build)
5002
0
    goto error;
5003
31
  space = isl_ast_build_get_space(build, 1);
5004
31
  space = isl_space_align_params(space,
5005
31
            isl_union_map_get_space(executed));
5006
31
  space = isl_space_align_params(space,
5007
31
            isl_union_map_get_space(build->options));
5008
31
  build = isl_ast_build_align_params(build, isl_space_copy(space));
5009
31
  executed = isl_union_map_align_params(executed, space);
5010
31
  if (!executed || !build)
5011
0
    goto error;
5012
31
5013
31
  ctx = isl_ast_build_get_ctx(build);
5014
31
5015
31
  data.internal = internal;
5016
31
  data.executed = executed;
5017
31
  data.build = build;
5018
31
  data.list = isl_ast_graft_list_alloc(ctx, 0);
5019
31
5020
31
  universe = isl_union_map_universe(isl_union_map_copy(executed));
5021
31
  schedule_domain = isl_union_map_domain(universe);
5022
31
  if (isl_union_set_foreach_set(schedule_domain, &generate_code_set,
5023
31
          &data) < 0)
5024
0
    data.list = isl_ast_graft_list_free(data.list);
5025
31
5026
31
  isl_union_set_free(schedule_domain);
5027
31
  isl_union_map_free(executed);
5028
31
5029
31
  isl_ast_build_free(build);
5030
31
  return data.list;
5031
0
error:
5032
0
  isl_union_map_free(executed);
5033
0
  isl_ast_build_free(build);
5034
0
  return NULL;
5035
31
}
5036
5037
/* Generate an AST that visits the elements in the domain of "schedule"
5038
 * in the relative order specified by the corresponding image element(s).
5039
 *
5040
 * "build" is an isl_ast_build that has either been constructed by
5041
 * isl_ast_build_from_context or passed to a callback set by
5042
 * isl_ast_build_set_create_leaf.
5043
 * In the first case, the space of the isl_ast_build is typically
5044
 * a parametric space, although this is currently not enforced.
5045
 * In the second case, the space is never a parametric space.
5046
 * If the space S is not parametric, then the range space(s) of "schedule"
5047
 * need to be wrapped relations with S as domain.
5048
 *
5049
 * If the range of "schedule" consists of several spaces, then an AST
5050
 * is generated for each of them (in arbitrary order) and the results
5051
 * are concatenated.
5052
 *
5053
 * We first initialize the local copies of the relevant options.
5054
 * We do this here rather than when the isl_ast_build is created
5055
 * because the options may have changed between the construction
5056
 * of the isl_ast_build and the call to isl_generate_code.
5057
 *
5058
 * The main computation is performed on an inverse schedule (with
5059
 * the schedule domain in the domain and the elements to be executed
5060
 * in the range) called "executed".
5061
 */
5062
__isl_give isl_ast_node *isl_ast_build_node_from_schedule_map(
5063
  __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule)
5064
9
{
5065
9
  isl_ast_graft_list *list;
5066
9
  isl_ast_node *node;
5067
9
  isl_union_map *executed;
5068
9
5069
9
  build = isl_ast_build_copy(build);
5070
9
  build = isl_ast_build_set_single_valued(build, 0);
5071
9
  schedule = isl_union_map_coalesce(schedule);
5072
9
  schedule = isl_union_map_remove_redundancies(schedule);
5073
9
  executed = isl_union_map_reverse(schedule);
5074
9
  list = generate_code(executed, isl_ast_build_copy(build), 0);
5075
9
  node = isl_ast_node_from_graft_list(list, build);
5076
9
  isl_ast_build_free(build);
5077
9
5078
9
  return node;
5079
9
}
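/*
 * Illustrative sketch (not part of the covered source): minimal use of
 * the entry point defined above.  A parametric context and a schedule
 * map are read from strings and the generated AST is printed as C code,
 * roughly "for (int c0 = 0; c0 < n; c0 += 1) S(c0);".
 */
#include <stdio.h>
#include <stdlib.h>
#include <isl/ctx.h>
#include <isl/set.h>
#include <isl/union_map.h>
#include <isl/ast_build.h>
#include <isl/ast.h>

int main(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  isl_set *context = isl_set_read_from_str(ctx, "[n] -> { : n >= 0 }");
  isl_union_map *schedule = isl_union_map_read_from_str(ctx,
      "[n] -> { S[i] -> [i] : 0 <= i < n }");
  isl_ast_build *build = isl_ast_build_from_context(context);
  isl_ast_node *tree = isl_ast_build_node_from_schedule_map(build, schedule);
  char *c_str = isl_ast_node_to_C_str(tree);

  printf("%s\n", c_str);

  free(c_str);
  isl_ast_node_free(tree);
  isl_ast_build_free(build);
  isl_ctx_free(ctx);
  return 0;
}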
5080
5081
/* The old name for isl_ast_build_node_from_schedule_map.
5082
 * It is being kept for backward compatibility, but
5083
 * it will be removed in the future.
5084
 */
5085
__isl_give isl_ast_node *isl_ast_build_ast_from_schedule(
5086
  __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule)
5087
0
{
5088
0
  return isl_ast_build_node_from_schedule_map(build, schedule);
5089
0
}
5090
5091
/* Generate an AST that visits the elements in the domain of "executed"
5092
 * in the relative order specified by the band node "node" and its descendants.
5093
 *
5094
 * The relation "executed" maps the outer generated loop iterators
5095
 * to the domain elements executed by those iterations.
5096
 *
5097
 * If the band is empty, we continue with its descendants.
5098
 * Otherwise, we extend the build and the inverse schedule with
5099
 * the additional space/partial schedule and continue generating
5100
 * an AST in generate_next_level.
5101
 * As soon as we have extended the inverse schedule with the additional
5102
 * partial schedule, we look for equalities that may exist between
5103
 * the old and the new part.
5104
 */
5105
static __isl_give isl_ast_graft_list *build_ast_from_band(
5106
  __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5107
  __isl_take isl_union_map *executed)
5108
653
{
5109
653
  isl_space *space;
5110
653
  isl_multi_union_pw_aff *extra;
5111
653
  isl_union_map *extra_umap;
5112
653
  isl_ast_graft_list *list;
5113
653
  unsigned n1, n2;
5114
653
5115
653
  if (!build || !node || !executed)
5116
0
    goto error;
5117
653
5118
653
  if (isl_schedule_node_band_n_member(node) == 0)
5119
0
    return build_ast_from_child(build, node, executed);
5120
653
5121
653
  extra = isl_schedule_node_band_get_partial_schedule(node);
5122
653
  extra = isl_multi_union_pw_aff_align_params(extra,
5123
653
        isl_ast_build_get_space(build, 1));
5124
653
  space = isl_multi_union_pw_aff_get_space(extra);
5125
653
5126
653
  extra_umap = isl_union_map_from_multi_union_pw_aff(extra);
5127
653
  extra_umap = isl_union_map_reverse(extra_umap);
5128
653
5129
653
  executed = isl_union_map_domain_product(executed, extra_umap);
5130
653
  executed = isl_union_map_detect_equalities(executed);
5131
653
5132
653
  n1 = isl_ast_build_dim(build, isl_dim_param);
5133
653
  build = isl_ast_build_product(build, space);
5134
653
  n2 = isl_ast_build_dim(build, isl_dim_param);
5135
653
  if (n2 > n1)
5136
653
    isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5137
653
      "band node is not allowed to introduce new parameters",
5138
653
      build = isl_ast_build_free(build));
5139
653
  build = isl_ast_build_set_schedule_node(build, node);
5140
653
5141
653
  list = generate_next_level(executed, build);
5142
653
5143
653
  list = isl_ast_graft_list_unembed(list, 1);
5144
653
5145
653
  return list;
5146
0
error:
5147
0
  isl_schedule_node_free(node);
5148
0
  isl_union_map_free(executed);
5149
0
  isl_ast_build_free(build);
5150
0
  return NULL;
5151
653
}
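/*
 * Illustrative sketch (not part of the covered source): the schedule-tree
 * path that reaches build_ast_from_band.  A one-band schedule tree is
 * read from its YAML representation and turned into an AST; the tree
 * below is invented for illustration.
 */
#include <stdio.h>
#include <stdlib.h>
#include <isl/ctx.h>
#include <isl/schedule.h>
#include <isl/ast_build.h>
#include <isl/ast.h>

static void band_sketch(void)
{
  isl_ctx *ctx = isl_ctx_alloc();
  isl_schedule *schedule = isl_schedule_read_from_str(ctx,
      "{ domain: \"{ S[i] : 0 <= i < 10 }\", "
      "child: { schedule: \"[{ S[i] -> [(i)] }]\" } }");
  isl_ast_build *build = isl_ast_build_alloc(ctx);
  isl_ast_node *tree = isl_ast_build_node_from_schedule(build, schedule);
  char *str = isl_ast_node_to_C_str(tree);

  printf("%s\n", str);  /* roughly: for (int c0 = 0; c0 <= 9; c0 += 1) S(c0); */

  free(str);
  isl_ast_node_free(tree);
  isl_ast_build_free(build);
  isl_ctx_free(ctx);
}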
5152
5153
/* Hoist a list of grafts (in practice containing a single graft)
5154
 * from "sub_build" (which includes extra context information)
5155
 * to "build".
5156
 *
5157
 * In particular, project out all additional parameters introduced
5158
 * by the context node from the enforced constraints and the guard
5159
 * of the single graft.
5160
 */
5161
static __isl_give isl_ast_graft_list *hoist_out_of_context(
5162
  __isl_take isl_ast_graft_list *list, __isl_keep isl_ast_build *build,
5163
  __isl_keep isl_ast_build *sub_build)
5164
0
{
5165
0
  isl_ast_graft *graft;
5166
0
  isl_basic_set *enforced;
5167
0
  isl_set *guard;
5168
0
  unsigned n_param, extra_param;
5169
0
5170
0
  if (!build || !sub_build)
5171
0
    return isl_ast_graft_list_free(list);
5172
0
5173
0
  n_param = isl_ast_build_dim(build, isl_dim_param);
5174
0
  extra_param = isl_ast_build_dim(sub_build, isl_dim_param);
5175
0
5176
0
  if (extra_param == n_param)
5177
0
    return list;
5178
0
5179
0
  extra_param -= n_param;
5180
0
  enforced = isl_ast_graft_list_extract_shared_enforced(list, sub_build);
5181
0
  enforced = isl_basic_set_project_out(enforced, isl_dim_param,
5182
0
              n_param, extra_param);
5183
0
  enforced = isl_basic_set_remove_unknown_divs(enforced);
5184
0
  guard = isl_ast_graft_list_extract_hoistable_guard(list, sub_build);
5185
0
  guard = isl_set_remove_divs_involving_dims(guard, isl_dim_param,
5186
0
              n_param, extra_param);
5187
0
  guard = isl_set_project_out(guard, isl_dim_param, n_param, extra_param);
5188
0
  guard = isl_set_compute_divs(guard);
5189
0
  graft = isl_ast_graft_alloc_from_children(list, guard, enforced,
5190
0
              build, sub_build);
5191
0
  list = isl_ast_graft_list_from_ast_graft(graft);
5192
0
5193
0
  return list;
5194
0
}
5195
5196
/* Generate an AST that visits the elements in the domain of "executed"
5197
 * in the relative order specified by the context node "node"
5198
 * and its descendants.
5199
 *
5200
 * The relation "executed" maps the outer generated loop iterators
5201
 * to the domain elements executed by those iterations.
5202
 *
5203
 * The context node may introduce additional parameters as well as
5204
 * constraints on the outer schedule dimensions or original parameters.
5205
 *
5206
 * We add the extra parameters to a new build and the context
5207
 * constraints to both the build and (as a single disjunct)
5208
 * to the domain of "executed".  Since the context constraints
5209
 * are specified in terms of the input schedule, we first need
5210
 * to map them to the internal schedule domain.
5211
 *
5212
 * After constructing the AST from the descendants of "node",
5213
 * we combine the list of grafts into a single graft within
5214
 * the new build, in order to be able to exploit the additional
5215
 * context constraints during this combination.
5216
 *
5217
 * Additionally, if the current node is the outermost node in
5218
 * the schedule tree (apart from the root domain node), we generate
5219
 * all pending guards, again to be able to exploit the additional
5220
 * context constraints.  We currently do not do this for internal
5221
 * context nodes since we may still want to hoist conditions
5222
 * to outer AST nodes.
5223
 *
5224
 * If the context node introduced any new parameters, then they
5225
 * are removed from the set of enforced constraints and guard
5226
 * in hoist_out_of_context.
5227
 */
5228
static __isl_give isl_ast_graft_list *build_ast_from_context(
5229
  __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5230
  __isl_take isl_union_map *executed)
5231
0
{
5232
0
  isl_set *context;
5233
0
  isl_space *space;
5234
0
  isl_multi_aff *internal2input;
5235
0
  isl_ast_build *sub_build;
5236
0
  isl_ast_graft_list *list;
5237
0
  int n, depth;
5238
0
5239
0
  depth = isl_schedule_node_get_tree_depth(node);
5240
0
  space = isl_ast_build_get_space(build, 1);
5241
0
  context = isl_schedule_node_context_get_context(node);
5242
0
  context = isl_set_align_params(context, space);
5243
0
  sub_build = isl_ast_build_copy(build);
5244
0
  space = isl_set_get_space(context);
5245
0
  sub_build = isl_ast_build_align_params(sub_build, space);
5246
0
  internal2input = isl_ast_build_get_internal2input(sub_build);
5247
0
  context = isl_set_preimage_multi_aff(context, internal2input);
5248
0
  sub_build = isl_ast_build_restrict_generated(sub_build,
5249
0
          isl_set_copy(context));
5250
0
  context = isl_set_from_basic_set(isl_set_simple_hull(context));
5251
0
  executed = isl_union_map_intersect_domain(executed,
5252
0
          isl_union_set_from_set(context));
5253
0
5254
0
  list = build_ast_from_child(isl_ast_build_copy(sub_build),
5255
0
            node, executed);
5256
0
  n = isl_ast_graft_list_n_ast_graft(list);
5257
0
  if (n < 0)
5258
0
    list = isl_ast_graft_list_free(list);
5259
0
5260
0
  list = isl_ast_graft_list_fuse(list, sub_build);
5261
0
  if (depth == 1)
5262
0
    list = isl_ast_graft_list_insert_pending_guard_nodes(list,
5263
0
                sub_build);
5264
0
  if (n >= 1)
5265
0
    list = hoist_out_of_context(list, build, sub_build);
5266
0
5267
0
  isl_ast_build_free(build);
5268
0
  isl_ast_build_free(sub_build);
5269
0
5270
0
  return list;
5271
0
}
5272
5273
/* Generate an AST that visits the elements in the domain of "executed"
5274
 * in the relative order specified by the expansion node "node" and
5275
 * its descendants.
5276
 *
5277
 * The relation "executed" maps the outer generated loop iterators
5278
 * to the domain elements executed by those iterations.
5279
 *
5280
 * We expand the domain elements by the expansion and
5281
 * continue with the descendants of the node.
5282
 */
5283
static __isl_give isl_ast_graft_list *build_ast_from_expansion(
5284
  __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5285
  __isl_take isl_union_map *executed)
5286
0
{
5287
0
  isl_union_map *expansion;
5288
0
  unsigned n1, n2;
5289
0
5290
0
  expansion = isl_schedule_node_expansion_get_expansion(node);
5291
0
  expansion = isl_union_map_align_params(expansion,
5292
0
        isl_union_map_get_space(executed));
5293
0
5294
0
  n1 = isl_union_map_dim(executed, isl_dim_param);
5295
0
  executed = isl_union_map_apply_range(executed, expansion);
5296
0
  n2 = isl_union_map_dim(executed, isl_dim_param);
5297
0
  if (n2 > n1)
5298
0
    isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5299
0
      "expansion node is not allowed to introduce "
5300
0
      "new parameters", goto error);
5301
0
5302
0
  return build_ast_from_child(build, node, executed);
5303
0
error:
5304
0
  isl_ast_build_free(build);
5305
0
  isl_schedule_node_free(node);
5306
0
  isl_union_map_free(executed);
5307
0
  return NULL;
5308
0
}
5309
5310
/* Generate an AST that visits the elements in the domain of "executed"
5311
 * in the relative order specified by the extension node "node" and
5312
 * its descendants.
5313
 *
5314
 * The relation "executed" maps the outer generated loop iterators
5315
 * to the domain elements executed by those iterations.
5316
 *
5317
 * Extend the inverse schedule with the extension applied to the current
5318
 * set of generated constraints.  Since the extension is formulated
5319
 * in terms of the input schedule, it first needs to be transformed
5320
 * to refer to the internal schedule.
5321
 */
5322
static __isl_give isl_ast_graft_list *build_ast_from_extension(
5323
  __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5324
  __isl_take isl_union_map *executed)
5325
0
{
5326
0
  isl_union_set *schedule_domain;
5327
0
  isl_union_map *extension;
5328
0
  isl_set *set;
5329
0
5330
0
  set = isl_ast_build_get_generated(build);
5331
0
  set = isl_set_from_basic_set(isl_set_simple_hull(set));
5332
0
  schedule_domain = isl_union_set_from_set(set);
5333
0
5334
0
  extension = isl_schedule_node_extension_get_extension(node);
5335
0
5336</