Coverage Report

Created: 2017-04-29 12:21

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/External/isl/isl_ast_codegen.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2012-2014 Ecole Normale Superieure
3
 * Copyright 2014      INRIA Rocquencourt
4
 *
5
 * Use of this software is governed by the MIT license
6
 *
7
 * Written by Sven Verdoolaege,
8
 * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
9
 * and Inria Paris - Rocquencourt, Domaine de Voluceau - Rocquencourt,
10
 * B.P. 105 - 78153 Le Chesnay, France
11
 */
12
13
#include <limits.h>
14
#include <isl/aff.h>
15
#include <isl/constraint.h>
16
#include <isl/set.h>
17
#include <isl/ilp.h>
18
#include <isl/union_set.h>
19
#include <isl/union_map.h>
20
#include <isl/schedule_node.h>
21
#include <isl_sort.h>
22
#include <isl_tarjan.h>
23
#include <isl_ast_private.h>
24
#include <isl_ast_build_expr.h>
25
#include <isl_ast_build_private.h>
26
#include <isl_ast_graft_private.h>
27
28
/* Data used in generate_domain.
29
 *
30
 * "build" is the input build.
31
 * "list" collects the results.
32
 */
33
struct isl_generate_domain_data {
34
  isl_ast_build *build;
35
36
  isl_ast_graft_list *list;
37
};
38
39
static __isl_give isl_ast_graft_list *generate_next_level(
40
  __isl_take isl_union_map *executed,
41
  __isl_take isl_ast_build *build);
42
static __isl_give isl_ast_graft_list *generate_code(
43
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
44
  int internal);
45
46
/* Generate an AST for a single domain based on
47
 * the (non single valued) inverse schedule "executed".
48
 *
49
 * We extend the schedule with the iteration domain
50
 * and continue generating through a call to generate_code.
51
 *
52
 * In particular, if executed has the form
53
 *
54
 *  S -> D
55
 *
56
 * then we continue generating code on
57
 *
58
 *  [S -> D] -> D
59
 *
60
 * The extended inverse schedule is clearly single valued
61
 * ensuring that the nested generate_code will not reach this function,
62
 * but will instead create calls to all elements of D that need
63
 * to be executed from the current schedule domain.
64
 */
65
static isl_stat generate_non_single_valued(__isl_take isl_map *executed,
66
  struct isl_generate_domain_data *data)
67
16
{
68
16
  isl_map *identity;
69
16
  isl_ast_build *build;
70
16
  isl_ast_graft_list *list;
71
16
72
16
  build = isl_ast_build_copy(data->build);
73
16
74
16
  identity = isl_set_identity(isl_map_range(isl_map_copy(executed)));
75
16
  executed = isl_map_domain_product(executed, identity);
76
16
  build = isl_ast_build_set_single_valued(build, 1);
77
16
78
16
  list = generate_code(isl_union_map_from_map(executed), build, 1);
79
16
80
16
  data->list = isl_ast_graft_list_concat(data->list, list);
81
16
82
16
  return isl_stat_ok;
83
16
}
84
85
/* Call the at_each_domain callback, if requested by the user,
86
 * after recording the current inverse schedule in the build.
87
 */
88
static __isl_give isl_ast_graft *at_each_domain(__isl_take isl_ast_graft *graft,
89
  __isl_keep isl_map *executed, __isl_keep isl_ast_build *build)
90
1.26k
{
91
1.26k
  if (
!graft || 1.26k
!build1.26k
)
92
0
    return isl_ast_graft_free(graft);
93
1.26k
  
if (1.26k
!build->at_each_domain1.26k
)
94
10
    return graft;
95
1.26k
96
1.25k
  build = isl_ast_build_copy(build);
97
1.25k
  build = isl_ast_build_set_executed(build,
98
1.25k
      isl_union_map_from_map(isl_map_copy(executed)));
99
1.25k
  if (!build)
100
0
    return isl_ast_graft_free(graft);
101
1.25k
102
1.25k
  graft->node = build->at_each_domain(graft->node,
103
1.25k
          build, build->at_each_domain_user);
104
1.25k
  isl_ast_build_free(build);
105
1.25k
106
1.25k
  if (!graft->node)
107
0
    graft = isl_ast_graft_free(graft);
108
1.25k
109
1.25k
  return graft;
110
1.25k
}
111
112
/* Generate a call expression for the single executed
113
 * domain element "map" and put a guard around it based its (simplified)
114
 * domain.  "executed" is the original inverse schedule from which "map"
115
 * has been derived.  In particular, "map" is either identical to "executed"
116
 * or it is the result of gisting "executed" with respect to the build domain.
117
 * "executed" is only used if there is an at_each_domain callback.
118
 *
119
 * At this stage, any pending constraints in the build can no longer
120
 * be simplified with respect to any enforced constraints since
121
 * the call node does not have any enforced constraints.
122
 * Since all pending constraints not covered by any enforced constraints
123
 * will be added as a guard to the graft in create_node_scaled,
124
 * even in the eliminated case, the pending constraints
125
 * can be considered to have been generated by outer constructs.
126
 *
127
 * If the user has set an at_each_domain callback, it is called
128
 * on the constructed call expression node.
129
 */
130
static isl_stat add_domain(__isl_take isl_map *executed,
131
  __isl_take isl_map *map, struct isl_generate_domain_data *data)
132
1.26k
{
133
1.26k
  isl_ast_build *build;
134
1.26k
  isl_ast_graft *graft;
135
1.26k
  isl_ast_graft_list *list;
136
1.26k
  isl_set *guard, *pending;
137
1.26k
138
1.26k
  build = isl_ast_build_copy(data->build);
139
1.26k
  pending = isl_ast_build_get_pending(build);
140
1.26k
  build = isl_ast_build_replace_pending_by_guard(build, pending);
141
1.26k
142
1.26k
  guard = isl_map_domain(isl_map_copy(map));
143
1.26k
  guard = isl_set_compute_divs(guard);
144
1.26k
  guard = isl_set_coalesce(guard);
145
1.26k
  guard = isl_set_gist(guard, isl_ast_build_get_generated(build));
146
1.26k
  guard = isl_ast_build_specialize(build, guard);
147
1.26k
148
1.26k
  graft = isl_ast_graft_alloc_domain(map, build);
149
1.26k
  graft = at_each_domain(graft, executed, build);
150
1.26k
  isl_ast_build_free(build);
151
1.26k
  isl_map_free(executed);
152
1.26k
  graft = isl_ast_graft_add_guard(graft, guard, data->build);
153
1.26k
154
1.26k
  list = isl_ast_graft_list_from_ast_graft(graft);
155
1.26k
  data->list = isl_ast_graft_list_concat(data->list, list);
156
1.26k
157
1.26k
  return isl_stat_ok;
158
1.26k
}
159
160
/* Generate an AST for a single domain based on
161
 * the inverse schedule "executed" and add it to data->list.
162
 *
163
 * If there is more than one domain element associated to the current
164
 * schedule "time", then we need to continue the generation process
165
 * in generate_non_single_valued.
166
 * Note that the inverse schedule being single-valued may depend
167
 * on constraints that are only available in the original context
168
 * domain specified by the user.  We therefore first introduce
169
 * some of the constraints of data->build->domain.  In particular,
170
 * we intersect with a single-disjunct approximation of this set.
171
 * We perform this approximation to avoid further splitting up
172
 * the executed relation, possibly introducing a disjunctive guard
173
 * on the statement.
174
 *
175
 * On the other hand, we only perform the test after having taken the gist
176
 * of the domain as the resulting map is the one from which the call
177
 * expression is constructed.  Using this map to construct the call
178
 * expression usually yields simpler results in cases where the original
179
 * map is not obviously single-valued.
180
 * If the original map is obviously single-valued, then the gist
181
 * operation is skipped.
182
 *
183
 * Because we perform the single-valuedness test on the gisted map,
184
 * we may in rare cases fail to recognize that the inverse schedule
185
 * is single-valued.  This becomes problematic if this happens
186
 * from the recursive call through generate_non_single_valued
187
 * as we would then end up in an infinite recursion.
188
 * We therefore check if we are inside a call to generate_non_single_valued
189
 * and revert to the ungisted map if the gisted map turns out not to be
190
 * single-valued.
191
 *
192
 * Otherwise, call add_domain to generate a call expression (with guard) and
193
 * to call the at_each_domain callback, if any.
194
 */
195
static isl_stat generate_domain(__isl_take isl_map *executed, void *user)
196
1.28k
{
197
1.28k
  struct isl_generate_domain_data *data = user;
198
1.28k
  isl_set *domain;
199
1.28k
  isl_map *map = NULL;
200
1.28k
  int empty, sv;
201
1.28k
202
1.28k
  domain = isl_ast_build_get_domain(data->build);
203
1.28k
  domain = isl_set_from_basic_set(isl_set_simple_hull(domain));
204
1.28k
  executed = isl_map_intersect_domain(executed, domain);
205
1.28k
  empty = isl_map_is_empty(executed);
206
1.28k
  if (empty < 0)
207
0
    goto error;
208
1.28k
  
if (1.28k
empty1.28k
)
{0
209
0
    isl_map_free(executed);
210
0
    return isl_stat_ok;
211
0
  }
212
1.28k
213
1.28k
  sv = isl_map_plain_is_single_valued(executed);
214
1.28k
  if (sv < 0)
215
0
    goto error;
216
1.28k
  
if (1.28k
sv1.28k
)
217
1.24k
    return add_domain(executed, isl_map_copy(executed), data);
218
1.28k
219
41
  executed = isl_map_coalesce(executed);
220
41
  map = isl_map_copy(executed);
221
41
  map = isl_ast_build_compute_gist_map_domain(data->build, map);
222
41
  sv = isl_map_is_single_valued(map);
223
41
  if (sv < 0)
224
0
    goto error;
225
41
  
if (41
!sv41
)
{16
226
16
    isl_map_free(map);
227
16
    if (data->build->single_valued)
228
0
      map = isl_map_copy(executed);
229
16
    else
230
16
      return generate_non_single_valued(executed, data);
231
16
  }
232
41
233
25
  return add_domain(executed, map, data);
234
0
error:
235
0
  isl_map_free(map);
236
0
  isl_map_free(executed);
237
0
  return isl_stat_error;
238
41
}
239
240
/* Call build->create_leaf to a create "leaf" node in the AST,
241
 * encapsulate the result in an isl_ast_graft and return the result
242
 * as a 1-element list.
243
 *
244
 * Note that the node returned by the user may be an entire tree.
245
 *
246
 * Since the node itself cannot enforce any constraints, we turn
247
 * all pending constraints into guards and add them to the resulting
248
 * graft to ensure that they will be generated.
249
 *
250
 * Before we pass control to the user, we first clear some information
251
 * from the build that is (presumbably) only meaningful
252
 * for the current code generation.
253
 * This includes the create_leaf callback itself, so we make a copy
254
 * of the build first.
255
 */
256
static __isl_give isl_ast_graft_list *call_create_leaf(
257
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
258
3
{
259
3
  isl_set *guard;
260
3
  isl_ast_node *node;
261
3
  isl_ast_graft *graft;
262
3
  isl_ast_build *user_build;
263
3
264
3
  guard = isl_ast_build_get_pending(build);
265
3
  user_build = isl_ast_build_copy(build);
266
3
  user_build = isl_ast_build_replace_pending_by_guard(user_build,
267
3
              isl_set_copy(guard));
268
3
  user_build = isl_ast_build_set_executed(user_build, executed);
269
3
  user_build = isl_ast_build_clear_local_info(user_build);
270
3
  if (!user_build)
271
0
    node = NULL;
272
3
  else
273
3
    node = build->create_leaf(user_build, build->create_leaf_user);
274
3
  graft = isl_ast_graft_alloc(node, build);
275
3
  graft = isl_ast_graft_add_guard(graft, guard, build);
276
3
  isl_ast_build_free(build);
277
3
  return isl_ast_graft_list_from_ast_graft(graft);
278
3
}
279
280
static __isl_give isl_ast_graft_list *build_ast_from_child(
281
  __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
282
  __isl_take isl_union_map *executed);
283
284
/* Generate an AST after having handled the complete schedule
285
 * of this call to the code generator or the complete band
286
 * if we are generating an AST from a schedule tree.
287
 *
288
 * If we are inside a band node, then move on to the child of the band.
289
 *
290
 * If the user has specified a create_leaf callback, control
291
 * is passed to the user in call_create_leaf.
292
 *
293
 * Otherwise, we generate one or more calls for each individual
294
 * domain in generate_domain.
295
 */
296
static __isl_give isl_ast_graft_list *generate_inner_level(
297
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
298
2.52k
{
299
2.52k
  isl_ctx *ctx;
300
2.52k
  struct isl_generate_domain_data data = { build };
301
2.52k
302
2.52k
  if (
!build || 2.52k
!executed2.52k
)
303
0
    goto error;
304
2.52k
305
2.52k
  
if (2.52k
isl_ast_build_has_schedule_node(build)2.52k
)
{1.23k
306
1.23k
    isl_schedule_node *node;
307
1.23k
    node = isl_ast_build_get_schedule_node(build);
308
1.23k
    build = isl_ast_build_reset_schedule_node(build);
309
1.23k
    return build_ast_from_child(build, node, executed);
310
1.23k
  }
311
2.52k
312
1.28k
  
if (1.28k
build->create_leaf1.28k
)
313
3
    return call_create_leaf(executed, build);
314
1.28k
315
1.28k
  ctx = isl_union_map_get_ctx(executed);
316
1.28k
  data.list = isl_ast_graft_list_alloc(ctx, 0);
317
1.28k
  if (isl_union_map_foreach_map(executed, &generate_domain, &data) < 0)
318
0
    data.list = isl_ast_graft_list_free(data.list);
319
1.28k
320
1.28k
  if (0)
321
0
error:    data.list = NULL;
322
1.28k
  isl_ast_build_free(build);
323
1.28k
  isl_union_map_free(executed);
324
1.28k
  return data.list;
325
1.28k
}
326
327
/* Call the before_each_for callback, if requested by the user.
328
 */
329
static __isl_give isl_ast_node *before_each_for(__isl_take isl_ast_node *node,
330
  __isl_keep isl_ast_build *build)
331
637
{
332
637
  isl_id *id;
333
637
334
637
  if (
!node || 637
!build637
)
335
0
    return isl_ast_node_free(node);
336
637
  
if (637
!build->before_each_for637
)
337
409
    return node;
338
228
  id = build->before_each_for(build, build->before_each_for_user);
339
228
  node = isl_ast_node_set_annotation(node, id);
340
228
  return node;
341
637
}
342
343
/* Call the after_each_for callback, if requested by the user.
344
 */
345
static __isl_give isl_ast_graft *after_each_for(__isl_take isl_ast_graft *graft,
346
  __isl_keep isl_ast_build *build)
347
637
{
348
637
  if (
!graft || 637
!build637
)
349
0
    return isl_ast_graft_free(graft);
350
637
  
if (637
!build->after_each_for637
)
351
409
    return graft;
352
228
  graft->node = build->after_each_for(graft->node, build,
353
228
            build->after_each_for_user);
354
228
  if (!graft->node)
355
0
    return isl_ast_graft_free(graft);
356
228
  return graft;
357
228
}
358
359
/* Plug in all the know values of the current and outer dimensions
360
 * in the domain of "executed".  In principle, we only need to plug
361
 * in the known value of the current dimension since the values of
362
 * outer dimensions have been plugged in already.
363
 * However, it turns out to be easier to just plug in all known values.
364
 */
365
static __isl_give isl_union_map *plug_in_values(
366
  __isl_take isl_union_map *executed, __isl_keep isl_ast_build *build)
367
1.53k
{
368
1.53k
  return isl_ast_build_substitute_values_union_map_domain(build,
369
1.53k
                    executed);
370
1.53k
}
371
372
/* Check if the constraint "c" is a lower bound on dimension "pos",
373
 * an upper bound, or independent of dimension "pos".
374
 */
375
static int constraint_type(isl_constraint *c, int pos)
376
1.68k
{
377
1.68k
  if (isl_constraint_is_lower_bound(c, isl_dim_set, pos))
378
739
    return 1;
379
945
  
if (945
isl_constraint_is_upper_bound(c, isl_dim_set, pos)945
)
380
842
    return 2;
381
103
  return 0;
382
945
}
383
384
/* Compare the types of the constraints "a" and "b",
385
 * resulting in constraints that are independent of "depth"
386
 * to be sorted before the lower bounds on "depth", which in
387
 * turn are sorted before the upper bounds on "depth".
388
 */
389
static int cmp_constraint(__isl_keep isl_constraint *a,
390
  __isl_keep isl_constraint *b, void *user)
391
842
{
392
842
  int *depth = user;
393
842
  int t1 = constraint_type(a, *depth);
394
842
  int t2 = constraint_type(b, *depth);
395
842
396
842
  return t1 - t2;
397
842
}
398
399
/* Extract a lower bound on dimension "pos" from constraint "c".
400
 *
401
 * If the constraint is of the form
402
 *
403
 *  a x + f(...) >= 0
404
 *
405
 * then we essentially return
406
 *
407
 *  l = ceil(-f(...)/a)
408
 *
409
 * However, if the current dimension is strided, then we need to make
410
 * sure that the lower bound we construct is of the form
411
 *
412
 *  f + s a
413
 *
414
 * with f the offset and s the stride.
415
 * We therefore compute
416
 *
417
 *  f + s * ceil((l - f)/s)
418
 */
419
static __isl_give isl_aff *lower_bound(__isl_keep isl_constraint *c,
420
  int pos, __isl_keep isl_ast_build *build)
421
645
{
422
645
  isl_aff *aff;
423
645
424
645
  aff = isl_constraint_get_bound(c, isl_dim_set, pos);
425
645
  aff = isl_aff_ceil(aff);
426
645
427
645
  if (
isl_ast_build_has_stride(build, pos)645
)
{11
428
11
    isl_aff *offset;
429
11
    isl_val *stride;
430
11
431
11
    offset = isl_ast_build_get_offset(build, pos);
432
11
    stride = isl_ast_build_get_stride(build, pos);
433
11
434
11
    aff = isl_aff_sub(aff, isl_aff_copy(offset));
435
11
    aff = isl_aff_scale_down_val(aff, isl_val_copy(stride));
436
11
    aff = isl_aff_ceil(aff);
437
11
    aff = isl_aff_scale_val(aff, stride);
438
11
    aff = isl_aff_add(aff, offset);
439
11
  }
440
645
441
645
  aff = isl_ast_build_compute_gist_aff(build, aff);
442
645
443
645
  return aff;
444
645
}
445
446
/* Return the exact lower bound (or upper bound if "upper" is set)
447
 * of "domain" as a piecewise affine expression.
448
 *
449
 * If we are computing a lower bound (of a strided dimension), then
450
 * we need to make sure it is of the form
451
 *
452
 *  f + s a
453
 *
454
 * where f is the offset and s is the stride.
455
 * We therefore need to include the stride constraint before computing
456
 * the minimum.
457
 */
458
static __isl_give isl_pw_aff *exact_bound(__isl_keep isl_set *domain,
459
  __isl_keep isl_ast_build *build, int upper)
460
0
{
461
0
  isl_set *stride;
462
0
  isl_map *it_map;
463
0
  isl_pw_aff *pa;
464
0
  isl_pw_multi_aff *pma;
465
0
466
0
  domain = isl_set_copy(domain);
467
0
  if (
!upper0
)
{0
468
0
    stride = isl_ast_build_get_stride_constraint(build);
469
0
    domain = isl_set_intersect(domain, stride);
470
0
  }
471
0
  it_map = isl_ast_build_map_to_iterator(build, domain);
472
0
  if (upper)
473
0
    pma = isl_map_lexmax_pw_multi_aff(it_map);
474
0
  else
475
0
    pma = isl_map_lexmin_pw_multi_aff(it_map);
476
0
  pa = isl_pw_multi_aff_get_pw_aff(pma, 0);
477
0
  isl_pw_multi_aff_free(pma);
478
0
  pa = isl_ast_build_compute_gist_pw_aff(build, pa);
479
0
  pa = isl_pw_aff_coalesce(pa);
480
0
481
0
  return pa;
482
0
}
483
484
/* Callback for sorting the isl_pw_aff_list passed to reduce_list and
485
 * remove_redundant_lower_bounds.
486
 */
487
static int reduce_list_cmp(__isl_keep isl_pw_aff *a, __isl_keep isl_pw_aff *b,
488
  void *user)
489
67
{
490
67
  return isl_pw_aff_plain_cmp(a, b);
491
67
}
492
493
/* Given a list of lower bounds "list", remove those that are redundant
494
 * with respect to the other bounds in "list" and the domain of "build".
495
 *
496
 * We first sort the bounds in the same way as they would be sorted
497
 * by set_for_node_expressions so that we can try and remove the last
498
 * bounds first.
499
 *
500
 * For a lower bound to be effective, there needs to be at least
501
 * one domain element for which it is larger than all other lower bounds.
502
 * For each lower bound we therefore intersect the domain with
503
 * the conditions that it is larger than all other bounds and
504
 * check whether the result is empty.  If so, the bound can be removed.
505
 */
506
static __isl_give isl_pw_aff_list *remove_redundant_lower_bounds(
507
  __isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
508
11
{
509
11
  int i, j, n;
510
11
  isl_set *domain;
511
11
512
11
  list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL);
513
11
  if (!list)
514
0
    return NULL;
515
11
516
11
  n = isl_pw_aff_list_n_pw_aff(list);
517
11
  if (n <= 1)
518
11
    return list;
519
11
520
0
  domain = isl_ast_build_get_domain(build);
521
0
522
0
  for (i = n - 1; 
i >= 00
;
--i0
)
{0
523
0
    isl_pw_aff *pa_i;
524
0
    isl_set *domain_i;
525
0
    int empty;
526
0
527
0
    domain_i = isl_set_copy(domain);
528
0
    pa_i = isl_pw_aff_list_get_pw_aff(list, i);
529
0
530
0
    for (j = 0; 
j < n0
;
++j0
)
{0
531
0
      isl_pw_aff *pa_j;
532
0
      isl_set *better;
533
0
534
0
      if (j == i)
535
0
        continue;
536
0
537
0
      pa_j = isl_pw_aff_list_get_pw_aff(list, j);
538
0
      better = isl_pw_aff_gt_set(isl_pw_aff_copy(pa_i), pa_j);
539
0
      domain_i = isl_set_intersect(domain_i, better);
540
0
    }
541
0
542
0
    empty = isl_set_is_empty(domain_i);
543
0
544
0
    isl_set_free(domain_i);
545
0
    isl_pw_aff_free(pa_i);
546
0
547
0
    if (empty < 0)
548
0
      goto error;
549
0
    
if (0
!empty0
)
550
0
      continue;
551
0
    list = isl_pw_aff_list_drop(list, i, 1);
552
0
    n--;
553
0
  }
554
0
555
0
  isl_set_free(domain);
556
0
557
0
  return list;
558
0
error:
559
0
  isl_set_free(domain);
560
0
  return isl_pw_aff_list_free(list);
561
0
}
562
563
/* Extract a lower bound on dimension "pos" from each constraint
564
 * in "constraints" and return the list of lower bounds.
565
 * If "constraints" has zero elements, then we extract a lower bound
566
 * from "domain" instead.
567
 *
568
 * If the current dimension is strided, then the lower bound
569
 * is adjusted by lower_bound to match the stride information.
570
 * This modification may make one or more lower bounds redundant
571
 * with respect to the other lower bounds.  We therefore check
572
 * for this condition and remove the redundant lower bounds.
573
 */
574
static __isl_give isl_pw_aff_list *lower_bounds(
575
  __isl_keep isl_constraint_list *constraints, int pos,
576
  __isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
577
637
{
578
637
  isl_ctx *ctx;
579
637
  isl_pw_aff_list *list;
580
637
  int i, n;
581
637
582
637
  if (!build)
583
0
    return NULL;
584
637
585
637
  n = isl_constraint_list_n_constraint(constraints);
586
637
  if (
n == 0637
)
{0
587
0
    isl_pw_aff *pa;
588
0
    pa = exact_bound(domain, build, 0);
589
0
    return isl_pw_aff_list_from_pw_aff(pa);
590
0
  }
591
637
592
637
  ctx = isl_ast_build_get_ctx(build);
593
637
  list = isl_pw_aff_list_alloc(ctx,n);
594
637
595
1.28k
  for (i = 0; 
i < n1.28k
;
++i645
)
{645
596
645
    isl_aff *aff;
597
645
    isl_constraint *c;
598
645
599
645
    c = isl_constraint_list_get_constraint(constraints, i);
600
645
    aff = lower_bound(c, pos, build);
601
645
    isl_constraint_free(c);
602
645
    list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
603
645
  }
604
637
605
637
  if (isl_ast_build_has_stride(build, pos))
606
11
    list = remove_redundant_lower_bounds(list, build);
607
637
608
637
  return list;
609
637
}
610
611
/* Extract an upper bound on dimension "pos" from each constraint
612
 * in "constraints" and return the list of upper bounds.
613
 * If "constraints" has zero elements, then we extract an upper bound
614
 * from "domain" instead.
615
 */
616
static __isl_give isl_pw_aff_list *upper_bounds(
617
  __isl_keep isl_constraint_list *constraints, int pos,
618
  __isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
619
637
{
620
637
  isl_ctx *ctx;
621
637
  isl_pw_aff_list *list;
622
637
  int i, n;
623
637
624
637
  n = isl_constraint_list_n_constraint(constraints);
625
637
  if (
n == 0637
)
{0
626
0
    isl_pw_aff *pa;
627
0
    pa = exact_bound(domain, build, 1);
628
0
    return isl_pw_aff_list_from_pw_aff(pa);
629
0
  }
630
637
631
637
  ctx = isl_ast_build_get_ctx(build);
632
637
  list = isl_pw_aff_list_alloc(ctx,n);
633
637
634
1.32k
  for (i = 0; 
i < n1.32k
;
++i692
)
{692
635
692
    isl_aff *aff;
636
692
    isl_constraint *c;
637
692
638
692
    c = isl_constraint_list_get_constraint(constraints, i);
639
692
    aff = isl_constraint_get_bound(c, isl_dim_set, pos);
640
692
    isl_constraint_free(c);
641
692
    aff = isl_aff_floor(aff);
642
692
    list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
643
692
  }
644
637
645
637
  return list;
646
637
}
647
648
/* Return an isl_ast_expr that performs the reduction of type "type"
649
 * on AST expressions corresponding to the elements in "list".
650
 *
651
 * The list is assumed to contain at least one element.
652
 * If the list contains exactly one element, then the returned isl_ast_expr
653
 * simply computes that affine expression.
654
 * If the list contains more than one element, then we sort it
655
 * using a fairly abitrary but hopefully reasonably stable order.
656
 */
657
static __isl_give isl_ast_expr *reduce_list(enum isl_ast_op_type type,
658
  __isl_keep isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
659
1.27k
{
660
1.27k
  int i, n;
661
1.27k
  isl_ctx *ctx;
662
1.27k
  isl_ast_expr *expr;
663
1.27k
664
1.27k
  if (!list)
665
0
    return NULL;
666
1.27k
667
1.27k
  n = isl_pw_aff_list_n_pw_aff(list);
668
1.27k
669
1.27k
  if (n == 1)
670
1.21k
    return isl_ast_build_expr_from_pw_aff_internal(build,
671
1.21k
        isl_pw_aff_list_get_pw_aff(list, 0));
672
1.27k
673
57
  ctx = isl_pw_aff_list_get_ctx(list);
674
57
  expr = isl_ast_expr_alloc_op(ctx, type, n);
675
57
  if (!expr)
676
0
    return NULL;
677
57
678
57
  list = isl_pw_aff_list_copy(list);
679
57
  list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL);
680
57
  if (!list)
681
0
    return isl_ast_expr_free(expr);
682
57
683
177
  
for (i = 0; 57
i < n177
;
++i120
)
{120
684
120
    isl_ast_expr *expr_i;
685
120
686
120
    expr_i = isl_ast_build_expr_from_pw_aff_internal(build,
687
120
        isl_pw_aff_list_get_pw_aff(list, i));
688
120
    if (!expr_i)
689
0
      goto error;
690
120
    expr->u.op.args[i] = expr_i;
691
120
  }
692
57
693
57
  isl_pw_aff_list_free(list);
694
57
  return expr;
695
0
error:
696
0
  isl_pw_aff_list_free(list);
697
0
  isl_ast_expr_free(expr);
698
0
  return NULL;
699
57
}
700
701
/* Add guards implied by the "generated constraints",
702
 * but not (necessarily) enforced by the generated AST to "guard".
703
 * In particular, if there is any stride constraints,
704
 * then add the guard implied by those constraints.
705
 * If we have generated a degenerate loop, then add the guard
706
 * implied by "bounds" on the outer dimensions, i.e., the guard
707
 * that ensures that the single value actually exists.
708
 * Since there may also be guards implied by a combination
709
 * of these constraints, we first combine them before
710
 * deriving the implied constraints.
711
 */
712
static __isl_give isl_set *add_implied_guards(__isl_take isl_set *guard,
713
  int degenerate, __isl_keep isl_basic_set *bounds,
714
  __isl_keep isl_ast_build *build)
715
637
{
716
637
  int depth, has_stride;
717
637
  isl_space *space;
718
637
  isl_set *dom, *set;
719
637
720
637
  depth = isl_ast_build_get_depth(build);
721
637
  has_stride = isl_ast_build_has_stride(build, depth);
722
637
  if (
!has_stride && 637
!degenerate626
)
723
626
    return guard;
724
637
725
11
  space = isl_basic_set_get_space(bounds);
726
11
  dom = isl_set_universe(space);
727
11
728
11
  if (
degenerate11
)
{0
729
0
    bounds = isl_basic_set_copy(bounds);
730
0
    bounds = isl_basic_set_drop_constraints_not_involving_dims(
731
0
          bounds, isl_dim_set, depth, 1);
732
0
    set = isl_set_from_basic_set(bounds);
733
0
    dom = isl_set_intersect(dom, set);
734
0
  }
735
11
736
11
  if (
has_stride11
)
{11
737
11
    set = isl_ast_build_get_stride_constraint(build);
738
11
    dom = isl_set_intersect(dom, set);
739
11
  }
740
11
741
11
  dom = isl_set_eliminate(dom, isl_dim_set, depth, 1);
742
11
  dom = isl_ast_build_compute_gist(build, dom);
743
11
  guard = isl_set_intersect(guard, dom);
744
11
745
11
  return guard;
746
637
}
747
748
/* Update "graft" based on "sub_build" for the degenerate case.
749
 *
750
 * "build" is the build in which graft->node was created
751
 * "sub_build" contains information about the current level itself,
752
 * including the single value attained.
753
 *
754
 * We set the initialization part of the for loop to the single
755
 * value attained by the current dimension.
756
 * The increment and condition are not strictly needed as the are known
757
 * to be "1" and "iterator <= value" respectively.
758
 */
759
static __isl_give isl_ast_graft *refine_degenerate(
760
  __isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build,
761
  __isl_keep isl_ast_build *sub_build)
762
0
{
763
0
  isl_pw_aff *value;
764
0
765
0
  if (
!graft || 0
!sub_build0
)
766
0
    return isl_ast_graft_free(graft);
767
0
768
0
  value = isl_pw_aff_copy(sub_build->value);
769
0
770
0
  graft->node->u.f.init = isl_ast_build_expr_from_pw_aff_internal(build,
771
0
            value);
772
0
  if (!graft->node->u.f.init)
773
0
    return isl_ast_graft_free(graft);
774
0
775
0
  return graft;
776
0
}
777
778
/* Return the intersection of constraints in "list" as a set.
779
 */
780
static __isl_give isl_set *intersect_constraints(
781
  __isl_keep isl_constraint_list *list)
782
0
{
783
0
  int i, n;
784
0
  isl_basic_set *bset;
785
0
786
0
  n = isl_constraint_list_n_constraint(list);
787
0
  if (n < 1)
788
0
    isl_die(isl_constraint_list_get_ctx(list), isl_error_internal,
789
0
      "expecting at least one constraint", return NULL);
790
0
791
0
  bset = isl_basic_set_from_constraint(
792
0
        isl_constraint_list_get_constraint(list, 0));
793
0
  for (i = 1; 
i < n0
;
++i0
)
{0
794
0
    isl_basic_set *bset_i;
795
0
796
0
    bset_i = isl_basic_set_from_constraint(
797
0
        isl_constraint_list_get_constraint(list, i));
798
0
    bset = isl_basic_set_intersect(bset, bset_i);
799
0
  }
800
0
801
0
  return isl_set_from_basic_set(bset);
802
0
}
803
804
/* Compute the constraints on the outer dimensions enforced by
805
 * graft->node and add those constraints to graft->enforced,
806
 * in case the upper bound is expressed as a set "upper".
807
 *
808
 * In particular, if l(...) is a lower bound in "lower", and
809
 *
810
 *  -a i + f(...) >= 0    or  a i <= f(...)
811
 *
812
 * is an upper bound ocnstraint on the current dimension i,
813
 * then the for loop enforces the constraint
814
 *
815
 *  -a l(...) + f(...) >= 0   or  a l(...) <= f(...)
816
 *
817
 * We therefore simply take each lower bound in turn, plug it into
818
 * the upper bounds and compute the intersection over all lower bounds.
819
 *
820
 * If a lower bound is a rational expression, then
821
 * isl_basic_set_preimage_multi_aff will force this rational
822
 * expression to have only integer values.  However, the loop
823
 * itself does not enforce this integrality constraint.  We therefore
824
 * use the ceil of the lower bounds instead of the lower bounds themselves.
825
 * Other constraints will make sure that the for loop is only executed
826
 * when each of the lower bounds attains an integral value.
827
 * In particular, potentially rational values only occur in
828
 * lower_bound if the offset is a (seemingly) rational expression,
829
 * but then outer conditions will make sure that this rational expression
830
 * only attains integer values.
831
 */
832
static __isl_give isl_ast_graft *set_enforced_from_set(
833
  __isl_take isl_ast_graft *graft,
834
  __isl_keep isl_pw_aff_list *lower, int pos, __isl_keep isl_set *upper)
835
0
{
836
0
  isl_space *space;
837
0
  isl_basic_set *enforced;
838
0
  isl_pw_multi_aff *pma;
839
0
  int i, n;
840
0
841
0
  if (
!graft || 0
!lower0
)
842
0
    return isl_ast_graft_free(graft);
843
0
844
0
  space = isl_set_get_space(upper);
845
0
  enforced = isl_basic_set_universe(isl_space_copy(space));
846
0
847
0
  space = isl_space_map_from_set(space);
848
0
  pma = isl_pw_multi_aff_identity(space);
849
0
850
0
  n = isl_pw_aff_list_n_pw_aff(lower);
851
0
  for (i = 0; 
i < n0
;
++i0
)
{0
852
0
    isl_pw_aff *pa;
853
0
    isl_set *enforced_i;
854
0
    isl_basic_set *hull;
855
0
    isl_pw_multi_aff *pma_i;
856
0
857
0
    pa = isl_pw_aff_list_get_pw_aff(lower, i);
858
0
    pa = isl_pw_aff_ceil(pa);
859
0
    pma_i = isl_pw_multi_aff_copy(pma);
860
0
    pma_i = isl_pw_multi_aff_set_pw_aff(pma_i, pos, pa);
861
0
    enforced_i = isl_set_copy(upper);
862
0
    enforced_i = isl_set_preimage_pw_multi_aff(enforced_i, pma_i);
863
0
    hull = isl_set_simple_hull(enforced_i);
864
0
    enforced = isl_basic_set_intersect(enforced, hull);
865
0
  }
866
0
867
0
  isl_pw_multi_aff_free(pma);
868
0
869
0
  graft = isl_ast_graft_enforce(graft, enforced);
870
0
871
0
  return graft;
872
0
}
873
874
/* Compute the constraints on the outer dimensions enforced by
875
 * graft->node and add those constraints to graft->enforced,
876
 * in case the upper bound is expressed as
877
 * a list of affine expressions "upper".
878
 *
879
 * The enforced condition is that each lower bound expression is less
880
 * than or equal to each upper bound expression.
881
 */
882
static __isl_give isl_ast_graft *set_enforced_from_list(
883
  __isl_take isl_ast_graft *graft,
884
  __isl_keep isl_pw_aff_list *lower, __isl_keep isl_pw_aff_list *upper)
885
637
{
886
637
  isl_set *cond;
887
637
  isl_basic_set *enforced;
888
637
889
637
  lower = isl_pw_aff_list_copy(lower);
890
637
  upper = isl_pw_aff_list_copy(upper);
891
637
  cond = isl_pw_aff_list_le_set(lower, upper);
892
637
  enforced = isl_set_simple_hull(cond);
893
637
  graft = isl_ast_graft_enforce(graft, enforced);
894
637
895
637
  return graft;
896
637
}
897
898
/* Does "aff" have a negative constant term?
899
 */
900
static isl_stat aff_constant_is_negative(__isl_take isl_set *set,
901
  __isl_take isl_aff *aff, void *user)
902
647
{
903
647
  int *neg = user;
904
647
  isl_val *v;
905
647
906
647
  v = isl_aff_get_constant_val(aff);
907
647
  *neg = isl_val_is_neg(v);
908
647
  isl_val_free(v);
909
647
  isl_set_free(set);
910
647
  isl_aff_free(aff);
911
647
912
487
  return *neg ? 
isl_stat_ok160
:
isl_stat_error487
;
913
647
}
914
915
/* Does "pa" have a negative constant term over its entire domain?
916
 */
917
static isl_stat pw_aff_constant_is_negative(__isl_take isl_pw_aff *pa,
918
  void *user)
919
647
{
920
647
  isl_stat r;
921
647
  int *neg = user;
922
647
923
647
  r = isl_pw_aff_foreach_piece(pa, &aff_constant_is_negative, user);
924
647
  isl_pw_aff_free(pa);
925
647
926
487
  return (*neg && 
r >= 0160
) ?
isl_stat_ok160
:
isl_stat_error487
;
927
647
}
928
929
/* Does each element in "list" have a negative constant term?
930
 *
931
 * The callback terminates the iteration as soon an element has been
932
 * found that does not have a negative constant term.
933
 */
934
static int list_constant_is_negative(__isl_keep isl_pw_aff_list *list)
935
637
{
936
637
  int neg = 1;
937
637
938
637
  if (isl_pw_aff_list_foreach(list,
939
487
        &pw_aff_constant_is_negative, &neg) < 0 && neg)
940
0
    return -1;
941
637
942
637
  return neg;
943
637
}
944
945
/* Add 1 to each of the elements in "list", where each of these elements
946
 * is defined over the internal schedule space of "build".
947
 */
948
static __isl_give isl_pw_aff_list *list_add_one(
949
  __isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
950
150
{
951
150
  int i, n;
952
150
  isl_space *space;
953
150
  isl_aff *aff;
954
150
  isl_pw_aff *one;
955
150
956
150
  space = isl_ast_build_get_space(build, 1);
957
150
  aff = isl_aff_zero_on_domain(isl_local_space_from_space(space));
958
150
  aff = isl_aff_add_constant_si(aff, 1);
959
150
  one = isl_pw_aff_from_aff(aff);
960
150
961
150
  n = isl_pw_aff_list_n_pw_aff(list);
962
300
  for (i = 0; 
i < n300
;
++i150
)
{150
963
150
    isl_pw_aff *pa;
964
150
    pa = isl_pw_aff_list_get_pw_aff(list, i);
965
150
    pa = isl_pw_aff_add(pa, isl_pw_aff_copy(one));
966
150
    list = isl_pw_aff_list_set_pw_aff(list, i, pa);
967
150
  }
968
150
969
150
  isl_pw_aff_free(one);
970
150
971
150
  return list;
972
150
}
973
974
/* Set the condition part of the for node graft->node in case
975
 * the upper bound is represented as a list of piecewise affine expressions.
976
 *
977
 * In particular, set the condition to
978
 *
979
 *  iterator <= min(list of upper bounds)
980
 *
981
 * If each of the upper bounds has a negative constant term, then
982
 * set the condition to
983
 *
984
 *  iterator < min(list of (upper bound + 1)s)
985
 *
986
 */
987
static __isl_give isl_ast_graft *set_for_cond_from_list(
988
  __isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *list,
989
  __isl_keep isl_ast_build *build)
990
637
{
991
637
  int neg;
992
637
  isl_ast_expr *bound, *iterator, *cond;
993
637
  enum isl_ast_op_type type = isl_ast_op_le;
994
637
995
637
  if (
!graft || 637
!list637
)
996
0
    return isl_ast_graft_free(graft);
997
637
998
637
  neg = list_constant_is_negative(list);
999
637
  if (neg < 0)
1000
0
    return isl_ast_graft_free(graft);
1001
637
  list = isl_pw_aff_list_copy(list);
1002
637
  if (
neg637
)
{150
1003
150
    list = list_add_one(list, build);
1004
150
    type = isl_ast_op_lt;
1005
150
  }
1006
637
1007
637
  bound = reduce_list(isl_ast_op_min, list, build);
1008
637
  iterator = isl_ast_expr_copy(graft->node->u.f.iterator);
1009
637
  cond = isl_ast_expr_alloc_binary(type, iterator, bound);
1010
637
  graft->node->u.f.cond = cond;
1011
637
1012
637
  isl_pw_aff_list_free(list);
1013
637
  if (!graft->node->u.f.cond)
1014
0
    return isl_ast_graft_free(graft);
1015
637
  return graft;
1016
637
}
1017
1018
/* Set the condition part of the for node graft->node in case
1019
 * the upper bound is represented as a set.
1020
 */
1021
static __isl_give isl_ast_graft *set_for_cond_from_set(
1022
  __isl_take isl_ast_graft *graft, __isl_keep isl_set *set,
1023
  __isl_keep isl_ast_build *build)
1024
0
{
1025
0
  isl_ast_expr *cond;
1026
0
1027
0
  if (!graft)
1028
0
    return NULL;
1029
0
1030
0
  cond = isl_ast_build_expr_from_set_internal(build, isl_set_copy(set));
1031
0
  graft->node->u.f.cond = cond;
1032
0
  if (!graft->node->u.f.cond)
1033
0
    return isl_ast_graft_free(graft);
1034
0
  return graft;
1035
0
}
1036
1037
/* Construct an isl_ast_expr for the increment (i.e., stride) of
1038
 * the current dimension.
1039
 */
1040
static __isl_give isl_ast_expr *for_inc(__isl_keep isl_ast_build *build)
1041
637
{
1042
637
  int depth;
1043
637
  isl_val *v;
1044
637
  isl_ctx *ctx;
1045
637
1046
637
  if (!build)
1047
0
    return NULL;
1048
637
  ctx = isl_ast_build_get_ctx(build);
1049
637
  depth = isl_ast_build_get_depth(build);
1050
637
1051
637
  if (!isl_ast_build_has_stride(build, depth))
1052
626
    return isl_ast_expr_alloc_int_si(ctx, 1);
1053
637
1054
11
  v = isl_ast_build_get_stride(build, depth);
1055
11
  return isl_ast_expr_from_val(v);
1056
637
}
1057
1058
/* Should we express the loop condition as
1059
 *
1060
 *  iterator <= min(list of upper bounds)
1061
 *
1062
 * or as a conjunction of constraints?
1063
 *
1064
 * The first is constructed from a list of upper bounds.
1065
 * The second is constructed from a set.
1066
 *
1067
 * If there are no upper bounds in "constraints", then this could mean
1068
 * that "domain" simply doesn't have an upper bound or that we didn't
1069
 * pick any upper bound.  In the first case, we want to generate the
1070
 * loop condition as a(n empty) conjunction of constraints
1071
 * In the second case, we will compute
1072
 * a single upper bound from "domain" and so we use the list form.
1073
 *
1074
 * If there are upper bounds in "constraints",
1075
 * then we use the list form iff the atomic_upper_bound option is set.
1076
 */
1077
static int use_upper_bound_list(isl_ctx *ctx, int n_upper,
1078
  __isl_keep isl_set *domain, int depth)
1079
637
{
1080
637
  if (n_upper > 0)
1081
637
    return isl_options_get_ast_build_atomic_upper_bound(ctx);
1082
637
  else
1083
0
    return isl_set_dim_has_upper_bound(domain, isl_dim_set, depth);
1084
637
}
1085
1086
/* Fill in the expressions of the for node in graft->node.
1087
 *
1088
 * In particular,
1089
 * - set the initialization part of the loop to the maximum of the lower bounds
1090
 * - extract the increment from the stride of the current dimension
1091
 * - construct the for condition either based on a list of upper bounds
1092
 *  or on a set of upper bound constraints.
1093
 */
1094
static __isl_give isl_ast_graft *set_for_node_expressions(
1095
  __isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *lower,
1096
  int use_list, __isl_keep isl_pw_aff_list *upper_list,
1097
  __isl_keep isl_set *upper_set, __isl_keep isl_ast_build *build)
1098
637
{
1099
637
  isl_ast_node *node;
1100
637
1101
637
  if (!graft)
1102
0
    return NULL;
1103
637
1104
637
  build = isl_ast_build_copy(build);
1105
637
1106
637
  node = graft->node;
1107
637
  node->u.f.init = reduce_list(isl_ast_op_max, lower, build);
1108
637
  node->u.f.inc = for_inc(build);
1109
637
1110
637
  if (
!node->u.f.init || 637
!node->u.f.inc637
)
1111
0
    graft = isl_ast_graft_free(graft);
1112
637
1113
637
  if (use_list)
1114
637
    graft = set_for_cond_from_list(graft, upper_list, build);
1115
637
  else
1116
0
    graft = set_for_cond_from_set(graft, upper_set, build);
1117
637
1118
637
  isl_ast_build_free(build);
1119
637
1120
637
  return graft;
1121
637
}
1122
1123
/* Update "graft" based on "bounds" and "domain" for the generic,
1124
 * non-degenerate, case.
1125
 *
1126
 * "c_lower" and "c_upper" contain the lower and upper bounds
1127
 * that the loop node should express.
1128
 * "domain" is the subset of the intersection of the constraints
1129
 * for which some code is executed.
1130
 *
1131
 * There may be zero lower bounds or zero upper bounds in "constraints"
1132
 * in case the list of constraints was created
1133
 * based on the atomic option or based on separation with explicit bounds.
1134
 * In that case, we use "domain" to derive lower and/or upper bounds.
1135
 *
1136
 * We first compute a list of one or more lower bounds.
1137
 *
1138
 * Then we decide if we want to express the condition as
1139
 *
1140
 *  iterator <= min(list of upper bounds)
1141
 *
1142
 * or as a conjunction of constraints.
1143
 *
1144
 * The set of enforced constraints is then computed either based on
1145
 * a list of upper bounds or on a set of upper bound constraints.
1146
 * We do not compute any enforced constraints if we were forced
1147
 * to compute a lower or upper bound using exact_bound.  The domains
1148
 * of the resulting expressions may imply some bounds on outer dimensions
1149
 * that we do not want to appear in the enforced constraints since
1150
 * they are not actually enforced by the corresponding code.
1151
 *
1152
 * Finally, we fill in the expressions of the for node.
1153
 */
1154
static __isl_give isl_ast_graft *refine_generic_bounds(
1155
  __isl_take isl_ast_graft *graft,
1156
  __isl_take isl_constraint_list *c_lower,
1157
  __isl_take isl_constraint_list *c_upper,
1158
  __isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
1159
637
{
1160
637
  int depth;
1161
637
  isl_ctx *ctx;
1162
637
  isl_pw_aff_list *lower;
1163
637
  int use_list;
1164
637
  isl_set *upper_set = NULL;
1165
637
  isl_pw_aff_list *upper_list = NULL;
1166
637
  int n_lower, n_upper;
1167
637
1168
637
  if (
!graft || 637
!c_lower637
||
!c_upper637
||
!build637
)
1169
0
    goto error;
1170
637
1171
637
  depth = isl_ast_build_get_depth(build);
1172
637
  ctx = isl_ast_graft_get_ctx(graft);
1173
637
1174
637
  n_lower = isl_constraint_list_n_constraint(c_lower);
1175
637
  n_upper = isl_constraint_list_n_constraint(c_upper);
1176
637
1177
637
  use_list = use_upper_bound_list(ctx, n_upper, domain, depth);
1178
637
1179
637
  lower = lower_bounds(c_lower, depth, domain, build);
1180
637
1181
637
  if (use_list)
1182
637
    upper_list = upper_bounds(c_upper, depth, domain, build);
1183
0
  else 
if (0
n_upper > 00
)
1184
0
    upper_set = intersect_constraints(c_upper);
1185
0
  else
1186
0
    upper_set = isl_set_universe(isl_set_get_space(domain));
1187
637
1188
637
  if (
n_lower == 0 || 637
n_upper == 0637
)
1189
0
    ;
1190
637
  else 
if (637
use_list637
)
1191
637
    graft = set_enforced_from_list(graft, lower, upper_list);
1192
637
  else
1193
0
    graft = set_enforced_from_set(graft, lower, depth, upper_set);
1194
637
1195
637
  graft = set_for_node_expressions(graft, lower, use_list, upper_list,
1196
637
          upper_set, build);
1197
637
1198
637
  isl_pw_aff_list_free(lower);
1199
637
  isl_pw_aff_list_free(upper_list);
1200
637
  isl_set_free(upper_set);
1201
637
  isl_constraint_list_free(c_lower);
1202
637
  isl_constraint_list_free(c_upper);
1203
637
1204
637
  return graft;
1205
0
error:
1206
0
  isl_constraint_list_free(c_lower);
1207
0
  isl_constraint_list_free(c_upper);
1208
0
  return isl_ast_graft_free(graft);
1209
637
}
1210
1211
/* Internal data structure used inside count_constraints to keep
1212
 * track of the number of constraints that are independent of dimension "pos",
1213
 * the lower bounds in "pos" and the upper bounds in "pos".
1214
 */
1215
struct isl_ast_count_constraints_data {
1216
  int pos;
1217
1218
  int n_indep;
1219
  int n_lower;
1220
  int n_upper;
1221
};
1222
1223
/* Increment data->n_indep, data->lower or data->upper depending
1224
 * on whether "c" is independenct of dimensions data->pos,
1225
 * a lower bound or an upper bound.
1226
 */
1227
static isl_stat count_constraints(__isl_take isl_constraint *c, void *user)
1228
1.38k
{
1229
1.38k
  struct isl_ast_count_constraints_data *data = user;
1230
1.38k
1231
1.38k
  if (isl_constraint_is_lower_bound(c, isl_dim_set, data->pos))
1232
645
    data->n_lower++;
1233
741
  else 
if (741
isl_constraint_is_upper_bound(c, isl_dim_set, data->pos)741
)
1234
692
    data->n_upper++;
1235
741
  else
1236
49
    data->n_indep++;
1237
1.38k
1238
1.38k
  isl_constraint_free(c);
1239
1.38k
1240
1.38k
  return isl_stat_ok;
1241
1.38k
}
1242
1243
/* Update "graft" based on "bounds" and "domain" for the generic,
1244
 * non-degenerate, case.
1245
 *
1246
 * "list" respresent the list of bounds that need to be encoded by
1247
 * the for loop.  Only the constraints that involve the iterator
1248
 * are relevant here.  The other constraints are taken care of by
1249
 * the caller and are included in the generated constraints of "build".
1250
 * "domain" is the subset of the intersection of the constraints
1251
 * for which some code is executed.
1252
 * "build" is the build in which graft->node was created.
1253
 *
1254
 * We separate lower bounds, upper bounds and constraints that
1255
 * are independent of the loop iterator.
1256
 *
1257
 * The actual for loop bounds are generated in refine_generic_bounds.
1258
 */
1259
static __isl_give isl_ast_graft *refine_generic_split(
1260
  __isl_take isl_ast_graft *graft, __isl_take isl_constraint_list *list,
1261
  __isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
1262
637
{
1263
637
  struct isl_ast_count_constraints_data data;
1264
637
  isl_constraint_list *lower;
1265
637
  isl_constraint_list *upper;
1266
637
1267
637
  if (!list)
1268
0
    return isl_ast_graft_free(graft);
1269
637
1270
637
  data.pos = isl_ast_build_get_depth(build);
1271
637
1272
637
  list = isl_constraint_list_sort(list, &cmp_constraint, &data.pos);
1273
637
  if (!list)
1274
0
    return isl_ast_graft_free(graft);
1275
637
1276
637
  data.n_indep = data.n_lower = data.n_upper = 0;
1277
637
  if (
isl_constraint_list_foreach(list, &count_constraints, &data) < 0637
)
{0
1278
0
    isl_constraint_list_free(list);
1279
0
    return isl_ast_graft_free(graft);
1280
0
  }
1281
637
1282
637
  lower = isl_constraint_list_drop(list, 0, data.n_indep);
1283
637
  upper = isl_constraint_list_copy(lower);
1284
637
  lower = isl_constraint_list_drop(lower, data.n_lower, data.n_upper);
1285
637
  upper = isl_constraint_list_drop(upper, 0, data.n_lower);
1286
637
1287
637
  return refine_generic_bounds(graft, lower, upper, domain, build);
1288
637
}
1289
1290
/* Update "graft" based on "bounds" and "domain" for the generic,
1291
 * non-degenerate, case.
1292
 *
1293
 * "bounds" respresent the bounds that need to be encoded by
1294
 * the for loop (or a guard around the for loop).
1295
 * "domain" is the subset of "bounds" for which some code is executed.
1296
 * "build" is the build in which graft->node was created.
1297
 *
1298
 * We break up "bounds" into a list of constraints and continue with
1299
 * refine_generic_split.
1300
 */
1301
static __isl_give isl_ast_graft *refine_generic(
1302
  __isl_take isl_ast_graft *graft,
1303
  __isl_keep isl_basic_set *bounds, __isl_keep isl_set *domain,
1304
  __isl_keep isl_ast_build *build)
1305
637
{
1306
637
  isl_constraint_list *list;
1307
637
1308
637
  if (
!build || 637
!graft637
)
1309
0
    return isl_ast_graft_free(graft);
1310
637
1311
637
  list = isl_basic_set_get_constraint_list(bounds);
1312
637
1313
637
  graft = refine_generic_split(graft, list, domain, build);
1314
637
1315
637
  return graft;
1316
637
}
1317
1318
/* Create a for node for the current level.
1319
 *
1320
 * Mark the for node degenerate if "degenerate" is set.
1321
 */
1322
static __isl_give isl_ast_node *create_for(__isl_keep isl_ast_build *build,
1323
  int degenerate)
1324
637
{
1325
637
  int depth;
1326
637
  isl_id *id;
1327
637
  isl_ast_node *node;
1328
637
1329
637
  if (!build)
1330
0
    return NULL;
1331
637
1332
637
  depth = isl_ast_build_get_depth(build);
1333
637
  id = isl_ast_build_get_iterator_id(build, depth);
1334
637
  node = isl_ast_node_alloc_for(id);
1335
637
  if (degenerate)
1336
0
    node = isl_ast_node_for_mark_degenerate(node);
1337
637
1338
637
  return node;
1339
637
}
1340
1341
/* If the ast_build_exploit_nested_bounds option is set, then return
1342
 * the constraints enforced by all elements in "list".
1343
 * Otherwise, return the universe.
1344
 */
1345
static __isl_give isl_basic_set *extract_shared_enforced(
1346
  __isl_keep isl_ast_graft_list *list, __isl_keep isl_ast_build *build)
1347
2.16k
{
1348
2.16k
  isl_ctx *ctx;
1349
2.16k
  isl_space *space;
1350
2.16k
1351
2.16k
  if (!list)
1352
0
    return NULL;
1353
2.16k
1354
2.16k
  ctx = isl_ast_graft_list_get_ctx(list);
1355
2.16k
  if (isl_options_get_ast_build_exploit_nested_bounds(ctx))
1356
2.16k
    return isl_ast_graft_list_extract_shared_enforced(list, build);
1357
2.16k
1358
2
  space = isl_ast_build_get_space(build, 1);
1359
2
  return isl_basic_set_universe(space);
1360
2.16k
}
1361
1362
/* Return the pending constraints of "build" that are not already taken
1363
 * care of (by a combination of "enforced" and the generated constraints
1364
 * of "build").
1365
 */
1366
static __isl_give isl_set *extract_pending(__isl_keep isl_ast_build *build,
1367
  __isl_keep isl_basic_set *enforced)
1368
2.16k
{
1369
2.16k
  isl_set *guard, *context;
1370
2.16k
1371
2.16k
  guard = isl_ast_build_get_pending(build);
1372
2.16k
  context = isl_set_from_basic_set(isl_basic_set_copy(enforced));
1373
2.16k
  context = isl_set_intersect(context,
1374
2.16k
          isl_ast_build_get_generated(build));
1375
2.16k
  return isl_set_gist(guard, context);
1376
2.16k
}
1377
1378
/* Create an AST node for the current dimension based on
1379
 * the schedule domain "bounds" and return the node encapsulated
1380
 * in an isl_ast_graft.
1381
 *
1382
 * "executed" is the current inverse schedule, taking into account
1383
 * the bounds in "bounds"
1384
 * "domain" is the domain of "executed", with inner dimensions projected out.
1385
 * It may be a strict subset of "bounds" in case "bounds" was created
1386
 * based on the atomic option or based on separation with explicit bounds.
1387
 *
1388
 * "domain" may satisfy additional equalities that result
1389
 * from intersecting "executed" with "bounds" in add_node.
1390
 * It may also satisfy some global constraints that were dropped out because
1391
 * we performed separation with explicit bounds.
1392
 * The very first step is then to copy these constraints to "bounds".
1393
 *
1394
 * Since we may be calling before_each_for and after_each_for
1395
 * callbacks, we record the current inverse schedule in the build.
1396
 *
1397
 * We consider three builds,
1398
 * "build" is the one in which the current level is created,
1399
 * "body_build" is the build in which the next level is created,
1400
 * "sub_build" is essentially the same as "body_build", except that
1401
 * the depth has not been increased yet.
1402
 *
1403
 * "build" already contains information (in strides and offsets)
1404
 * about the strides at the current level, but this information is not
1405
 * reflected in the build->domain.
1406
 * We first add this information and the "bounds" to the sub_build->domain.
1407
 * isl_ast_build_set_loop_bounds adds the stride information and
1408
 * checks whether the current dimension attains
1409
 * only a single value and whether this single value can be represented using
1410
 * a single affine expression.
1411
 * In the first case, the current level is considered "degenerate".
1412
 * In the second, sub-case, the current level is considered "eliminated".
1413
 * Eliminated levels don't need to be reflected in the AST since we can
1414
 * simply plug in the affine expression.  For degenerate, but non-eliminated,
1415
 * levels, we do introduce a for node, but mark is as degenerate so that
1416
 * it can be printed as an assignment of the single value to the loop
1417
 * "iterator".
1418
 *
1419
 * If the current level is eliminated, we explicitly plug in the value
1420
 * for the current level found by isl_ast_build_set_loop_bounds in the
1421
 * inverse schedule.  This ensures that if we are working on a slice
1422
 * of the domain based on information available in the inverse schedule
1423
 * and the build domain, that then this information is also reflected
1424
 * in the inverse schedule.  This operation also eliminates the current
1425
 * dimension from the inverse schedule making sure no inner dimensions depend
1426
 * on the current dimension.  Otherwise, we create a for node, marking
1427
 * it degenerate if appropriate.  The initial for node is still incomplete
1428
 * and will be completed in either refine_degenerate or refine_generic.
1429
 *
1430
 * We then generate a sequence of grafts for the next level,
1431
 * create a surrounding graft for the current level and insert
1432
 * the for node we created (if the current level is not eliminated).
1433
 * Before creating a graft for the current level, we first extract
1434
 * hoistable constraints from the child guards and combine them
1435
 * with the pending constraints in the build.  These constraints
1436
 * are used to simplify the child guards and then added to the guard
1437
 * of the current graft to ensure that they will be generated.
1438
 * If the hoisted guard is a disjunction, then we use it directly
1439
 * to gist the guards on the children before intersect it with the
1440
 * pending constraints.  We do so because this disjunction is typically
1441
 * identical to the guards on the children such that these guards
1442
 * can be effectively removed completely.  After the intersection,
1443
 * the gist operation would have a harder time figuring this out.
1444
 *
1445
 * Finally, we set the bounds of the for loop in either
1446
 * refine_degenerate or refine_generic.
1447
 * We do so in a context where the pending constraints of the build
1448
 * have been replaced by the guard of the current graft.
1449
 */
1450
static __isl_give isl_ast_graft *create_node_scaled(
1451
  __isl_take isl_union_map *executed,
1452
  __isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
1453
  __isl_take isl_ast_build *build)
1454
2.16k
{
1455
2.16k
  int depth;
1456
2.16k
  int degenerate, eliminated;
1457
2.16k
  isl_basic_set *hull;
1458
2.16k
  isl_basic_set *enforced;
1459
2.16k
  isl_set *guard, *hoisted;
1460
2.16k
  isl_ast_node *node = NULL;
1461
2.16k
  isl_ast_graft *graft;
1462
2.16k
  isl_ast_graft_list *children;
1463
2.16k
  isl_ast_build *sub_build;
1464
2.16k
  isl_ast_build *body_build;
1465
2.16k
1466
2.16k
  domain = isl_ast_build_eliminate_divs(build, domain);
1467
2.16k
  domain = isl_set_detect_equalities(domain);
1468
2.16k
  hull = isl_set_unshifted_simple_hull(isl_set_copy(domain));
1469
2.16k
  bounds = isl_basic_set_intersect(bounds, hull);
1470
2.16k
  build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));
1471
2.16k
1472
2.16k
  depth = isl_ast_build_get_depth(build);
1473
2.16k
  sub_build = isl_ast_build_copy(build);
1474
2.16k
  bounds = isl_basic_set_remove_redundancies(bounds);
1475
2.16k
  bounds = isl_ast_build_specialize_basic_set(sub_build, bounds);
1476
2.16k
  sub_build = isl_ast_build_set_loop_bounds(sub_build,
1477
2.16k
            isl_basic_set_copy(bounds));
1478
2.16k
  degenerate = isl_ast_build_has_value(sub_build);
1479
2.16k
  eliminated = isl_ast_build_has_affine_value(sub_build, depth);
1480
2.16k
  if (
degenerate < 0 || 2.16k
eliminated < 02.16k
)
1481
0
    executed = isl_union_map_free(executed);
1482
2.16k
  if (!degenerate)
1483
637
    bounds = isl_ast_build_compute_gist_basic_set(build, bounds);
1484
2.16k
  sub_build = isl_ast_build_set_pending_generated(sub_build,
1485
2.16k
            isl_basic_set_copy(bounds));
1486
2.16k
  if (eliminated)
1487
1.53k
    executed = plug_in_values(executed, sub_build);
1488
2.16k
  else
1489
637
    node = create_for(build, degenerate);
1490
2.16k
1491
2.16k
  body_build = isl_ast_build_copy(sub_build);
1492
2.16k
  body_build = isl_ast_build_increase_depth(body_build);
1493
2.16k
  if (!eliminated)
1494
637
    node = before_each_for(node, body_build);
1495
2.16k
  children = generate_next_level(executed,
1496
2.16k
            isl_ast_build_copy(body_build));
1497
2.16k
1498
2.16k
  enforced = extract_shared_enforced(children, build);
1499
2.16k
  guard = extract_pending(sub_build, enforced);
1500
2.16k
  hoisted = isl_ast_graft_list_extract_hoistable_guard(children, build);
1501
2.16k
  if (isl_set_n_basic_set(hoisted) > 1)
1502
13
    children = isl_ast_graft_list_gist_guards(children,
1503
13
                isl_set_copy(hoisted));
1504
2.16k
  guard = isl_set_intersect(guard, hoisted);
1505
2.16k
  if (!eliminated)
1506
637
    guard = add_implied_guards(guard, degenerate, bounds, build);
1507
2.16k
1508
2.16k
  graft = isl_ast_graft_alloc_from_children(children,
1509
2.16k
          isl_set_copy(guard), enforced, build, sub_build);
1510
2.16k
1511
2.16k
  if (
!eliminated2.16k
)
{637
1512
637
    isl_ast_build *for_build;
1513
637
1514
637
    graft = isl_ast_graft_insert_for(graft, node);
1515
637
    for_build = isl_ast_build_copy(build);
1516
637
    for_build = isl_ast_build_replace_pending_by_guard(for_build,
1517
637
              isl_set_copy(guard));
1518
637
    if (degenerate)
1519
0
      graft = refine_degenerate(graft, for_build, sub_build);
1520
637
    else
1521
637
      graft = refine_generic(graft, bounds,
1522
637
          domain, for_build);
1523
637
    isl_ast_build_free(for_build);
1524
637
  }
1525
2.16k
  isl_set_free(guard);
1526
2.16k
  if (!eliminated)
1527
637
    graft = after_each_for(graft, body_build);
1528
2.16k
1529
2.16k
  isl_ast_build_free(body_build);
1530
2.16k
  isl_ast_build_free(sub_build);
1531
2.16k
  isl_ast_build_free(build);
1532
2.16k
  isl_basic_set_free(bounds);
1533
2.16k
  isl_set_free(domain);
1534
2.16k
1535
2.16k
  return graft;
1536
2.16k
}
1537
1538
/* Internal data structure for checking if all constraints involving
1539
 * the input dimension "depth" are such that the other coefficients
1540
 * are multiples of "m", reducing "m" if they are not.
1541
 * If "m" is reduced all the way down to "1", then the check has failed
1542
 * and we break out of the iteration.
1543
 */
1544
struct isl_check_scaled_data {
1545
  int depth;
1546
  isl_val *m;
1547
};
1548
1549
/* If constraint "c" involves the input dimension data->depth,
1550
 * then make sure that all the other coefficients are multiples of data->m,
1551
 * reducing data->m if needed.
1552
 * Break out of the iteration if data->m has become equal to "1".
1553
 */
1554
static isl_stat constraint_check_scaled(__isl_take isl_constraint *c,
1555
  void *user)
1556
21
{
1557
21
  struct isl_check_scaled_data *data = user;
1558
21
  int i, j, n;
1559
21
  enum isl_dim_type t[] = { isl_dim_param, isl_dim_in, isl_dim_out,
1560
21
            isl_dim_div };
1561
21
1562
21
  if (
!isl_constraint_involves_dims(c, isl_dim_in, data->depth, 1)21
)
{9
1563
9
    isl_constraint_free(c);
1564
9
    return isl_stat_ok;
1565
9
  }
1566
21
1567
45
  
for (i = 0; 12
i < 445
;
++i33
)
{39
1568
39
    n = isl_constraint_dim(c, t[i]);
1569
80
    for (j = 0; 
j < n80
;
++j41
)
{47
1570
47
      isl_val *d;
1571
47
1572
47
      if (
t[i] == isl_dim_in && 47
j == data->depth26
)
1573
12
        continue;
1574
35
      
if (35
!isl_constraint_involves_dims(c, t[i], j, 1)35
)
1575
23
        continue;
1576
12
      d = isl_constraint_get_coefficient_val(c, t[i], j);
1577
12
      data->m = isl_val_gcd(data->m, d);
1578
12
      if (isl_val_is_one(data->m))
1579
6
        break;
1580
12
    }
1581
39
    if (j < n)
1582
6
      break;
1583
39
  }
1584
12
1585
12
  isl_constraint_free(c);
1586
12
1587
6
  return i < 4 ? 
isl_stat_error6
:
isl_stat_ok6
;
1588
21
}
1589
1590
/* For each constraint of "bmap" that involves the input dimension data->depth,
1591
 * make sure that all the other coefficients are multiples of data->m,
1592
 * reducing data->m if needed.
1593
 * Break out of the iteration if data->m has become equal to "1".
1594
 */
1595
static isl_stat basic_map_check_scaled(__isl_take isl_basic_map *bmap,
1596
  void *user)
1597
6
{
1598
6
  isl_stat r;
1599
6
1600
6
  r = isl_basic_map_foreach_constraint(bmap,
1601
6
            &constraint_check_scaled, user);
1602
6
  isl_basic_map_free(bmap);
1603
6
1604
6
  return r;
1605
6
}
1606
1607
/* For each constraint of "map" that involves the input dimension data->depth,
1608
 * make sure that all the other coefficients are multiples of data->m,
1609
 * reducing data->m if needed.
1610
 * Break out of the iteration if data->m has become equal to "1".
1611
 */
1612
static isl_stat map_check_scaled(__isl_take isl_map *map, void *user)
1613
6
{
1614
6
  isl_stat r;
1615
6
1616
6
  r = isl_map_foreach_basic_map(map, &basic_map_check_scaled, user);
1617
6
  isl_map_free(map);
1618
6
1619
6
  return r;
1620
6
}
1621
1622
/* Create an AST node for the current dimension based on
1623
 * the schedule domain "bounds" and return the node encapsulated
1624
 * in an isl_ast_graft.
1625
 *
1626
 * "executed" is the current inverse schedule, taking into account
1627
 * the bounds in "bounds"
1628
 * "domain" is the domain of "executed", with inner dimensions projected out.
1629
 *
1630
 *
1631
 * Before moving on to the actual AST node construction in create_node_scaled,
1632
 * we first check if the current dimension is strided and if we can scale
1633
 * down this stride.  Note that we only do this if the ast_build_scale_strides
1634
 * option is set.
1635
 *
1636
 * In particular, let the current dimension take on values
1637
 *
1638
 *  f + s a
1639
 *
1640
 * with a an integer.  We check if we can find an integer m that (obviously)
1641
 * divides both f and s.
1642
 *
1643
 * If so, we check if the current dimension only appears in constraints
1644
 * where the coefficients of the other variables are multiples of m.
1645
 * We perform this extra check to avoid the risk of introducing
1646
 * divisions by scaling down the current dimension.
1647
 *
1648
 * If so, we scale the current dimension down by a factor of m.
1649
 * That is, we plug in
1650
 *
1651
 *  i = m i'              (1)
1652
 *
1653
 * Note that in principle we could always scale down strided loops
1654
 * by plugging in
1655
 *
1656
 *  i = f + s i'
1657
 *
1658
 * but this may result in i' taking on larger values than the original i,
1659
 * due to the shift by "f".
1660
 * By constrast, the scaling in (1) can only reduce the (absolute) value "i".
1661
 */
1662
static __isl_give isl_ast_graft *create_node(__isl_take isl_union_map *executed,
1663
  __isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
1664
  __isl_take isl_ast_build *build)
1665
2.16k
{
1666
2.16k
  struct isl_check_scaled_data data;
1667
2.16k
  isl_ctx *ctx;
1668
2.16k
  isl_aff *offset;
1669
2.16k
  isl_val *d;
1670
2.16k
1671
2.16k
  ctx = isl_ast_build_get_ctx(build);
1672
2.16k
  if (!isl_options_get_ast_build_scale_strides(ctx))
1673
0
    return create_node_scaled(executed, bounds, domain, build);
1674
2.16k
1675
2.16k
  data.depth = isl_ast_build_get_depth(build);
1676
2.16k
  if (!isl_ast_build_has_stride(build, data.depth))
1677
2.15k
    return create_node_scaled(executed, bounds, domain, build);
1678
2.16k
1679
14
  offset = isl_ast_build_get_offset(build, data.depth);
1680
14
  data.m = isl_ast_build_get_stride(build, data.depth);
1681
14
  if (!data.m)
1682
0
    offset = isl_aff_free(offset);
1683
14
  offset = isl_aff_scale_down_val(offset, isl_val_copy(data.m));
1684
14
  d = isl_aff_get_denominator_val(offset);
1685
14
  if (!d)
1686
0
    executed = isl_union_map_free(executed);
1687
14
1688
14
  if (
executed && 14
isl_val_is_divisible_by(data.m, d)14
)
1689
12
    data.m = isl_val_div(data.m, d);
1690
2
  else {
1691
2
    data.m = isl_val_set_si(data.m, 1);
1692
2
    isl_val_free(d);
1693
2
  }
1694
14
1695
14
  if (
!isl_val_is_one(data.m)14
)
{6
1696
6
    if (isl_union_map_foreach_map(executed, &map_check_scaled,
1697
6
            &data) < 0 &&
1698
6
        !isl_val_is_one(data.m))
1699
0
      executed = isl_union_map_free(executed);
1700
6
  }
1701
14
1702
14
  if (
!isl_val_is_one(data.m)14
)
{0
1703
0
    isl_space *space;
1704
0
    isl_multi_aff *ma;
1705
0
    isl_aff *aff;
1706
0
    isl_map *map;
1707
0
    isl_union_map *umap;
1708
0
1709
0
    space = isl_ast_build_get_space(build, 1);
1710
0
    space = isl_space_map_from_set(space);
1711
0
    ma = isl_multi_aff_identity(space);
1712
0
    aff = isl_multi_aff_get_aff(ma, data.depth);
1713
0
    aff = isl_aff_scale_val(aff, isl_val_copy(data.m));
1714
0
    ma = isl_multi_aff_set_aff(ma, data.depth, aff);
1715
0
1716
0
    bounds = isl_basic_set_preimage_multi_aff(bounds,
1717
0
            isl_multi_aff_copy(ma));
1718
0
    domain = isl_set_preimage_multi_aff(domain,
1719
0
            isl_multi_aff_copy(ma));
1720
0
    map = isl_map_reverse(isl_map_from_multi_aff(ma));
1721
0
    umap = isl_union_map_from_map(map);
1722
0
    executed = isl_union_map_apply_domain(executed,
1723
0
            isl_union_map_copy(umap));
1724
0
    build = isl_ast_build_scale_down(build, isl_val_copy(data.m),
1725
0
            umap);
1726
0
  }
1727
14
  isl_aff_free(offset);
1728
14
  isl_val_free(data.m);
1729
14
1730
14
  return create_node_scaled(executed, bounds, domain, build);
1731
2.16k
}
1732
1733
/* Add the basic set to the list that "user" points to.
1734
 */
1735
static isl_stat collect_basic_set(__isl_take isl_basic_set *bset, void *user)
1736
806
{
1737
806
  isl_basic_set_list **list = user;
1738
806
1739
806
  *list = isl_basic_set_list_add(*list, bset);
1740
806
1741
806
  return isl_stat_ok;
1742
806
}
1743
1744
/* Extract the basic sets of "set" and collect them in an isl_basic_set_list.
1745
 */
1746
static __isl_give isl_basic_set_list *isl_basic_set_list_from_set(
1747
  __isl_take isl_set *set)
1748
778
{
1749
778
  int n;
1750
778
  isl_ctx *ctx;
1751
778
  isl_basic_set_list *list;
1752
778
1753
778
  if (!set)
1754
0
    return NULL;
1755
778
1756
778
  ctx = isl_set_get_ctx(set);
1757
778
1758
778
  n = isl_set_n_basic_set(set);
1759
778
  list = isl_basic_set_list_alloc(ctx, n);
1760
778
  if (isl_set_foreach_basic_set(set, &collect_basic_set, &list) < 0)
1761
0
    list = isl_basic_set_list_free(list);
1762
778
1763
778
  isl_set_free(set);
1764
778
  return list;
1765
778
}
1766
1767
/* Generate code for the schedule domain "bounds"
1768
 * and add the result to "list".
1769
 *
1770
 * We mainly detect strides here and check if the bounds do not
1771
 * conflict with the current build domain
1772
 * and then pass over control to create_node.
1773
 *
1774
 * "bounds" reflects the bounds on the current dimension and possibly
1775
 * some extra conditions on outer dimensions.
1776
 * It does not, however, include any divs involving the current dimension,
1777
 * so it does not capture any stride constraints.
1778
 * We therefore need to compute that part of the schedule domain that
1779
 * intersects with "bounds" and derive the strides from the result.
1780
 */
1781
static __isl_give isl_ast_graft_list *add_node(
1782
  __isl_take isl_ast_graft_list *list, __isl_take isl_union_map *executed,
1783
  __isl_take isl_basic_set *bounds, __isl_take isl_ast_build *build)
1784
2.65k
{
1785
2.65k
  isl_ast_graft *graft;
1786
2.65k
  isl_set *domain = NULL;
1787
2.65k
  isl_union_set *uset;
1788
2.65k
  int empty, disjoint;
1789
2.65k
1790
2.65k
  uset = isl_union_set_from_basic_set(isl_basic_set_copy(bounds));
1791
2.65k
  executed = isl_union_map_intersect_domain(executed, uset);
1792
2.65k
  empty = isl_union_map_is_empty(executed);
1793
2.65k
  if (empty < 0)
1794
0
    goto error;
1795
2.65k
  
if (2.65k
empty2.65k
)
1796
0
    goto done;
1797
2.65k
1798
2.65k
  uset = isl_union_map_domain(isl_union_map_copy(executed));
1799
2.65k
  domain = isl_set_from_union_set(uset);
1800
2.65k
  domain = isl_ast_build_specialize(build, domain);
1801
2.65k
1802
2.65k
  domain = isl_set_compute_divs(domain);
1803
2.65k
  domain = isl_ast_build_eliminate_inner(build, domain);
1804
2.65k
  disjoint = isl_set_is_disjoint(domain, build->domain);
1805
2.65k
  if (disjoint < 0)
1806
0
    goto error;
1807
2.65k
  
if (2.65k
disjoint2.65k
)
1808
491
    goto done;
1809
2.65k
1810
2.16k
  build = isl_ast_build_detect_strides(build, isl_set_copy(domain));
1811
2.16k
1812
2.16k
  graft = create_node(executed, bounds, domain,
1813
2.16k
        isl_ast_build_copy(build));
1814
2.16k
  list = isl_ast_graft_list_add(list, graft);
1815
2.16k
  isl_ast_build_free(build);
1816
2.16k
  return list;
1817
0
error:
1818
0
  list = isl_ast_graft_list_free(list);
1819
491
done:
1820
491
  isl_set_free(domain);
1821
491
  isl_basic_set_free(bounds);
1822
491
  isl_union_map_free(executed);
1823
491
  isl_ast_build_free(build);
1824
491
  return list;
1825
0
}
1826
1827
/* Does any element of i follow or coincide with any element of j
1828
 * at the current depth for equal values of the outer dimensions?
1829
 */
1830
static isl_bool domain_follows_at_depth(__isl_keep isl_basic_set *i,
1831
  __isl_keep isl_basic_set *j, void *user)
1832
37
{
1833
37
  int depth = *(int *) user;
1834
37
  isl_basic_map *test;
1835
37
  isl_bool empty;
1836
37
  int l;
1837
37
1838
37
  test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
1839
37
                isl_basic_set_copy(j));
1840
55
  for (l = 0; 
l < depth55
;
++l18
)
1841
18
    test = isl_basic_map_equate(test, isl_dim_in, l,
1842
18
            isl_dim_out, l);
1843
37
  test = isl_basic_map_order_ge(test, isl_dim_in, depth,
1844
37
          isl_dim_out, depth);
1845
37
  empty = isl_basic_map_is_empty(test);
1846
37
  isl_basic_map_free(test);
1847
37
1848
37
  return empty < 0 ? 
isl_bool_error0
:
!empty37
;
1849
37
}
1850
1851
/* Split up each element of "list" into a part that is related to "bset"
1852
 * according to "gt" and a part that is not.
1853
 * Return a list that consist of "bset" and all the pieces.
1854
 */
1855
static __isl_give isl_basic_set_list *add_split_on(
1856
  __isl_take isl_basic_set_list *list, __isl_take isl_basic_set *bset,
1857
  __isl_keep isl_basic_map *gt)
1858
0
{
1859
0
  int i, n;
1860
0
  isl_basic_set_list *res;
1861
0
1862
0
  if (!list)
1863
0
    bset = isl_basic_set_free(bset);
1864
0
1865
0
  gt = isl_basic_map_copy(gt);
1866
0
  gt = isl_basic_map_intersect_domain(gt, isl_basic_set_copy(bset));
1867
0
  n = isl_basic_set_list_n_basic_set(list);
1868
0
  res = isl_basic_set_list_from_basic_set(bset);
1869
0
  for (i = 0; 
res && 0
i < n0
;
++i0
)
{0
1870
0
    isl_basic_set *bset;
1871
0
    isl_set *set1, *set2;
1872
0
    isl_basic_map *bmap;
1873
0
    int empty;
1874
0
1875
0
    bset = isl_basic_set_list_get_basic_set(list, i);
1876
0
    bmap = isl_basic_map_copy(gt);
1877
0
    bmap = isl_basic_map_intersect_range(bmap, bset);
1878
0
    bset = isl_basic_map_range(bmap);
1879
0
    empty = isl_basic_set_is_empty(bset);
1880
0
    if (empty < 0)
1881
0
      res = isl_basic_set_list_free(res);
1882
0
    if (
empty0
)
{0
1883
0
      isl_basic_set_free(bset);
1884
0
      bset = isl_basic_set_list_get_basic_set(list, i);
1885
0
      res = isl_basic_set_list_add(res, bset);
1886
0
      continue;
1887
0
    }
1888
0
1889
0
    res = isl_basic_set_list_add(res, isl_basic_set_copy(bset));
1890
0
    set1 = isl_set_from_basic_set(bset);
1891
0
    bset = isl_basic_set_list_get_basic_set(list, i);
1892
0
    set2 = isl_set_from_basic_set(bset);
1893
0
    set1 = isl_set_subtract(set2, set1);
1894
0
    set1 = isl_set_make_disjoint(set1);
1895
0
1896
0
    res = isl_basic_set_list_concat(res,
1897
0
              isl_basic_set_list_from_set(set1));
1898
0
  }
1899
0
  isl_basic_map_free(gt);
1900
0
  isl_basic_set_list_free(list);
1901
0
  return res;
1902
0
}
1903
1904
static __isl_give isl_ast_graft_list *generate_sorted_domains(
1905
  __isl_keep isl_basic_set_list *domain_list,
1906
  __isl_keep isl_union_map *executed,
1907
  __isl_keep isl_ast_build *build);
1908
1909
/* Internal data structure for add_nodes.
1910
 *
1911
 * "executed" and "build" are extra arguments to be passed to add_node.
1912
 * "list" collects the results.
1913
 */
1914
struct isl_add_nodes_data {
1915
  isl_union_map *executed;
1916
  isl_ast_build *build;
1917
1918
  isl_ast_graft_list *list;
1919
};
1920
1921
/* Generate code for the schedule domains in "scc"
1922
 * and add the results to "list".
1923
 *
1924
 * The domains in "scc" form a strongly connected component in the ordering.
1925
 * If the number of domains in "scc" is larger than 1, then this means
1926
 * that we cannot determine a valid ordering for the domains in the component.
1927
 * This should be fairly rare because the individual domains
1928
 * have been made disjoint first.
1929
 * The problem is that the domains may be integrally disjoint but not
1930
 * rationally disjoint.  For example, we may have domains
1931
 *
1932
 *  { [i,i] : 0 <= i <= 1 }   and { [i,1-i] : 0 <= i <= 1 }
1933
 *
1934
 * These two domains have an empty intersection, but their rational
1935
 * relaxations do intersect.  It is impossible to order these domains
1936
 * in the second dimension because the first should be ordered before
1937
 * the second for outer dimension equal to 0, while it should be ordered
1938
 * after for outer dimension equal to 1.
1939
 *
1940
 * This may happen in particular in case of unrolling since the domain
1941
 * of each slice is replaced by its simple hull.
1942
 *
1943
 * For each basic set i in "scc" and for each of the following basic sets j,
1944
 * we split off that part of the basic set i that shares the outer dimensions
1945
 * with j and lies before j in the current dimension.
1946
 * We collect all the pieces in a new list that replaces "scc".
1947
 *
1948
 * While the elements in "scc" should be disjoint, we double-check
1949
 * this property to avoid running into an infinite recursion in case
1950
 * they intersect due to some internal error.
1951
 */
1952
static isl_stat add_nodes(__isl_take isl_basic_set_list *scc, void *user)
1953
32
{
1954
32
  struct isl_add_nodes_data *data = user;
1955
32
  int i, n, depth;
1956
32
  isl_basic_set *bset, *first;
1957
32
  isl_basic_set_list *list;
1958
32
  isl_space *space;
1959
32
  isl_basic_map *gt;
1960
32
1961
32
  n = isl_basic_set_list_n_basic_set(scc);
1962
32
  bset = isl_basic_set_list_get_basic_set(scc, 0);
1963
32
  if (
n == 132
)
{32
1964
32
    isl_basic_set_list_free(scc);
1965
32
    data->list = add_node(data->list,
1966
32
        isl_union_map_copy(data->executed), bset,
1967
32
        isl_ast_build_copy(data->build));
1968
32
    return data->list ? 
isl_stat_ok32
:
isl_stat_error0
;
1969
32
  }
1970
32
1971
0
  depth = isl_ast_build_get_depth(data->build);
1972
0
  space = isl_basic_set_get_space(bset);
1973
0
  space = isl_space_map_from_set(space);
1974
0
  gt = isl_basic_map_universe(space);
1975
0
  for (i = 0; 
i < depth0
;
++i0
)
1976
0
    gt = isl_basic_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
1977
0
  gt = isl_basic_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);
1978
0
1979
0
  first = isl_basic_set_copy(bset);
1980
0
  list = isl_basic_set_list_from_basic_set(bset);
1981
0
  for (i = 1; 
i < n0
;
++i0
)
{0
1982
0
    int disjoint;
1983
0
1984
0
    bset = isl_basic_set_list_get_basic_set(scc, i);
1985
0
1986
0
    disjoint = isl_basic_set_is_disjoint(bset, first);
1987
0
    if (disjoint < 0)
1988
0
      list = isl_basic_set_list_free(list);
1989
0
    else 
if (0
!disjoint0
)
1990
0
      isl_die(isl_basic_set_list_get_ctx(scc),
1991
0
        isl_error_internal,
1992
0
        "basic sets in scc are assumed to be disjoint",
1993
0
        list = isl_basic_set_list_free(list));
1994
0
1995
0
    list = add_split_on(list, bset, gt);
1996
0
  }
1997
0
  isl_basic_set_free(first);
1998
0
  isl_basic_map_free(gt);
1999
0
  isl_basic_set_list_free(scc);
2000
0
  scc = list;
2001
0
  data->list = isl_ast_graft_list_concat(data->list,
2002
0
        generate_sorted_domains(scc, data->executed, data->build));
2003
0
  isl_basic_set_list_free(scc);
2004
0
2005
0
  return data->list ? 
isl_stat_ok0
:
isl_stat_error0
;
2006
32
}
2007
2008
/* Sort the domains in "domain_list" according to the execution order
2009
 * at the current depth (for equal values of the outer dimensions),
2010
 * generate code for each of them, collecting the results in a list.
2011
 * If no code is generated (because the intersection of the inverse schedule
2012
 * with the domains turns out to be empty), then an empty list is returned.
2013
 *
2014
 * The caller is responsible for ensuring that the basic sets in "domain_list"
2015
 * are pair-wise disjoint.  It can, however, in principle happen that
2016
 * two basic sets should be ordered one way for one value of the outer
2017
 * dimensions and the other way for some other value of the outer dimensions.
2018
 * We therefore play safe and look for strongly connected components.
2019
 * The function add_nodes takes care of handling non-trivial components.
2020
 */
2021
static __isl_give isl_ast_graft_list *generate_sorted_domains(
2022
  __isl_keep isl_basic_set_list *domain_list,
2023
  __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
2024
1.30k
{
2025
1.30k
  isl_ctx *ctx;
2026
1.30k
  struct isl_add_nodes_data data;
2027
1.30k
  int depth;
2028
1.30k
  int n;
2029
1.30k
2030
1.30k
  if (!domain_list)
2031
0
    return NULL;
2032
1.30k
2033
1.30k
  ctx = isl_basic_set_list_get_ctx(domain_list);
2034
1.30k
  n = isl_basic_set_list_n_basic_set(domain_list);
2035
1.30k
  data.list = isl_ast_graft_list_alloc(ctx, n);
2036
1.30k
  if (n == 0)
2037
0
    return data.list;
2038
1.30k
  
if (1.30k
n == 11.30k
)
2039
1.28k
    return add_node(data.list, isl_union_map_copy(executed),
2040
1.28k
      isl_basic_set_list_get_basic_set(domain_list, 0),
2041
1.28k
      isl_ast_build_copy(build));
2042
1.30k
2043
13
  depth = isl_ast_build_get_depth(build);
2044
13
  data.executed = executed;
2045
13
  data.build = build;
2046
13
  if (isl_basic_set_list_foreach_scc(domain_list,
2047
13
          &domain_follows_at_depth, &depth,
2048
13
          &add_nodes, &data) < 0)
2049
0
    data.list = isl_ast_graft_list_free(data.list);
2050
13
2051
13
  return data.list;
2052
1.30k
}
2053
2054
/* Do i and j share any values for the outer dimensions?
2055
 */
2056
static isl_bool shared_outer(__isl_keep isl_basic_set *i,
2057
  __isl_keep isl_basic_set *j, void *user)
2058
69
{
2059
69
  int depth = *(int *) user;
2060
69
  isl_basic_map *test;
2061
69
  isl_bool empty;
2062
69
  int l;
2063
69
2064
69
  test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
2065
69
                isl_basic_set_copy(j));
2066
102
  for (l = 0; 
l < depth102
;
++l33
)
2067
33
    test = isl_basic_map_equate(test, isl_dim_in, l,
2068
33
            isl_dim_out, l);
2069
69
  empty = isl_basic_map_is_empty(test);
2070
69
  isl_basic_map_free(test);
2071
69
2072
69
  return empty < 0 ? 
isl_bool_error0
:
!empty69
;
2073
69
}
2074
2075
/* Internal data structure for generate_sorted_domains_wrap.
2076
 *
2077
 * "n" is the total number of basic sets
2078
 * "executed" and "build" are extra arguments to be passed
2079
 *  to generate_sorted_domains.
2080
 *
2081
 * "single" is set to 1 by generate_sorted_domains_wrap if there
2082
 * is only a single component.
2083
 * "list" collects the results.
2084
 */
2085
struct isl_ast_generate_parallel_domains_data {
2086
  int n;
2087
  isl_union_map *executed;
2088
  isl_ast_build *build;
2089
2090
  int single;
2091
  isl_ast_graft_list *list;
2092
};
2093
2094
/* Call generate_sorted_domains on "scc", fuse the result into a list
2095
 * with either zero or one graft and collect the these single element
2096
 * lists into data->list.
2097
 *
2098
 * If there is only one component, i.e., if the number of basic sets
2099
 * in the current component is equal to the total number of basic sets,
2100
 * then data->single is set to 1 and the result of generate_sorted_domains
2101
 * is not fused.
2102
 */
2103
static isl_stat generate_sorted_domains_wrap(__isl_take isl_basic_set_list *scc,
2104
  void *user)
2105
49
{
2106
49
  struct isl_ast_generate_parallel_domains_data *data = user;
2107
49
  isl_ast_graft_list *list;
2108
49
2109
49
  list = generate_sorted_domains(scc, data->executed, data->build);
2110
49
  data->single = isl_basic_set_list_n_basic_set(scc) == data->n;
2111
49
  if (!data->single)
2112
36
    list = isl_ast_graft_list_fuse(list, data->build);
2113
49
  if (!data->list)
2114
31
    data->list = list;
2115
49
  else
2116
18
    data->list = isl_ast_graft_list_concat(data->list, list);
2117
49
2118
49
  isl_basic_set_list_free(scc);
2119
49
  if (!data->list)
2120
0
    return isl_stat_error;
2121
49
2122
49
  return isl_stat_ok;
2123
49
}
2124
2125
/* Look for any (weakly connected) components in the "domain_list"
2126
 * of domains that share some values of the outer dimensions.
2127
 * That is, domains in different components do not share any values
2128
 * of the outer dimensions.  This means that these components
2129
 * can be freely reordered.
2130
 * Within each of the components, we sort the domains according
2131
 * to the execution order at the current depth.
2132
 *
2133
 * If there is more than one component, then generate_sorted_domains_wrap
2134
 * fuses the result of each call to generate_sorted_domains
2135
 * into a list with either zero or one graft and collects these (at most)
2136
 * single element lists into a bigger list. This means that the elements of the
2137
 * final list can be freely reordered.  In particular, we sort them
2138
 * according to an arbitrary but fixed ordering to ease merging of
2139
 * graft lists from different components.
2140
 */
2141
static __isl_give isl_ast_graft_list *generate_parallel_domains(
2142
  __isl_keep isl_basic_set_list *domain_list,
2143
  __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
2144
1.28k
{
2145
1.28k
  int depth;
2146
1.28k
  struct isl_ast_generate_parallel_domains_data data;
2147
1.28k
2148
1.28k
  if (!domain_list)
2149
0
    return NULL;
2150
1.28k
2151
1.28k
  data.n = isl_basic_set_list_n_basic_set(domain_list);
2152
1.28k
  if (data.n <= 1)
2153
1.25k
    return generate_sorted_domains(domain_list, executed, build);
2154
1.28k
2155
31
  depth = isl_ast_build_get_depth(build);
2156
31
  data.list = NULL;
2157
31
  data.executed = executed;
2158
31
  data.build = build;
2159
31
  data.single = 0;
2160
31
  if (isl_basic_set_list_foreach_scc(domain_list, &shared_outer, &depth,
2161
31
              &generate_sorted_domains_wrap,
2162
31
              &data) < 0)
2163
0
    data.list = isl_ast_graft_list_free(data.list);
2164
31
2165
31
  if (!data.single)
2166
18
    data.list = isl_ast_graft_list_sort_guard(data.list);
2167
31
2168
31
  return data.list;
2169
1.28k
}
2170
2171
/* Internal data for separate_domain.
2172
 *
2173
 * "explicit" is set if we only want to use explicit bounds.
2174
 *
2175
 * "domain" collects the separated domains.
2176
 */
2177
struct isl_separate_domain_data {
2178
  isl_ast_build *build;
2179
  int explicit;
2180
  isl_set *domain;
2181
};
2182
2183
/* Extract implicit bounds on the current dimension for the executed "map".
2184
 *
2185
 * The domain of "map" may involve inner dimensions, so we
2186
 * need to eliminate them.
2187
 */
2188
static __isl_give isl_set *implicit_bounds(__isl_take isl_map *map,
2189
  __isl_keep isl_ast_build *build)
2190
0
{
2191
0
  isl_set *domain;
2192
0
2193
0
  domain = isl_map_domain(map);
2194
0
  domain = isl_ast_build_eliminate(build, domain);
2195
0
2196
0
  return domain;
2197
0
}
2198
2199
/* Extract explicit bounds on the current dimension for the executed "map".
2200
 *
2201
 * Rather than eliminating the inner dimensions as in implicit_bounds,
2202
 * we simply drop any constraints involving those inner dimensions.
2203
 * The idea is that most bounds that are implied by constraints on the
2204
 * inner dimensions will be enforced by for loops and not by explicit guards.
2205
 * There is then no need to separate along those bounds.
2206
 */
2207
static __isl_give isl_set *explicit_bounds(__isl_take isl_map *map,
2208
  __isl_keep isl_ast_build *build)
2209
0
{
2210
0
  isl_set *domain;
2211
0
  int depth, dim;
2212
0
2213
0
  dim = isl_map_dim(map, isl_dim_out);
2214
0
  map = isl_map_drop_constraints_involving_dims(map, isl_dim_out, 0, dim);
2215
0
2216
0
  domain = isl_map_domain(map);
2217
0
  depth = isl_ast_build_get_depth(build);
2218
0
  dim = isl_set_dim(domain, isl_dim_set);
2219
0
  domain = isl_set_detect_equalities(domain);
2220
0
  domain = isl_set_drop_constraints_involving_dims(domain,
2221
0
        isl_dim_set, depth + 1, dim - (depth + 1));
2222
0
  domain = isl_set_remove_divs_involving_dims(domain,
2223
0
        isl_dim_set, depth, 1);
2224
0
  domain = isl_set_remove_unknown_divs(domain);
2225
0
2226
0
  return domain;
2227
0
}
2228
2229
/* Split data->domain into pieces that intersect with the range of "map"
2230
 * and pieces that do not intersect with the range of "map"
2231
 * and then add that part of the range of "map" that does not intersect
2232
 * with data->domain.
2233
 */
2234
static isl_stat separate_domain(__isl_take isl_map *map, void *user)
2235
0
{
2236
0
  struct isl_separate_domain_data *data = user;
2237
0
  isl_set *domain;
2238
0
  isl_set *d1, *d2;
2239
0
2240
0
  if (data->explicit)
2241
0
    domain = explicit_bounds(map, data->build);
2242
0
  else
2243
0
    domain = implicit_bounds(map, data->build);
2244
0
2245
0
  domain = isl_set_coalesce(domain);
2246
0
  domain = isl_set_make_disjoint(domain);
2247
0
  d1 = isl_set_subtract(isl_set_copy(domain), isl_set_copy(data->domain));
2248
0
  d2 = isl_set_subtract(isl_set_copy(data->domain), isl_set_copy(domain));
2249
0
  data->domain = isl_set_intersect(data->domain, domain);
2250
0
  data->domain = isl_set_union(data->domain, d1);
2251
0
  data->domain = isl_set_union(data->domain, d2);
2252
0
2253
0
  return isl_stat_ok;
2254
0
}
2255
2256
/* Separate the schedule domains of "executed".
2257
 *
2258
 * That is, break up the domain of "executed" into basic sets,
2259
 * such that for each basic set S, every element in S is associated with
2260
 * the same domain spaces.
2261
 *
2262
 * "space" is the (single) domain space of "executed".
2263
 */
2264
static __isl_give isl_set *separate_schedule_domains(
2265
  __isl_take isl_space *space, __isl_take isl_union_map *executed,
2266
  __isl_keep isl_ast_build *build)
2267
0
{
2268
0
  struct isl_separate_domain_data data = { build };
2269
0
  isl_ctx *ctx;
2270
0
2271
0
  ctx = isl_ast_build_get_ctx(build);
2272
0
  data.explicit = isl_options_get_ast_build_separation_bounds(ctx) ==
2273
0
            ISL_AST_BUILD_SEPARATION_BOUNDS_EXPLICIT;
2274
0
  data.domain = isl_set_empty(space);
2275
0
  if (isl_union_map_foreach_map(executed, &separate_domain, &data) < 0)
2276
0
    data.domain = isl_set_free(data.domain);
2277
0
2278
0
  isl_union_map_free(executed);
2279
0
  return data.domain;
2280
0
}
2281
2282
/* Temporary data used during the search for a lower bound for unrolling.
2283
 *
2284
 * "build" is the build in which the unrolling will be performed
2285
 * "domain" is the original set for which to find a lower bound
2286
 * "depth" is the dimension for which to find a lower boudn
2287
 * "expansion" is the expansion that needs to be applied to "domain"
2288
 * in the unrolling that will be performed
2289
 *
2290
 * "lower" is the best lower bound found so far.  It is NULL if we have not
2291
 * found any yet.
2292
 * "n" is the corresponding size.  If lower is NULL, then the value of n
2293
 * is undefined.
2294
 * "n_div" is the maximal number of integer divisions in the first
2295
 * unrolled iteration (after expansion).  It is set to -1 if it hasn't
2296
 * been computed yet.
2297
 */
2298
struct isl_find_unroll_data {
2299
  isl_ast_build *build;
2300
  isl_set *domain;
2301
  int depth;
2302
  isl_basic_map *expansion;
2303
2304
  isl_aff *lower;
2305
  int *n;
2306
  int n_div;
2307
};
2308
2309
/* Return the constraint
2310
 *
2311
 *  i_"depth" = aff + offset
2312
 */
2313
static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff,
2314
  int offset)
2315
1.34k
{
2316
1.34k
  aff = isl_aff_copy(aff);
2317
1.34k
  aff = isl_aff_add_coefficient_si(aff, isl_dim_in, depth, -1);
2318
1.34k
  aff = isl_aff_add_constant_si(aff, offset);
2319
1.34k
  return isl_equality_from_aff(aff);
2320
1.34k
}
2321
2322
/* Update *user to the number of integer divsions in the first element
2323
 * of "ma", if it is larger than the current value.
2324
 */
2325
static isl_stat update_n_div(__isl_take isl_set *set,
2326
  __isl_take isl_multi_aff *ma, void *user)
2327
2
{
2328
2
  isl_aff *aff;
2329
2
  int *n = user;
2330
2
  int n_div;
2331
2
2332
2
  aff = isl_multi_aff_get_aff(ma, 0);
2333
2
  n_div = isl_aff_dim(aff, isl_dim_div);
2334
2
  isl_aff_free(aff);
2335
2
  isl_multi_aff_free(ma);
2336
2
  isl_set_free(set);
2337
2
2338
2
  if (n_div > *n)
2339
2
    *n = n_div;
2340
2
2341
2
  return aff ? 
isl_stat_ok2
:
isl_stat_error0
;
2342
2
}
2343
2344
/* Get the number of integer divisions in the expression for the iterator
2345
 * value at the first slice in the unrolling based on lower bound "lower",
2346
 * taking into account the expansion that needs to be performed on this slice.
2347
 */
2348
static int get_expanded_n_div(struct isl_find_unroll_data *data,
2349
  __isl_keep isl_aff *lower)
2350
2
{
2351
2
  isl_constraint *c;
2352
2
  isl_set *set;
2353
2
  isl_map *it_map, *expansion;
2354
2
  isl_pw_multi_aff *pma;
2355
2
  int n;
2356
2
2357
2
  c = at_offset(data->depth, lower, 0);
2358
2
  set = isl_set_copy(data->domain);
2359
2
  set = isl_set_add_constraint(set, c);
2360
2
  expansion = isl_map_from_basic_map(isl_basic_map_copy(data->expansion));
2361
2
  set = isl_set_apply(set, expansion);
2362
2
  it_map = isl_ast_build_map_to_iterator(data->build, set);
2363
2
  pma = isl_pw_multi_aff_from_map(it_map);
2364
2
  n = 0;
2365
2
  if (isl_pw_multi_aff_foreach_piece(pma, &update_n_div, &n) < 0)
2366
0
    n = -1;
2367
2
  isl_pw_multi_aff_free(pma);
2368
2
2369
2
  return n;
2370
2
}
2371
2372
/* Is the lower bound "lower" with corresponding iteration count "n"
2373
 * better than the one stored in "data"?
2374
 * If there is no upper bound on the iteration count ("n" is infinity) or
2375
 * if the count is too large, then we cannot use this lower bound.
2376
 * Otherwise, if there was no previous lower bound or
2377
 * if the iteration count of the new lower bound is smaller than
2378
 * the iteration count of the previous lower bound, then we consider
2379
 * the new lower bound to be better.
2380
 * If the iteration count is the same, then compare the number
2381
 * of integer divisions that would be needed to express
2382
 * the iterator value at the first slice in the unrolling
2383
 * according to the lower bound.  If we end up computing this
2384
 * number, then store the lowest value in data->n_div.
2385
 */
2386
static int is_better_lower_bound(struct isl_find_unroll_data *data,
2387
  __isl_keep isl_aff *lower, __isl_keep isl_val *n)
2388
758
{
2389
758
  int cmp;
2390
758
  int n_div;
2391
758
2392
758
  if (!n)
2393
0
    return -1;
2394
758
  
if (758
isl_val_is_infty(n)758
)
2395
0
    return 0;
2396
758
  
if (758
isl_val_cmp_si(n, INT_MAX) > 0758
)
2397
0
    return 0;
2398
758
  
if (758
!data->lower758
)
2399
703
    return 1;
2400
55
  cmp = isl_val_cmp_si(n, *data->n);
2401
55
  if (cmp < 0)
2402
53
    return 1;
2403
2
  
if (2
cmp > 02
)
2404
1
    return 0;
2405
1
  
if (1
data->n_div < 01
)
2406
1
    data->n_div = get_expanded_n_div(data, data->lower);
2407
1
  if (data->n_div < 0)
2408
0
    return -1;
2409
1
  
if (1
data->n_div == 01
)
2410
0
    return 0;
2411
1
  n_div = get_expanded_n_div(data, lower);
2412
1
  if (n_div < 0)
2413
0
    return -1;
2414
1
  
if (1
n_div >= data->n_div1
)
2415
0
    return 0;
2416
1
  data->n_div = n_div;
2417
1
2418
1
  return 1;
2419
1
}
2420
2421
/* Check if we can use "c" as a lower bound and if it is better than
2422
 * any previously found lower bound.
2423
 *
2424
 * If "c" does not involve the dimension at the current depth,
2425
 * then we cannot use it.
2426
 * Otherwise, let "c" be of the form
2427
 *
2428
 *  i >= f(j)/a
2429
 *
2430
 * We compute the maximal value of
2431
 *
2432
 *  -ceil(f(j)/a)) + i + 1
2433
 *
2434
 * over the domain.  If there is such a value "n", then we know
2435
 *
2436
 *  -ceil(f(j)/a)) + i + 1 <= n
2437
 *
2438
 * or
2439
 *
2440
 *  i < ceil(f(j)/a)) + n
2441
 *
2442
 * meaning that we can use ceil(f(j)/a)) as a lower bound for unrolling.
2443
 * We just need to check if we have found any lower bound before and
2444
 * if the new lower bound is better (smaller n or fewer integer divisions)
2445
 * than the previously found lower bounds.
2446
 */
2447
static isl_stat update_unrolling_lower_bound(struct isl_find_unroll_data *data,
2448
  __isl_keep isl_constraint *c)
2449
10.1k
{
2450
10.1k
  isl_aff *aff, *lower;
2451
10.1k
  isl_val *max;
2452
10.1k
  int better;
2453
10.1k
2454
10.1k
  if (!isl_constraint_is_lower_bound(c, isl_dim_set, data->depth))
2455
9.41k
    return isl_stat_ok;
2456
10.1k
2457
758
  lower = isl_constraint_get_bound(c, isl_dim_set, data->depth);
2458
758
  lower = isl_aff_ceil(lower);
2459
758
  aff = isl_aff_copy(lower);
2460
758
  aff = isl_aff_neg(aff);
2461
758
  aff = isl_aff_add_coefficient_si(aff, isl_dim_in, data->depth, 1);
2462
758
  aff = isl_aff_add_constant_si(aff, 1);
2463
758
  max = isl_set_max_val(data->domain, aff);
2464
758
  isl_aff_free(aff);
2465
758
2466
758
  better = is_better_lower_bound(data, lower, max);
2467
758
  if (
better < 0 || 758
!better758
)
{1
2468
1
    isl_val_free(max);
2469
1
    isl_aff_free(lower);
2470
1
    return better < 0 ? 
isl_stat_error0
:
isl_stat_ok1
;
2471
1
  }
2472
758
2473
757
  isl_aff_free(data->lower);
2474
757
  data->lower = lower;
2475
757
  *data->n = isl_val_get_num_si(max);
2476
757
  isl_val_free(max);
2477
757
2478
757
  return isl_stat_ok;
2479
758
}
2480
2481
/* Check if we can use "c" as a lower bound and if it is better than
2482
 * any previously found lower bound.
2483
 */
2484
static isl_stat constraint_find_unroll(__isl_take isl_constraint *c, void *user)
2485
10.1k
{
2486
10.1k
  struct isl_find_unroll_data *data;
2487
10.1k
  isl_stat r;
2488
10.1k
2489
10.1k
  data = (struct isl_find_unroll_data *) user;
2490
10.1k
  r = update_unrolling_lower_bound(data, c);
2491
10.1k
  isl_constraint_free(c);
2492
10.1k
2493
10.1k
  return r;
2494
10.1k
}
2495
2496
/* Look for a lower bound l(i) on the dimension at "depth"
2497
 * and a size n such that "domain" is a subset of
2498
 *
2499
 *  { [i] : l(i) <= i_d < l(i) + n }
2500
 *
2501
 * where d is "depth" and l(i) depends only on earlier dimensions.
2502
 * Furthermore, try and find a lower bound such that n is as small as possible.
2503
 * In particular, "n" needs to be finite.
2504
 * "build" is the build in which the unrolling will be performed.
2505
 * "expansion" is the expansion that needs to be applied to "domain"
2506
 * in the unrolling that will be performed.
2507
 *
2508
 * Inner dimensions have been eliminated from "domain" by the caller.
2509
 *
2510
 * We first construct a collection of lower bounds on the input set
2511
 * by computing its simple hull.  We then iterate through them,
2512
 * discarding those that we cannot use (either because they do not
2513
 * involve the dimension at "depth" or because they have no corresponding
2514
 * upper bound, meaning that "n" would be unbounded) and pick out the
2515
 * best from the remaining ones.
2516
 *
2517
 * If we cannot find a suitable lower bound, then we consider that
2518
 * to be an error.
2519
 */
2520
static __isl_give isl_aff *find_unroll_lower_bound(
2521
  __isl_keep isl_ast_build *build, __isl_keep isl_set *domain,
2522
  int depth, __isl_keep isl_basic_map *expansion, int *n)
2523
703
{
2524
703
  struct isl_find_unroll_data data =
2525
703
      { build, domain, depth, expansion, NULL, n, -1 };
2526
703
  isl_basic_set *hull;
2527
703
2528
703
  hull = isl_set_simple_hull(isl_set_copy(domain));
2529
703
2530
703
  if (isl_basic_set_foreach_constraint(hull,
2531
703
              &constraint_find_unroll, &data) < 0)
2532
0
    goto error;
2533
703
2534
703
  isl_basic_set_free(hull);
2535
703
2536
703
  if (!data.lower)
2537
0
    isl_die(isl_set_get_ctx(domain), isl_error_invalid,
2538
703
      "cannot find lower bound for unrolling", return NULL);
2539
703
2540
703
  return data.lower;
2541
0
error:
2542
0
  isl_basic_set_free(hull);
2543
0
  return isl_aff_free(data.lower);
2544
703
}
2545
2546
/* Call "fn" on each iteration of the current dimension of "domain".
2547
 * If "init" is not NULL, then it is called with the number of
2548
 * iterations before any call to "fn".
2549
 * Return -1 on failure.
2550
 *
2551
 * Since we are going to be iterating over the individual values,
2552
 * we first check if there are any strides on the current dimension.
2553
 * If there is, we rewrite the current dimension i as
2554
 *
2555
 *    i = stride i' + offset
2556
 *
2557
 * and then iterate over individual values of i' instead.
2558
 *
2559
 * We then look for a lower bound on i' and a size such that the domain
2560
 * is a subset of
2561
 *
2562
 *  { [j,i'] : l(j) <= i' < l(j) + n }
2563
 *
2564
 * and then take slices of the domain at values of i'
2565
 * between l(j) and l(j) + n - 1.
2566
 *
2567
 * We compute the unshifted simple hull of each slice to ensure that
2568
 * we have a single basic set per offset.  The slicing constraint
2569
 * may get simplified away before the unshifted simple hull is taken
2570
 * and may therefore in some rare cases disappear from the result.
2571
 * We therefore explicitly add the constraint back after computing
2572
 * the unshifted simple hull to ensure that the basic sets
2573
 * remain disjoint.  The constraints that are dropped by taking the hull
2574
 * will be taken into account at the next level, as in the case of the
2575
 * atomic option.
2576
 *
2577
 * Finally, we map i' back to i and call "fn".
2578
 */
2579
static int foreach_iteration(__isl_take isl_set *domain,
2580
  __isl_keep isl_ast_build *build, int (*init)(int n, void *user),
2581
  int (*fn)(__isl_take isl_basic_set *bset, void *user), void *user)
2582
703
{
2583
703
  int i, n;
2584
703
  int empty;
2585
703
  int depth;
2586
703
  isl_multi_aff *expansion;
2587
703
  isl_basic_map *bmap;
2588
703
  isl_aff *lower = NULL;
2589
703
  isl_ast_build *stride_build;
2590
703
2591
703
  depth = isl_ast_build_get_depth(build);
2592
703
2593
703
  domain = isl_ast_build_eliminate_inner(build, domain);
2594
703
  domain = isl_set_intersect(domain, isl_ast_build_get_domain(build));
2595
703
  stride_build = isl_ast_build_copy(build);
2596
703
  stride_build = isl_ast_build_detect_strides(stride_build,
2597
703
              isl_set_copy(domain));
2598
703
  expansion = isl_ast_build_get_stride_expansion(stride_build);
2599
703
2600
703
  domain = isl_set_preimage_multi_aff(domain,
2601
703
              isl_multi_aff_copy(expansion));
2602
703
  domain = isl_ast_build_eliminate_divs(stride_build, domain);
2603
703
  isl_ast_build_free(stride_build);
2604
703
2605
703
  bmap = isl_basic_map_from_multi_aff(expansion);
2606
703
2607
703
  empty = isl_set_is_empty(domain);
2608
703
  if (
empty < 0703
)
{0
2609
0
    n = -1;
2610
703
  } else 
if (703
empty703
)
{0
2611
0
    n = 0;
2612
703
  } else {
2613
703
    lower = find_unroll_lower_bound(build, domain, depth, bmap, &n);
2614
703
    if (!lower)
2615
0
      n = -1;
2616
703
  }
2617
703
  if (
n >= 0 && 703
init703
&&
init(n, user) < 0700
)
2618
0
    n = -1;
2619
2.04k
  for (i = 0; 
i < n2.04k
;
++i1.34k
)
{1.34k
2620
1.34k
    isl_set *set;
2621
1.34k
    isl_basic_set *bset;
2622
1.34k
    isl_constraint *slice;
2623
1.34k
2624
1.34k
    slice = at_offset(depth, lower, i);
2625
1.34k
    set = isl_set_copy(domain);
2626
1.34k
    set = isl_set_add_constraint(set, isl_constraint_copy(slice));
2627
1.34k
    bset = isl_set_unshifted_simple_hull(set);
2628
1.34k
    bset = isl_basic_set_add_constraint(bset, slice);
2629
1.34k
    bset = isl_basic_set_apply(bset, isl_basic_map_copy(bmap));
2630
1.34k
2631
1.34k
    if (fn(bset, user) < 0)
2632
0
      break;
2633
1.34k
  }
2634
703
2635
703
  isl_aff_free(lower);
2636
703
  isl_set_free(domain);
2637
703
  isl_basic_map_free(bmap);
2638
703
2639
703
  return n < 0 || 
i < n703
?
-10
:
0703
;
2640
703
}
2641
2642
/* Data structure for storing the results and the intermediate objects
2643
 * of compute_domains.
2644
 *
2645
 * "list" is the main result of the function and contains a list
2646
 * of disjoint basic sets for which code should be generated.
2647
 *
2648
 * "executed" and "build" are inputs to compute_domains.
2649
 * "schedule_domain" is the domain of "executed".
2650
 *
2651
 * "option" constains the domains at the current depth that should by
2652
 * atomic, separated or unrolled.  These domains are as specified by
2653
 * the user, except that inner dimensions have been eliminated and
2654
 * that they have been made pair-wise disjoint.
2655
 *
2656
 * "sep_class" contains the user-specified split into separation classes
2657
 * specialized to the current depth.
2658
 * "done" contains the union of the separation domains that have already
2659
 * been handled.
2660
 */
2661
struct isl_codegen_domains {
2662
  isl_basic_set_list *list;
2663
2664
  isl_union_map *executed;
2665
  isl_ast_build *build;
2666
  isl_set *schedule_domain;
2667
2668
  isl_set *option[4];
2669
2670
  isl_map *sep_class;
2671
  isl_set *done;
2672
};
2673
2674
/* Internal data structure for do_unroll.
2675
 *
2676
 * "domains" stores the results of compute_domains.
2677
 * "class_domain" is the original class domain passed to do_unroll.
2678
 * "unroll_domain" collects the unrolled iterations.
2679
 */
2680
struct isl_ast_unroll_data {
2681
  struct isl_codegen_domains *domains;
2682
  isl_set *class_domain;
2683
  isl_set *unroll_domain;
2684
};
2685
2686
/* Given an iteration of an unrolled domain represented by "bset",
2687
 * add it to data->domains->list.
2688
 * Since we may have dropped some constraints, we intersect with
2689
 * the class domain again to ensure that each element in the list
2690
 * is disjoint from the other class domains.
2691
 */
2692
static int do_unroll_iteration(__isl_take isl_basic_set *bset, void *user)
2693
8
{
2694
8
  struct isl_ast_unroll_data *data = user;
2695
8
  isl_set *set;
2696
8
  isl_basic_set_list *list;
2697
8
2698
8
  set = isl_set_from_basic_set(bset);
2699
8
  data->unroll_domain = isl_set_union(data->unroll_domain,
2700
8
              isl_set_copy(set));
2701
8
  set = isl_set_intersect(set, isl_set_copy(data->class_domain));
2702
8
  set = isl_set_make_disjoint(set);
2703
8
  list = isl_basic_set_list_from_set(set);
2704
8
  data->domains->list = isl_basic_set_list_concat(data->domains->list,
2705
8
              list);
2706
8
2707
8
  return 0;
2708
8
}
2709
2710
/* Extend domains->list with a list of basic sets, one for each value
2711
 * of the current dimension in "domain" and remove the corresponding
2712
 * sets from the class domain.  Return the updated class domain.
2713
 * The divs that involve the current dimension have not been projected out
2714
 * from this domain.
2715
 *
2716
 * We call foreach_iteration to iterate over the individual values and
2717
 * in do_unroll_iteration we collect the individual basic sets in
2718
 * domains->list and their union in data->unroll_domain, which is then
2719
 * used to update the class domain.
2720
 */
2721
static __isl_give isl_set *do_unroll(struct isl_codegen_domains *domains,
2722
  __isl_take isl_set *domain, __isl_take isl_set *class_domain)
2723
3
{
2724
3
  struct isl_ast_unroll_data data;
2725
3
2726
3
  if (!domain)
2727
0
    return isl_set_free(class_domain);
2728
3
  
if (3
!class_domain3
)
2729
0
    return isl_set_free(domain);
2730
3
2731
3
  data.domains = domains;
2732
3
  data.class_domain = class_domain;
2733
3
  data.unroll_domain = isl_set_empty(isl_set_get_space(domain));
2734
3
2735
3
  if (foreach_iteration(domain, domains->build, NULL,
2736
3
        &do_unroll_iteration, &data) < 0)
2737
0
    data.unroll_domain = isl_set_free(data.unroll_domain);
2738
3
2739
3
  class_domain = isl_set_subtract(class_domain, data.unroll_domain);
2740
3
2741
3
  return class_domain;
2742
3
}
2743
2744
/* Add domains to domains->list for each individual value of the current
2745
 * dimension, for that part of the schedule domain that lies in the
2746
 * intersection of the option domain and the class domain.
2747
 * Remove the corresponding sets from the class domain and
2748
 * return the updated class domain.
2749
 *
2750
 * We first break up the unroll option domain into individual pieces
2751
 * and then handle each of them separately.  The unroll option domain
2752
 * has been made disjoint in compute_domains_init_options,
2753
 *
2754
 * Note that we actively want to combine different pieces of the
2755
 * schedule domain that have the same value at the current dimension.
2756
 * We therefore need to break up the unroll option domain before
2757
 * intersecting with class and schedule domain, hoping that the
2758
 * unroll option domain specified by the user is relatively simple.
2759
 */
2760
static __isl_give isl_set *compute_unroll_domains(
2761
  struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
2762
66
{
2763
66
  isl_set *unroll_domain;
2764
66
  isl_basic_set_list *unroll_list;
2765
66
  int i, n;
2766
66
  int empty;
2767
66
2768
66
  empty = isl_set_is_empty(domains->option[isl_ast_loop_unroll]);
2769
66
  if (empty < 0)
2770
0
    return isl_set_free(class_domain);
2771
66
  
if (66
empty66
)
2772
63
    return class_domain;
2773
66
2774
3
  unroll_domain = isl_set_copy(domains->option[isl_ast_loop_unroll]);
2775
3
  unroll_list = isl_basic_set_list_from_set(unroll_domain);
2776
3
2777
3
  n = isl_basic_set_list_n_basic_set(unroll_list);
2778
6
  for (i = 0; 
i < n6
;
++i3
)
{3
2779
3
    isl_basic_set *bset;
2780
3
2781
3
    bset = isl_basic_set_list_get_basic_set(unroll_list, i);
2782
3
    unroll_domain = isl_set_from_basic_set(bset);
2783
3
    unroll_domain = isl_set_intersect(unroll_domain,
2784
3
                isl_set_copy(class_domain));
2785
3
    unroll_domain = isl_set_intersect(unroll_domain,
2786
3
          isl_set_copy(domains->schedule_domain));
2787
3
2788
3
    empty = isl_set_is_empty(unroll_domain);
2789
3
    if (
empty >= 0 && 3
empty3
)
{0
2790
0
      isl_set_free(unroll_domain);
2791
0
      continue;
2792
0
    }
2793
3
2794
3
    class_domain = do_unroll(domains, unroll_domain, class_domain);
2795
3
  }
2796
3
2797
3
  isl_basic_set_list_free(unroll_list);
2798
3
2799
3
  return class_domain;
2800
66
}
2801
2802
/* Try and construct a single basic set that includes the intersection of
2803
 * the schedule domain, the atomic option domain and the class domain.
2804
 * Add the resulting basic set(s) to domains->list and remove them
2805
 * from class_domain.  Return the updated class domain.
2806
 *
2807
 * We construct a single domain rather than trying to combine
2808
 * the schedule domains of individual domains because we are working
2809
 * within a single component so that non-overlapping schedule domains
2810
 * should already have been separated.
2811
 * We do however need to make sure that this single domains is a subset
2812
 * of the class domain so that it would not intersect with any other
2813
 * class domains.  This means that we may end up splitting up the atomic
2814
 * domain in case separation classes are being used.
2815
 *
2816
 * "domain" is the intersection of the schedule domain and the class domain,
2817
 * with inner dimensions projected out.
2818
 */
2819
static __isl_give isl_set *compute_atomic_domain(
2820
  struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
2821
66
{
2822
66
  isl_basic_set *bset;
2823
66
  isl_basic_set_list *list;
2824
66
  isl_set *domain, *atomic_domain;
2825
66
  int empty;
2826
66
2827
66
  domain = isl_set_copy(domains->option[isl_ast_loop_atomic]);
2828
66
  domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2829
66
  domain = isl_set_intersect(domain,
2830
66
        isl_set_copy(domains->schedule_domain));
2831
66
  empty = isl_set_is_empty(domain);
2832
66
  if (empty < 0)
2833
0
    class_domain = isl_set_free(class_domain);
2834
66
  if (
empty66
)
{65
2835
65
    isl_set_free(domain);
2836
65
    return class_domain;
2837
65
  }
2838
66
2839
1
  domain = isl_ast_build_eliminate(domains->build, domain);
2840
1
  domain = isl_set_coalesce(domain);
2841
1
  bset = isl_set_unshifted_simple_hull(domain);
2842
1
  domain = isl_set_from_basic_set(bset);
2843
1
  atomic_domain = isl_set_copy(domain);
2844
1
  domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2845
1
  class_domain = isl_set_subtract(class_domain, atomic_domain);
2846
1
  domain = isl_set_make_disjoint(domain);
2847
1
  list = isl_basic_set_list_from_set(domain);
2848
1
  domains->list = isl_basic_set_list_concat(domains->list, list);
2849
1
2850
1
  return class_domain;
2851
66
}
2852
2853
/* Split up the schedule domain into uniform basic sets,
2854
 * in the sense that each element in a basic set is associated to
2855
 * elements of the same domains, and add the result to domains->list.
2856
 * Do this for that part of the schedule domain that lies in the
2857
 * intersection of "class_domain" and the separate option domain.
2858
 *
2859
 * "class_domain" may or may not include the constraints
2860
 * of the schedule domain, but this does not make a difference
2861
 * since we are going to intersect it with the domain of the inverse schedule.
2862
 * If it includes schedule domain constraints, then they may involve
2863
 * inner dimensions, but we will eliminate them in separation_domain.
2864
 */
2865
static int compute_separate_domain(struct isl_codegen_domains *domains,
2866
  __isl_keep isl_set *class_domain)
2867
66
{
2868
66
  isl_space *space;
2869
66
  isl_set *domain;
2870
66
  isl_union_map *executed;
2871
66
  isl_basic_set_list *list;
2872
66
  int empty;
2873
66
2874
66
  domain = isl_set_copy(domains->option[isl_ast_loop_separate]);
2875
66
  domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2876
66
  executed = isl_union_map_copy(domains->executed);
2877
66
  executed = isl_union_map_intersect_domain(executed,
2878
66
            isl_union_set_from_set(domain));
2879
66
  empty = isl_union_map_is_empty(executed);
2880
66
  if (
empty < 0 || 66
empty66
)
{66
2881
66
    isl_union_map_free(executed);
2882
66
    return empty < 0 ? 
-10
:
066
;
2883
66
  }
2884
66
2885
0
  space = isl_set_get_space(class_domain);
2886
0
  domain = separate_schedule_domains(space, executed, domains->build);
2887
0
2888
0
  list = isl_basic_set_list_from_set(domain);
2889
0
  domains->list = isl_basic_set_list_concat(domains->list, list);
2890
0
2891
0
  return 0;
2892
66
}
2893
2894
/* Split up the domain at the current depth into disjoint
2895
 * basic sets for which code should be generated separately
2896
 * for the given separation class domain.
2897
 *
2898
 * If any separation classes have been defined, then "class_domain"
2899
 * is the domain of the current class and does not refer to inner dimensions.
2900
 * Otherwise, "class_domain" is the universe domain.
2901
 *
2902
 * We first make sure that the class domain is disjoint from
2903
 * previously considered class domains.
2904
 *
2905
 * The separate domains can be computed directly from the "class_domain".
2906
 *
2907
 * The unroll, atomic and remainder domains need the constraints
2908
 * from the schedule domain.
2909
 *
2910
 * For unrolling, the actual schedule domain is needed (with divs that
2911
 * may refer to the current dimension) so that stride detection can be
2912
 * performed.
2913
 *
2914
 * For atomic and remainder domains, inner dimensions and divs involving
2915
 * the current dimensions should be eliminated.
2916
 * In case we are working within a separation class, we need to intersect
2917
 * the result with the current "class_domain" to ensure that the domains
2918
 * are disjoint from those generated from other class domains.
2919
 *
2920
 * The domain that has been made atomic may be larger than specified
2921
 * by the user since it needs to be representable as a single basic set.
2922
 * This possibly larger domain is removed from class_domain by
2923
 * compute_atomic_domain.  It is computed first so that the extended domain
2924
 * would not overlap with any domains computed before.
2925
 * Similary, the unrolled domains may have some constraints removed and
2926
 * may therefore also be larger than specified by the user.
2927
 *
2928
 * If anything is left after handling separate, unroll and atomic,
2929
 * we split it up into basic sets and append the basic sets to domains->list.
2930
 */
2931
static isl_stat compute_partial_domains(struct isl_codegen_domains *domains,
2932
  __isl_take isl_set *class_domain)
2933
66
{
2934
66
  isl_basic_set_list *list;
2935
66
  isl_set *domain;
2936
66
2937
66
  class_domain = isl_set_subtract(class_domain,
2938
66
          isl_set_copy(domains->done));
2939
66
  domains->done = isl_set_union(domains->done,
2940
66
          isl_set_copy(class_domain));
2941
66
2942
66
  class_domain = compute_atomic_domain(domains, class_domain);
2943
66
  class_domain = compute_unroll_domains(domains, class_domain);
2944
66
2945
66
  domain = isl_set_copy(class_domain);
2946
66
2947
66
  if (compute_separate_domain(domains, domain) < 0)
2948
0
    goto error;
2949
66
  domain = isl_set_subtract(domain,
2950
66
      isl_set_copy(domains->option[isl_ast_loop_separate]));
2951
66
2952
66
  domain = isl_set_intersect(domain,
2953
66
        isl_set_copy(domains->schedule_domain));
2954
66
2955
66
  domain = isl_ast_build_eliminate(domains->build, domain);
2956
66
  domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2957
66
2958
66
  domain = isl_set_coalesce(domain);
2959
66
  domain = isl_set_make_disjoint(domain);
2960
66
2961
66
  list = isl_basic_set_list_from_set(domain);
2962
66
  domains->list = isl_basic_set_list_concat(domains->list, list);
2963
66
2964
66
  isl_set_free(class_domain);
2965
66
2966
66
  return isl_stat_ok;
2967
0
error:
2968
0
  isl_set_free(domain);
2969
0
  isl_set_free(class_domain);
2970
0
  return isl_stat_error;
2971
66
}
2972
2973
/* Split up the domain at the current depth into disjoint
2974
 * basic sets for which code should be generated separately
2975
 * for the separation class identified by "pnt".
2976
 *
2977
 * We extract the corresponding class domain from domains->sep_class,
2978
 * eliminate inner dimensions and pass control to compute_partial_domains.
2979
 */
2980
static isl_stat compute_class_domains(__isl_take isl_point *pnt, void *user)
2981
0
{
2982
0
  struct isl_codegen_domains *domains = user;
2983
0
  isl_set *class_set;
2984
0
  isl_set *domain;
2985
0
  int disjoint;
2986
0
2987
0
  class_set = isl_set_from_point(pnt);
2988
0
  domain = isl_map_domain(isl_map_intersect_range(
2989
0
        isl_map_copy(domains->sep_class), class_set));
2990
0
  domain = isl_ast_build_compute_gist(domains->build, domain);
2991
0
  domain = isl_ast_build_eliminate(domains->build, domain);
2992
0
2993
0
  disjoint = isl_set_plain_is_disjoint(domain, domains->schedule_domain);
2994
0
  if (disjoint < 0)
2995
0
    return isl_stat_error;
2996
0
  
if (0
disjoint0
)
{0
2997
0
    isl_set_free(domain);
2998
0
    return isl_stat_ok;
2999
0
  }
3000
0
3001
0
  return compute_partial_domains(domains, domain);
3002
0
}
3003
3004
/* Extract the domains at the current depth that should be atomic,
3005
 * separated or unrolled and store them in option.
3006
 *
3007
 * The domains specified by the user might overlap, so we make
3008
 * them disjoint by subtracting earlier domains from later domains.
3009
 */
3010
static void compute_domains_init_options(isl_set *option[4],
3011
  __isl_keep isl_ast_build *build)
3012
66
{
3013
66
  enum isl_ast_loop_type type, type2;
3014
66
  isl_set *unroll;
3015
66
3016
66
  for (type = isl_ast_loop_atomic;
3017
264
      
type <= isl_ast_loop_separate264
;
++type198
)
{198
3018
198
    option[type] = isl_ast_build_get_option_domain(build, type);
3019
396
    for (type2 = isl_ast_loop_atomic; 
type2 < type396
;
++type2198
)
3020
198
      option[type] = isl_set_subtract(option[type],
3021
198
            isl_set_copy(option[type2]));
3022
198
  }
3023
66
3024
66
  unroll = option[isl_ast_loop_unroll];
3025
66
  unroll = isl_set_coalesce(unroll);
3026
66
  unroll = isl_set_make_disjoint(unroll);
3027
66
  option[isl_ast_loop_unroll] = unroll;
3028
66
}
3029
3030
/* Split up the domain at the current depth into disjoint
3031
 * basic sets for which code should be generated separately,
3032
 * based on the user-specified options.
3033
 * Return the list of disjoint basic sets.
3034
 *
3035
 * There are three kinds of domains that we need to keep track of.
3036
 * - the "schedule domain" is the domain of "executed"
3037
 * - the "class domain" is the domain corresponding to the currrent
3038
 *  separation class
3039
 * - the "option domain" is the domain corresponding to one of the options
3040
 *  atomic, unroll or separate
3041
 *
3042
 * We first consider the individial values of the separation classes
3043
 * and split up the domain for each of them separately.
3044
 * Finally, we consider the remainder.  If no separation classes were
3045
 * specified, then we call compute_partial_domains with the universe
3046
 * "class_domain".  Otherwise, we take the "schedule_domain" as "class_domain",
3047
 * with inner dimensions removed.  We do this because we want to
3048
 * avoid computing the complement of the class domains (i.e., the difference
3049
 * between the universe and domains->done).
3050
 */
3051
static __isl_give isl_basic_set_list *compute_domains(
3052
  __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
3053
66
{
3054
66
  struct isl_codegen_domains domains;
3055
66
  isl_ctx *ctx;
3056
66
  isl_set *domain;
3057
66
  isl_union_set *schedule_domain;
3058
66
  isl_set *classes;
3059
66
  isl_space *space;
3060
66
  int n_param;
3061
66
  enum isl_ast_loop_type type;
3062
66
  int empty;
3063
66
3064
66
  if (!executed)
3065
0
    return NULL;
3066
66
3067
66
  ctx = isl_union_map_get_ctx(executed);
3068
66
  domains.list = isl_basic_set_list_alloc(ctx, 0);
3069
66
3070
66
  schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
3071
66
  domain = isl_set_from_union_set(schedule_domain);
3072
66
3073
66
  compute_domains_init_options(domains.option, build);
3074
66
3075
66
  domains.sep_class = isl_ast_build_get_separation_class(build);
3076
66
  classes = isl_map_range(isl_map_copy(domains.sep_class));
3077
66
  n_param = isl_set_dim(classes, isl_dim_param);
3078
66
  classes = isl_set_project_out(classes, isl_dim_param, 0, n_param);
3079
66
3080
66
  space = isl_set_get_space(domain);
3081
66
  domains.build = build;
3082
66
  domains.schedule_domain = isl_set_copy(domain);
3083
66
  domains.executed = executed;
3084
66
  domains.done = isl_set_empty(space);
3085
66
3086
66
  if (isl_set_foreach_point(classes, &compute_class_domains, &domains) < 0)
3087
0
    domains.list = isl_basic_set_list_free(domains.list);
3088
66
  isl_set_free(classes);
3089
66
3090
66
  empty = isl_set_is_empty(domains.done);
3091
66
  if (
empty < 066
)
{0
3092
0
    domains.list = isl_basic_set_list_free(domains.list);
3093
0
    domain = isl_set_free(domain);
3094
66
  } else 
if (66
empty66
)
{66
3095
66
    isl_set_free(domain);
3096
66
    domain = isl_set_universe(isl_set_get_space(domains.done));
3097
0
  } else {
3098
0
    domain = isl_ast_build_eliminate(build, domain);
3099
0
  }
3100
66
  if (compute_partial_domains(&domains, domain) < 0)
3101
0
    domains.list = isl_basic_set_list_free(domains.list);
3102
66
3103
66
  isl_set_free(domains.schedule_domain);
3104
66
  isl_set_free(domains.done);
3105
66
  isl_map_free(domains.sep_class);
3106
264
  for (type = isl_ast_loop_atomic; 
type <= isl_ast_loop_separate264
;
++type198
)
3107
198
    isl_set_free(domains.option[type]);
3108
66
3109
66
  return domains.list;
3110
66
}
3111
3112
/* Generate code for a single component, after shifting (if any)
3113
 * has been applied, in case the schedule was specified as a union map.
3114
 *
3115
 * We first split up the domain at the current depth into disjoint
3116
 * basic sets based on the user-specified options.
3117
 * Then we generated code for each of them and concatenate the results.
3118
 */
3119
static __isl_give isl_ast_graft_list *generate_shifted_component_flat(
3120
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3121
66
{
3122
66
  isl_basic_set_list *domain_list;
3123
66
  isl_ast_graft_list *list = NULL;
3124
66
3125
66
  domain_list = compute_domains(executed, build);
3126
66
  list = generate_parallel_domains(domain_list, executed, build);
3127
66
3128
66
  isl_basic_set_list_free(domain_list);
3129
66
  isl_union_map_free(executed);
3130
66
  isl_ast_build_free(build);
3131
66
3132
66
  return list;
3133
66
}
3134
3135
/* Generate code for a single component, after shifting (if any)
3136
 * has been applied, in case the schedule was specified as a schedule tree
3137
 * and the separate option was specified.
3138
 *
3139
 * We perform separation on the domain of "executed" and then generate
3140
 * an AST for each of the resulting disjoint basic sets.
3141
 */
3142
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_separate(
3143
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3144
0
{
3145
0
  isl_space *space;
3146
0
  isl_set *domain;
3147
0
  isl_basic_set_list *domain_list;
3148
0
  isl_ast_graft_list *list;
3149
0
3150
0
  space = isl_ast_build_get_space(build, 1);
3151
0
  domain = separate_schedule_domains(space,
3152
0
          isl_union_map_copy(executed), build);
3153
0
  domain_list = isl_basic_set_list_from_set(domain);
3154
0
3155
0
  list = generate_parallel_domains(domain_list, executed, build);
3156
0
3157
0
  isl_basic_set_list_free(domain_list);
3158
0
  isl_union_map_free(executed);
3159
0
  isl_ast_build_free(build);
3160
0
3161
0
  return list;
3162
0
}
3163
3164
/* Internal data structure for generate_shifted_component_tree_unroll.
3165
 *
3166
 * "executed" and "build" are inputs to generate_shifted_component_tree_unroll.
3167
 * "list" collects the constructs grafts.
3168
 */
3169
struct isl_ast_unroll_tree_data {
3170
  isl_union_map *executed;
3171
  isl_ast_build *build;
3172
  isl_ast_graft_list *list;
3173
};
3174
3175
/* Initialize data->list to a list of "n" elements.
3176
 */
3177
static int init_unroll_tree(int n, void *user)
3178
700
{
3179
700
  struct isl_ast_unroll_tree_data *data = user;
3180
700
  isl_ctx *ctx;
3181
700
3182
700
  ctx = isl_ast_build_get_ctx(data->build);
3183
700
  data->list = isl_ast_graft_list_alloc(ctx, n);
3184
700
3185
700
  return 0;
3186
700
}
3187
3188
/* Given an iteration of an unrolled domain represented by "bset",
3189
 * generate the corresponding AST and add the result to data->list.
3190
 */
3191
static int do_unroll_tree_iteration(__isl_take isl_basic_set *bset, void *user)
3192
1.33k
{
3193
1.33k
  struct isl_ast_unroll_tree_data *data = user;
3194
1.33k
3195
1.33k
  data->list = add_node(data->list, isl_union_map_copy(data->executed),
3196
1.33k
        bset, isl_ast_build_copy(data->build));
3197
1.33k
3198
1.33k
  return 0;
3199
1.33k
}
3200
3201
/* Generate code for a single component, after shifting (if any)
3202
 * has been applied, in case the schedule was specified as a schedule tree
3203
 * and the unroll option was specified.
3204
 *
3205
 * We call foreach_iteration to iterate over the individual values and
3206
 * construct and collect the corresponding grafts in do_unroll_tree_iteration.
3207
 */
3208
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_unroll(
3209
  __isl_take isl_union_map *executed, __isl_take isl_set *domain,
3210
  __isl_take isl_ast_build *build)
3211
700
{
3212
700
  struct isl_ast_unroll_tree_data data = { executed, build, NULL };
3213
700
3214
700
  if (foreach_iteration(domain, build, &init_unroll_tree,
3215
700
        &do_unroll_tree_iteration, &data) < 0)
3216
0
    data.list = isl_ast_graft_list_free(data.list);
3217
700
3218
700
  isl_union_map_free(executed);
3219
700
  isl_ast_build_free(build);
3220
700
3221
700
  return data.list;
3222
700
}
3223
3224
/* Does "domain" involve a disjunction that is purely based on
3225
 * constraints involving only outer dimension?
3226
 *
3227
 * In particular, is there a disjunction such that the constraints
3228
 * involving the current and later dimensions are the same over
3229
 * all the disjuncts?
3230
 */
3231
static isl_bool has_pure_outer_disjunction(__isl_keep isl_set *domain,
3232
  __isl_keep isl_ast_build *build)
3233
1.21k
{
3234
1.21k
  isl_basic_set *hull;
3235
1.21k
  isl_set *shared, *inner;
3236
1.21k
  isl_bool equal;
3237
1.21k
  int depth, dim;
3238
1.21k
3239
1.21k
  if (isl_set_n_basic_set(domain) <= 1)
3240
683
    return isl_bool_false;
3241
1.21k
3242
534
  inner = isl_set_copy(domain);
3243
534
  depth = isl_ast_build_get_depth(build);
3244
534
  dim = isl_set_dim(inner, isl_dim_set);
3245
534
  inner = isl_set_drop_constraints_not_involving_dims(inner,
3246
534
              isl_dim_set, depth, dim - depth);
3247
534
  hull = isl_set_plain_unshifted_simple_hull(isl_set_copy(inner));
3248
534
  shared = isl_set_from_basic_set(hull);
3249
534
  equal = isl_set_plain_is_equal(inner, shared);
3250
534
  isl_set_free(inner);
3251
534
  isl_set_free(shared);
3252
534
3253
534
  return equal;
3254
1.21k
}
3255
3256
/* Generate code for a single component, after shifting (if any)
3257
 * has been applied, in case the schedule was specified as a schedule tree.
3258
 * In particular, handle the base case where there is either no isolated
3259
 * set or we are within the isolated set (in which case "isolated" is set)
3260
 * or the iterations that precede or follow the isolated set.
3261
 *
3262
 * The schedule domain is broken up or combined into basic sets
3263
 * according to the AST generation option specified in the current
3264
 * schedule node, which may be either atomic, separate, unroll or
3265
 * unspecified.  If the option is unspecified, then we currently simply
3266
 * split the schedule domain into disjoint basic sets.
3267
 *
3268
 * In case the separate option is specified, the AST generation is
3269
 * handled by generate_shifted_component_tree_separate.
3270
 * In the other cases, we need the global schedule domain.
3271
 * In the unroll case, the AST generation is then handled by
3272
 * generate_shifted_component_tree_unroll which needs the actual
3273
 * schedule domain (with divs that may refer to the current dimension)
3274
 * so that stride detection can be performed.
3275
 * In the atomic or unspecified case, inner dimensions and divs involving
3276
 * the current dimensions should be eliminated.
3277
 * The result is then either combined into a single basic set or
3278
 * split up into disjoint basic sets.
3279
 * Finally an AST is generated for each basic set and the results are
3280
 * concatenated.
3281
 *
3282
 * If the schedule domain involves a disjunction that is purely based on
3283
 * constraints involving only outer dimension, then it is treated as
3284
 * if atomic was specified.  This ensures that only a single loop
3285
 * is generated instead of a sequence of identical loops with
3286
 * different guards.
3287
 */
3288
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_base(
3289
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
3290
  int isolated)
3291
1.91k
{
3292
1.91k
  isl_bool outer_disjunction;
3293
1.91k
  isl_union_set *schedule_domain;
3294
1.91k
  isl_set *domain;
3295
1.91k
  isl_basic_set_list *domain_list;
3296
1.91k
  isl_ast_graft_list *list;
3297
1.91k
  enum isl_ast_loop_type type;
3298
1.91k
3299
1.91k
  type = isl_ast_build_get_loop_type(build, isolated);
3300
1.91k
  if (type < 0)
3301
0
    goto error;
3302
1.91k
3303
1.91k
  
if (1.91k
type == isl_ast_loop_separate1.91k
)
3304
0
    return generate_shifted_component_tree_separate(executed,
3305
0
                build);
3306
1.91k
3307
1.91k
  schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
3308
1.91k
  domain = isl_set_from_union_set(schedule_domain);
3309
1.91k
3310
1.91k
  if (type == isl_ast_loop_unroll)
3311
700
    return generate_shifted_component_tree_unroll(executed, domain,
3312
700
                build);
3313
1.91k
3314
1.21k
  domain = isl_ast_build_eliminate(build, domain);
3315
1.21k
  domain = isl_set_coalesce(domain);
3316
1.21k
3317
1.21k
  outer_disjunction = has_pure_outer_disjunction(domain, build);
3318
1.21k
  if (outer_disjunction < 0)
3319
0
    domain = isl_set_free(domain);
3320
1.21k
3321
1.21k
  if (
outer_disjunction || 1.21k
type == isl_ast_loop_atomic716
)
{517
3322
517
    isl_basic_set *hull;
3323
517
    hull = isl_set_unshifted_simple_hull(domain);
3324
517
    domain_list = isl_basic_set_list_from_basic_set(hull);
3325
700
  } else {
3326
700
    domain = isl_set_make_disjoint(domain);
3327
700
    domain_list = isl_basic_set_list_from_set(domain);
3328
700
  }
3329
1.21k
3330
1.21k
  list = generate_parallel_domains(domain_list, executed, build);
3331
1.21k
3332
1.21k
  isl_basic_set_list_free(domain_list);
3333
1.21k
  isl_union_map_free(executed);
3334
1.21k
  isl_ast_build_free(build);
3335
1.21k
3336
1.21k
  return list;
3337
0
error:
3338
0
  isl_union_map_free(executed);
3339
0
  isl_ast_build_free(build);
3340
0
  return NULL;
3341
1.91k
}
3342
3343
/* Extract out the disjunction imposed by "domain" on the outer
3344
 * schedule dimensions.
3345
 *
3346
 * In particular, remove all inner dimensions from "domain" (including
3347
 * the current dimension) and then remove the constraints that are shared
3348
 * by all disjuncts in the result.
3349
 */
3350
static __isl_give isl_set *extract_disjunction(__isl_take isl_set *domain,
3351
  __isl_keep isl_ast_build *build)
3352
492
{
3353
492
  isl_set *hull;
3354
492
  int depth, dim;
3355
492
3356
492
  domain = isl_ast_build_specialize(build, domain);
3357
492
  depth = isl_ast_build_get_depth(build);
3358
492
  dim = isl_set_dim(domain, isl_dim_set);
3359
492
  domain = isl_set_eliminate(domain, isl_dim_set, depth, dim - depth);
3360
492
  domain = isl_set_remove_unknown_divs(domain);
3361
492
  hull = isl_set_copy(domain);
3362
492
  hull = isl_set_from_basic_set(isl_set_unshifted_simple_hull(hull));
3363
492
  domain = isl_set_gist(domain, hull);
3364
492
3365
492
  return domain;
3366
492
}
3367
3368
/* Add "guard" to the grafts in "list".
3369
 * "build" is the outer AST build, while "sub_build" includes "guard"
3370
 * in its generated domain.
3371
 *
3372
 * First combine the grafts into a single graft and then add the guard.
3373
 * If the list is empty, or if some error occurred, then simply return
3374
 * the list.
3375
 */
3376
static __isl_give isl_ast_graft_list *list_add_guard(
3377
  __isl_take isl_ast_graft_list *list, __isl_keep isl_set *guard,
3378
  __isl_keep isl_ast_build *build, __isl_keep isl_ast_build *sub_build)
3379
492
{
3380
492
  isl_ast_graft *graft;
3381
492
3382
492
  list = isl_ast_graft_list_fuse(list, sub_build);
3383
492
3384
492
  if (isl_ast_graft_list_n_ast_graft(list) != 1)
3385
488
    return list;
3386
492
3387
4
  graft = isl_ast_graft_list_get_ast_graft(list, 0);
3388
4
  graft = isl_ast_graft_add_guard(graft, isl_set_copy(guard), build);
3389
4
  list = isl_ast_graft_list_set_ast_graft(list, 0, graft);
3390
4
3391
4
  return list;
3392
492
}
3393
3394
/* Generate code for a single component, after shifting (if any)
3395
 * has been applied, in case the schedule was specified as a schedule tree.
3396
 * In particular, do so for the specified subset of the schedule domain.
3397
 *
3398
 * If we are outside of the isolated part, then "domain" may include
3399
 * a disjunction.  Explicitly generate this disjunction at this point
3400
 * instead of relying on the disjunction getting hoisted back up
3401
 * to this level.
3402
 */
3403
static __isl_give isl_ast_graft_list *generate_shifted_component_tree_part(
3404
  __isl_keep isl_union_map *executed, __isl_take isl_set *domain,
3405
  __isl_keep isl_ast_build *build, int isolated)
3406
2.26k
{
3407
2.26k
  isl_union_set *uset;
3408
2.26k
  isl_ast_graft_list *list;
3409
2.26k
  isl_ast_build *sub_build;
3410
2.26k
  int empty;
3411
2.26k
3412
2.26k
  uset = isl_union_set_from_set(isl_set_copy(domain));
3413
2.26k
  executed = isl_union_map_copy(executed);
3414
2.26k
  executed = isl_union_map_intersect_domain(executed, uset);
3415
2.26k
  empty = isl_union_map_is_empty(executed);
3416
2.26k
  if (empty < 0)
3417
0
    goto error;
3418
2.26k
  
if (2.26k
empty2.26k
)
{1.20k
3419
1.20k
    isl_ctx *ctx;
3420
1.20k
    isl_union_map_free(executed);
3421
1.20k
    isl_set_free(domain);
3422
1.20k
    ctx = isl_ast_build_get_ctx(build);
3423
1.20k
    return isl_ast_graft_list_alloc(ctx, 0);
3424
1.20k
  }
3425
2.26k
3426
1.05k
  sub_build = isl_ast_build_copy(build);
3427
1.05k
  if (
!isolated1.05k
)
{492
3428
492
    domain = extract_disjunction(domain, build);
3429
492
    sub_build = isl_ast_build_restrict_generated(sub_build,
3430
492
              isl_set_copy(domain));
3431
492
  }
3432
1.05k
  list = generate_shifted_component_tree_base(executed,
3433
1.05k
        isl_ast_build_copy(sub_build), isolated);
3434
1.05k
  if (!isolated)
3435
492
    list = list_add_guard(list, domain, build, sub_build);
3436
1.05k
  isl_ast_build_free(sub_build);
3437
1.05k
  isl_set_free(domain);
3438
1.05k
  return list;
3439
0
error:
3440
0
  isl_union_map_free(executed);
3441
0
  isl_set_free(domain);
3442
0
  return NULL;
3443
2.26k
}
3444
3445
/* Generate code for a single component, after shifting (if any)
3446
 * has been applied, in case the schedule was specified as a schedule tree.
3447
 * In particular, do so for the specified sequence of subsets
3448
 * of the schedule domain, "before", "isolated", "after" and "other",
3449
 * where only the "isolated" part is considered to be isolated.
3450
 */
3451
static __isl_give isl_ast_graft_list *generate_shifted_component_parts(
3452
  __isl_take isl_union_map *executed, __isl_take isl_set *before,
3453
  __isl_take isl_set *isolated, __isl_take isl_set *after,
3454
  __isl_take isl_set *other, __isl_take isl_ast_build *build)
3455
566
{
3456
566
  isl_ast_graft_list *list, *res;
3457
566
3458
566
  res = generate_shifted_component_tree_part(executed, before, build, 0);
3459
566
  list = generate_shifted_component_tree_part(executed, isolated,
3460
566
                build, 1);
3461
566
  res = isl_ast_graft_list_concat(res, list);
3462
566
  list = generate_shifted_component_tree_part(executed, after, build, 0);
3463
566
  res = isl_ast_graft_list_concat(res, list);
3464
566
  list = generate_shifted_component_tree_part(executed, other, build, 0);
3465
566
  res = isl_ast_graft_list_concat(res, list);
3466
566
3467
566
  isl_union_map_free(executed);
3468
566
  isl_ast_build_free(build);
3469
566
3470
566
  return res;
3471
566
}
3472
3473
/* Does "set" intersect "first", but not "second"?
3474
 */
3475
static isl_bool only_intersects_first(__isl_keep isl_set *set,
3476
  __isl_keep isl_set *first, __isl_keep isl_set *second)
3477
566
{
3478
566
  isl_bool disjoint;
3479
566
3480
566
  disjoint = isl_set_is_disjoint(set, first);
3481
566
  if (disjoint < 0)
3482
0
    return isl_bool_error;
3483
566
  
if (566
disjoint566
)
3484
562
    return isl_bool_false;
3485
566
3486
4
  return isl_set_is_disjoint(set, second);
3487
566
}
3488
3489
/* Generate code for a single component, after shifting (if any)
3490
 * has been applied, in case the schedule was specified as a schedule tree.
3491
 * In particular, do so in case of isolation where there is
3492
 * only an "isolated" part and an "after" part.
3493
 * "dead1" and "dead2" are freed by this function in order to simplify
3494
 * the caller.
3495
 *
3496
 * The "before" and "other" parts are set to empty sets.
3497
 */
3498
static __isl_give isl_ast_graft_list *generate_shifted_component_only_after(
3499
  __isl_take isl_union_map *executed, __isl_take isl_set *isolated,
3500
  __isl_take isl_set *after, __isl_take isl_ast_build *build,
3501
  __isl_take isl_set *dead1, __isl_take isl_set *dead2)
3502
4
{
3503
4
  isl_set *empty;
3504
4
3505
4
  empty = isl_set_empty(isl_set_get_space(after));
3506
4
  isl_set_free(dead1);
3507
4
  isl_set_free(dead2);
3508
4
  return generate_shifted_component_parts(executed, isl_set_copy(empty),
3509
4
            isolated, after, empty, build);
3510
4
}
3511
3512
/* Generate code for a single component, after shifting (if any)
3513
 * has been applied, in case the schedule was specified as a schedule tree.
3514
 *
3515
 * We first check if the user has specified an isolated schedule domain
3516
 * and that we are not already outside of this isolated schedule domain.
3517
 * If so, we break up the schedule domain into iterations that
3518
 * precede the isolated domain, the isolated domain itself,
3519
 * the iterations that follow the isolated domain and
3520
 * the remaining iterations (those that are incomparable
3521
 * to the isolated domain).
3522
 * We generate an AST for each piece and concatenate the results.
3523
 *
3524
 * In the special case where at least one element of the schedule
3525
 * domain that does not belong to the isolated domain needs
3526
 * to be scheduled after this isolated domain, but none of those
3527
 * elements need to be scheduled before, break up the schedule domain
3528
 * in only two parts, the isolated domain, and a part that will be
3529
 * scheduled after the isolated domain.
3530
 *
3531
 * If no isolated set has been specified, then we generate an
3532
 * AST for the entire inverse schedule.
3533
 */
3534
static __isl_give isl_ast_graft_list *generate_shifted_component_tree(
3535
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3536
1.42k
{
3537
1.42k
  int i, depth;
3538
1.42k
  int empty, has_isolate;
3539
1.42k
  isl_space *space;
3540
1.42k
  isl_union_set *schedule_domain;
3541
1.42k
  isl_set *domain;
3542
1.42k
  isl_basic_set *hull;
3543
1.42k
  isl_set *isolated, *before, *after, *test;
3544
1.42k
  isl_map *gt, *lt;
3545
1.42k
  isl_bool pure;
3546
1.42k
3547
1.42k
  build = isl_ast_build_extract_isolated(build);
3548
1.42k
  has_isolate = isl_ast_build_has_isolated(build);
3549
1.42k
  if (has_isolate < 0)
3550
0
    executed = isl_union_map_free(executed);
3551
1.42k
  else 
if (1.42k
!has_isolate1.42k
)
3552
845
    return generate_shifted_component_tree_base(executed, build, 0);
3553
1.42k
3554
580
  schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
3555
580
  domain = isl_set_from_union_set(schedule_domain);
3556
580
3557
580
  isolated = isl_ast_build_get_isolated(build);
3558
580
  isolated = isl_set_intersect(isolated, isl_set_copy(domain));
3559
580
  test = isl_ast_build_specialize(build, isl_set_copy(isolated));
3560
580
  empty = isl_set_is_empty(test);
3561
580
  isl_set_free(test);
3562
580
  if (empty < 0)
3563
0
    goto error;
3564
580
  
if (580
empty580
)
{14
3565
14
    isl_set_free(isolated);
3566
14
    isl_set_free(domain);
3567
14
    return generate_shifted_component_tree_base(executed, build, 0);
3568
14
  }
3569
566
  isolated = isl_ast_build_eliminate(build, isolated);
3570
566
  hull = isl_set_unshifted_simple_hull(isolated);
3571
566
  isolated = isl_set_from_basic_set(hull);
3572
566
3573
566
  depth = isl_ast_build_get_depth(build);
3574
566
  space = isl_space_map_from_set(isl_set_get_space(isolated));
3575
566
  gt = isl_map_universe(space);
3576
4.88k
  for (i = 0; 
i < depth4.88k
;
++i4.32k
)
3577
4.32k
    gt = isl_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
3578
566
  gt = isl_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);
3579
566
  lt = isl_map_reverse(isl_map_copy(gt));
3580
566
  before = isl_set_apply(isl_set_copy(isolated), gt);
3581
566
  after = isl_set_apply(isl_set_copy(isolated), lt);
3582
566
3583
566
  domain = isl_set_subtract(domain, isl_set_copy(isolated));
3584
566
  pure = only_intersects_first(domain, after, before);
3585
566
  if (pure < 0)
3586
0
    executed = isl_union_map_free(executed);
3587
566
  else 
if (566
pure566
)
3588
4
    return generate_shifted_component_only_after(executed, isolated,
3589
4
            domain, build, before, after);
3590
562
  domain = isl_set_subtract(domain, isl_set_copy(before));
3591
562
  domain = isl_set_subtract(domain, isl_set_copy(after));
3592
562
  after = isl_set_subtract(after, isl_set_copy(isolated));
3593
562
  after = isl_set_subtract(after, isl_set_copy(before));
3594
562
  before = isl_set_subtract(before, isl_set_copy(isolated));
3595
562
3596
562
  return generate_shifted_component_parts(executed, before, isolated,
3597
562
            after, domain, build);
3598
0
error:
3599
0
  isl_set_free(domain);
3600
0
  isl_set_free(isolated);
3601
0
  isl_union_map_free(executed);
3602
0
  isl_ast_build_free(build);
3603
0
  return NULL;
3604
566
}
3605
3606
/* Generate code for a single component, after shifting (if any)
3607
 * has been applied.
3608
 *
3609
 * Call generate_shifted_component_tree or generate_shifted_component_flat
3610
 * depending on whether the schedule was specified as a schedule tree.
3611
 */
3612
static __isl_give isl_ast_graft_list *generate_shifted_component(
3613
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3614
1.49k
{
3615
1.49k
  if (isl_ast_build_has_schedule_node(build))
3616
1.42k
    return generate_shifted_component_tree(executed, build);
3617
1.49k
  else
3618
66
    return generate_shifted_component_flat(executed, build);
3619
1.49k
}
3620
3621
struct isl_set_map_pair {
3622
  isl_set *set;
3623
  isl_map *map;
3624
};
3625
3626
/* Given an array "domain" of isl_set_map_pairs and an array "order"
3627
 * of indices into the "domain" array,
3628
 * return the union of the "map" fields of the elements
3629
 * indexed by the first "n" elements of "order".
3630
 */
3631
static __isl_give isl_union_map *construct_component_executed(
3632
  struct isl_set_map_pair *domain, int *order, int n)
3633
106
{
3634
106
  int i;
3635
106
  isl_map *map;
3636
106
  isl_union_map *executed;
3637
106
3638
106
  map = isl_map_copy(domain[order[0]].map);
3639
106
  executed = isl_union_map_from_map(map);
3640
196
  for (i = 1; 
i < n196
;
++i90
)
{90
3641
90
    map = isl_map_copy(domain[order[i]].map);
3642
90
    executed = isl_union_map_add_map(executed, map);
3643
90
  }
3644
106
3645
106
  return executed;
3646
106
}
3647
3648
/* Generate code for a single component, after shifting (if any)
3649
 * has been applied.
3650
 *
3651
 * The component inverse schedule is specified as the "map" fields
3652
 * of the elements of "domain" indexed by the first "n" elements of "order".
3653
 */
3654
static __isl_give isl_ast_graft_list *generate_shifted_component_from_list(
3655
  struct isl_set_map_pair *domain, int *order, int n,
3656
  __isl_take isl_ast_build *build)
3657
106
{
3658
106
  isl_union_map *executed;
3659
106
3660
106
  executed = construct_component_executed(domain, order, n);
3661
106
  return generate_shifted_component(executed, build);
3662
106
}
3663
3664
/* Does set dimension "pos" of "set" have an obviously fixed value?
3665
 */
3666
static int dim_is_fixed(__isl_keep isl_set *set, int pos)
3667
265
{
3668
265
  int fixed;
3669
265
  isl_val *v;
3670
265
3671
265
  v = isl_set_plain_get_val_if_fixed(set, isl_dim_set, pos);
3672
265
  if (!v)
3673
0
    return -1;
3674
265
  fixed = !isl_val_is_nan(v);
3675
265
  isl_val_free(v);
3676
265
3677
265
  return fixed;
3678
265
}
3679
3680
/* Given an array "domain" of isl_set_map_pairs and an array "order"
3681
 * of indices into the "domain" array,
3682
 * do all (except for at most one) of the "set" field of the elements
3683
 * indexed by the first "n" elements of "order" have a fixed value
3684
 * at position "depth"?
3685
 */
3686
static int at_most_one_non_fixed(struct isl_set_map_pair *domain,
3687
  int *order, int n, int depth)
3688
58
{
3689
58
  int i;
3690
58
  int non_fixed = -1;
3691
58
3692
129
  for (i = 0; 
i < n129
;
++i71
)
{121
3693
121
    int f;
3694
121
3695
121
    f = dim_is_fixed(domain[order[i]].set, depth);
3696
121
    if (f < 0)
3697
0
      return -1;
3698
121
    
if (121
f121
)
3699
16
      continue;
3700
105
    
if (105
non_fixed >= 0105
)
3701
50
      return 0;
3702
55
    non_fixed = i;
3703
55
  }
3704
58
3705
8
  return 1;
3706
58
}
3707
3708
/* Given an array "domain" of isl_set_map_pairs and an array "order"
3709
 * of indices into the "domain" array,
3710
 * eliminate the inner dimensions from the "set" field of the elements
3711
 * indexed by the first "n" elements of "order", provided the current
3712
 * dimension does not have a fixed value.
3713
 *
3714
 * Return the index of the first element in "order" with a corresponding
3715
 * "set" field that does not have an (obviously) fixed value.
3716
 */
3717
static int eliminate_non_fixed(struct isl_set_map_pair *domain,
3718
  int *order, int n, int depth, __isl_keep isl_ast_build *build)
3719
50
{
3720
50
  int i;
3721
50
  int base = -1;
3722
50
3723
184
  for (i = n - 1; 
i >= 0184
;
--i134
)
{134
3724
134
    int f;
3725
134
    f = dim_is_fixed(domain[order[i]].set, depth);
3726
134
    if (f < 0)
3727
0
      return -1;
3728
134
    
if (134
f134
)
3729
1
      continue;
3730
133
    domain[order[i]].set = isl_ast_build_eliminate_inner(build,
3731
133
              domain[order[i]].set);
3732
133
    base = i;
3733
133
  }
3734
50
3735
50
  return base;
3736
50
}
3737
3738
/* Given an array "domain" of isl_set_map_pairs and an array "order"
3739
 * of indices into the "domain" array,
3740
 * find the element of "domain" (amongst those indexed by the first "n"
3741
 * elements of "order") with the "set" field that has the smallest
3742
 * value for the current iterator.
3743
 *
3744
 * Note that the domain with the smallest value may depend on the parameters
3745
 * and/or outer loop dimension.  Since the result of this function is only
3746
 * used as heuristic, we only make a reasonable attempt at finding the best
3747
 * domain, one that should work in case a single domain provides the smallest
3748
 * value for the current dimension over all values of the parameters
3749
 * and outer dimensions.
3750
 *
3751
 * In particular, we compute the smallest value of the first domain
3752
 * and replace it by that of any later domain if that later domain
3753
 * has a smallest value that is smaller for at least some value
3754
 * of the parameters and outer dimensions.
3755
 */
3756
static int first_offset(struct isl_set_map_pair *domain, int *order, int n,
3757
  __isl_keep isl_ast_build *build)
3758
3
{
3759
3
  int i;
3760
3
  isl_map *min_first;
3761
3
  int first = 0;
3762
3
3763
3
  min_first = isl_ast_build_map_to_iterator(build,
3764
3
          isl_set_copy(domain[order[0]].set));
3765
3
  min_first = isl_map_lexmin(min_first);
3766
3
3767
9
  for (i = 1; 
i < n9
;
++i6
)
{6
3768
6
    isl_map *min, *test;
3769
6
    int empty;
3770
6
3771
6
    min = isl_ast_build_map_to_iterator(build,
3772
6
          isl_set_copy(domain[order[i]].set));
3773
6
    min = isl_map_lexmin(min);
3774
6
    test = isl_map_copy(min);
3775
6
    test = isl_map_apply_domain(isl_map_copy(min_first), test);
3776
6
    test = isl_map_order_lt(test, isl_dim_in, 0, isl_dim_out, 0);
3777
6
    empty = isl_map_is_empty(test);
3778
6
    isl_map_free(test);
3779
6
    if (
empty >= 0 && 6
!empty6
)
{3
3780
3
      isl_map_free(min_first);
3781
3
      first = i;
3782
3
      min_first = min;
3783
3
    } else
3784
3
      isl_map_free(min);
3785
6
3786
6
    if (empty < 0)
3787
0
      break;
3788
6
  }
3789
3
3790
3
  isl_map_free(min_first);
3791
3
3792
3
  return i < n ? 
-10
:
first3
;
3793
3
}
3794
3795
/* Construct a shifted inverse schedule based on the original inverse schedule,
3796
 * the stride and the offset.
3797
 *
3798
 * The original inverse schedule is specified as the "map" fields
3799
 * of the elements of "domain" indexed by the first "n" elements of "order".
3800
 *
3801
 * "stride" and "offset" are such that the difference
3802
 * between the values of the current dimension of domain "i"
3803
 * and the values of the current dimension for some reference domain are
3804
 * equal to
3805
 *
3806
 *  stride * integer + offset[i]
3807
 *
3808
 * Moreover, 0 <= offset[i] < stride.
3809
 *
3810
 * For each domain, we create a map
3811
 *
3812
 *  { [..., j, ...] -> [..., j - offset[i], offset[i], ....] }
3813
 *
3814
 * where j refers to the current dimension and the other dimensions are
3815
 * unchanged, and apply this map to the original schedule domain.
3816
 *
3817
 * For example, for the original schedule
3818
 *
3819
 *  { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
3820
 *
3821
 * and assuming the offset is 0 for the A domain and 1 for the B domain,
3822
 * we apply the mapping
3823
 *
3824
 *  { [j] -> [j, 0] }
3825
 *
3826
 * to the schedule of the "A" domain and the mapping
3827
 *
3828
 *  { [j - 1] -> [j, 1] }
3829
 *
3830
 * to the schedule of the "B" domain.
3831
 *
3832
 *
3833
 * Note that after the transformation, the differences between pairs
3834
 * of values of the current dimension over all domains are multiples
3835
 * of stride and that we have therefore exposed the stride.
3836
 *
3837
 *
3838
 * To see that the mapping preserves the lexicographic order,
3839
 * first note that each of the individual maps above preserves the order.
3840
 * If the value of the current iterator is j1 in one domain and j2 in another,
3841
 * then if j1 = j2, we know that the same map is applied to both domains
3842
 * and the order is preserved.
3843
 * Otherwise, let us assume, without loss of generality, that j1 < j2.
3844
 * If c1 >= c2 (with c1 and c2 the corresponding offsets), then
3845
 *
3846
 *  j1 - c1 < j2 - c2
3847
 *
3848
 * and the order is preserved.
3849
 * If c1 < c2, then we know
3850
 *
3851
 *  0 <= c2 - c1 < s
3852
 *
3853
 * We also have
3854
 *
3855
 *  j2 - j1 = n * s + r
3856
 *
3857
 * with n >= 0 and 0 <= r < s.
3858
 * In other words, r = c2 - c1.
3859
 * If n > 0, then
3860
 *
3861
 *  j1 - c1 < j2 - c2
3862
 *
3863
 * If n = 0, then
3864
 *
3865
 *  j1 - c1 = j2 - c2
3866
 *
3867
 * and so
3868
 *
3869
 *  (j1 - c1, c1) << (j2 - c2, c2)
3870
 *
3871
 * with "<<" the lexicographic order, proving that the order is preserved
3872
 * in all cases.
3873
 */
3874
static __isl_give isl_union_map *contruct_shifted_executed(
3875
  struct isl_set_map_pair *domain, int *order, int n,
3876
  __isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
3877
  __isl_take isl_ast_build *build)
3878
3
{
3879
3
  int i;
3880
3
  isl_union_map *executed;
3881
3
  isl_space *space;
3882
3
  isl_map *map;
3883
3
  int depth;
3884
3
  isl_constraint *c;
3885
3
3886
3
  depth = isl_ast_build_get_depth(build);
3887
3
  space = isl_ast_build_get_space(build, 1);
3888
3
  executed = isl_union_map_empty(isl_space_copy(space));
3889
3
  space = isl_space_map_from_set(space);
3890
3
  map = isl_map_identity(isl_space_copy(space));
3891
3
  map = isl_map_eliminate(map, isl_dim_out, depth, 1);
3892
3
  map = isl_map_insert_dims(map, isl_dim_out, depth + 1, 1);
3893
3
  space = isl_space_insert_dims(space, isl_dim_out, depth + 1, 1);
3894
3
3895
3
  c = isl_constraint_alloc_equality(isl_local_space_from_space(space));
3896
3
  c = isl_constraint_set_coefficient_si(c, isl_dim_in, depth, 1);
3897
3
  c = isl_constraint_set_coefficient_si(c, isl_dim_out, depth, -1);
3898
3
3899
12
  for (i = 0; 
i < n12
;
++i9
)
{9
3900
9
    isl_map *map_i;
3901
9
    isl_val *v;
3902
9
3903
9
    v = isl_multi_val_get_val(offset, i);
3904
9
    if (!v)
3905
0
      break;
3906
9
    map_i = isl_map_copy(map);
3907
9
    map_i = isl_map_fix_val(map_i, isl_dim_out, depth + 1,
3908
9
          isl_val_copy(v));
3909
9
    v = isl_val_neg(v);
3910
9
    c = isl_constraint_set_constant_val(c, v);
3911
9
    map_i = isl_map_add_constraint(map_i, isl_constraint_copy(c));
3912
9
3913
9
    map_i = isl_map_apply_domain(isl_map_copy(domain[order[i]].map),
3914
9
            map_i);
3915
9
    executed = isl_union_map_add_map(executed, map_i);
3916
9
  }
3917
3
3918
3
  isl_constraint_free(c);
3919
3
  isl_map_free(map);
3920
3
3921
3
  if (i < n)
3922
0
    executed = isl_union_map_free(executed);
3923
3
3924
3
  return executed;
3925
3
}
3926
3927
/* Generate code for a single component, after exposing the stride,
3928
 * given that the schedule domain is "shifted strided".
3929
 *
3930
 * The component inverse schedule is specified as the "map" fields
3931
 * of the elements of "domain" indexed by the first "n" elements of "order".
3932
 *
3933
 * The schedule domain being "shifted strided" means that the differences
3934
 * between the values of the current dimension of domain "i"
3935
 * and the values of the current dimension for some reference domain are
3936
 * equal to
3937
 *
3938
 *  stride * integer + offset[i]
3939
 *
3940
 * We first look for the domain with the "smallest" value for the current
3941
 * dimension and adjust the offsets such that the offset of the "smallest"
3942
 * domain is equal to zero.  The other offsets are reduced modulo stride.
3943
 *
3944
 * Based on this information, we construct a new inverse schedule in
3945
 * contruct_shifted_executed that exposes the stride.
3946
 * Since this involves the introduction of a new schedule dimension,
3947
 * the build needs to be changed accodingly.
3948
 * After computing the AST, the newly introduced dimension needs
3949
 * to be removed again from the list of grafts.  We do this by plugging
3950
 * in a mapping that represents the new schedule domain in terms of the
3951
 * old schedule domain.
3952
 */
3953
static __isl_give isl_ast_graft_list *generate_shift_component(
3954
  struct isl_set_map_pair *domain, int *order, int n,
3955
  __isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
3956
  __isl_take isl_ast_build *build)
3957
3
{
3958
3
  isl_ast_graft_list *list;
3959
3
  int first;
3960
3
  int depth;
3961
3
  isl_val *val;
3962
3
  isl_multi_val *mv;
3963
3
  isl_space *space;
3964
3
  isl_multi_aff *ma, *zero;
3965
3
  isl_union_map *executed;
3966
3
3967
3
  depth = isl_ast_build_get_depth(build);
3968
3
3969
3
  first = first_offset(domain, order, n, build);
3970
3
  if (first < 0)
3971
0
    goto error;
3972
3
3973
3
  mv = isl_multi_val_copy(offset);
3974
3
  val = isl_multi_val_get_val(offset, first);
3975
3
  val = isl_val_neg(val);
3976
3
  mv = isl_multi_val_add_val(mv, val);
3977
3
  mv = isl_multi_val_mod_val(mv, isl_val_copy(stride));
3978
3
3979
3
  executed = contruct_shifted_executed(domain, order, n, stride, mv,
3980
3
            build);
3981
3
  space = isl_ast_build_get_space(build, 1);
3982
3
  space = isl_space_map_from_set(space);
3983
3
  ma = isl_multi_aff_identity(isl_space_copy(space));
3984
3
  space = isl_space_from_domain(isl_space_domain(space));
3985
3
  space = isl_space_add_dims(space, isl_dim_out, 1);
3986
3
  zero = isl_multi_aff_zero(space);
3987
3
  ma = isl_multi_aff_range_splice(ma, depth + 1, zero);
3988
3
  build = isl_ast_build_insert_dim(build, depth + 1);
3989
3
  list = generate_shifted_component(executed, build);
3990
3
3991
3
  list = isl_ast_graft_list_preimage_multi_aff(list, ma);
3992
3
3993
3
  isl_multi_val_free(mv);
3994
3
3995
3
  return list;
3996
0
error:
3997
0
  isl_ast_build_free(build);
3998
0
  return NULL;
3999
3
}
4000
4001
/* Does any node in the schedule tree rooted at the current schedule node
4002
 * of "build" depend on outer schedule nodes?
4003
 */
4004
static int has_anchored_subtree(__isl_keep isl_ast_build *build)
4005
49
{
4006
49
  isl_schedule_node *node;
4007
49
  int dependent = 0;
4008
49
4009
49
  node = isl_ast_build_get_schedule_node(build);
4010
49
  dependent = isl_schedule_node_is_subtree_anchored(node);
4011
49
  isl_schedule_node_free(node);
4012
49
4013
49
  return dependent;
4014
49
}
4015
4016
/* Generate code for a single component.
4017
 *
4018
 * The component inverse schedule is specified as the "map" fields
4019
 * of the elements of "domain" indexed by the first "n" elements of "order".
4020
 *
4021
 * This function may modify the "set" fields of "domain".
4022
 *
4023
 * Before proceeding with the actual code generation for the component,
4024
 * we first check if there are any "shifted" strides, meaning that
4025
 * the schedule domains of the individual domains are all strided,
4026
 * but that they have different offsets, resulting in the union
4027
 * of schedule domains not being strided anymore.
4028
 *
4029
 * The simplest example is the schedule
4030
 *
4031
 *  { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
4032
 *
4033
 * Both schedule domains are strided, but their union is not.
4034
 * This function detects such cases and then rewrites the schedule to
4035
 *
4036
 *  { A[i] -> [2i, 0]: 0 <= i < 10; B[i] -> [2i, 1] : 0 <= i < 10 }
4037
 *
4038
 * In the new schedule, the schedule domains have the same offset (modulo
4039
 * the stride), ensuring that the union of schedule domains is also strided.
4040
 *
4041
 *
4042
 * If there is only a single domain in the component, then there is
4043
 * nothing to do.   Similarly, if the current schedule dimension has
4044
 * a fixed value for almost all domains then there is nothing to be done.
4045
 * In particular, we need at least two domains where the current schedule
4046
 * dimension does not have a fixed value.
4047
 * Finally, in case of a schedule map input,
4048
 * if any of the options refer to the current schedule dimension,
4049
 * then we bail out as well.  It would be possible to reformulate the options
4050
 * in terms of the new schedule domain, but that would introduce constraints
4051
 * that separate the domains in the options and that is something we would
4052
 * like to avoid.
4053
 * In the case of a schedule tree input, we bail out if any of
4054
 * the descendants of the current schedule node refer to outer
4055
 * schedule nodes in any way.
4056
 *
4057
 *
4058
 * To see if there is any shifted stride, we look at the differences
4059
 * between the values of the current dimension in pairs of domains
4060
 * for equal values of outer dimensions.  These differences should be
4061
 * of the form
4062
 *
4063
 *  m x + r
4064
 *
4065
 * with "m" the stride and "r" a constant.  Note that we cannot perform
4066
 * this analysis on individual domains as the lower bound in each domain
4067
 * may depend on parameters or outer dimensions and so the current dimension
4068
 * itself may not have a fixed remainder on division by the stride.
4069
 *
4070
 * In particular, we compare the first domain that does not have an
4071
 * obviously fixed value for the current dimension to itself and all
4072
 * other domains and collect the offsets and the gcd of the strides.
4073
 * If the gcd becomes one, then we failed to find shifted strides.
4074
 * If the gcd is zero, then the differences were all fixed, meaning
4075
 * that some domains had non-obviously fixed values for the current dimension.
4076
 * If all the offsets are the same (for those domains that do not have
4077
 * an obviously fixed value for the current dimension), then we do not
4078
 * apply the transformation.
4079
 * If none of the domains were skipped, then there is nothing to do.
4080
 * If some of them were skipped, then if we apply separation, the schedule
4081
 * domain should get split in pieces with a (non-shifted) stride.
4082
 *
4083
 * Otherwise, we apply a shift to expose the stride in
4084
 * generate_shift_component.
4085
 */
4086
static __isl_give isl_ast_graft_list *generate_component(
4087
  struct isl_set_map_pair *domain, int *order, int n,
4088
  __isl_take isl_ast_build *build)
4089
109
{
4090
109
  int i, d;
4091
109
  int depth;
4092
109
  isl_ctx *ctx;
4093
109
  isl_map *map;
4094
109
  isl_set *deltas;
4095
109
  isl_val *gcd = NULL;
4096
109
  isl_multi_val *mv;
4097
109
  int fixed, skip;
4098
109
  int base;
4099
109
  isl_ast_graft_list *list;
4100
109
  int res = 0;
4101
109
4102
109
  depth = isl_ast_build_get_depth(build);
4103
109
4104
109
  skip = n == 1;
4105
109
  if (
skip >= 0 && 109
!skip109
)
4106
58
    skip = at_most_one_non_fixed(domain, order, n, depth);
4107
109
  if (
skip >= 0 && 109
!skip109
)
{50
4108
50
    if (isl_ast_build_has_schedule_node(build))
4109
49
      skip = has_anchored_subtree(build);
4110
50
    else
4111
1
      skip = isl_ast_build_options_involve_depth(build);
4112
50
  }
4113
109
  if (skip < 0)
4114
0
    goto error;
4115
109
  
if (109
skip109
)
4116
59
    return generate_shifted_component_from_list(domain,
4117
59
                  order, n, build);
4118
109
4119
50
  base = eliminate_non_fixed(domain, order, n, depth, build);
4120
50
  if (base < 0)
4121
0
    goto error;
4122
50
4123
50
  ctx = isl_ast_build_get_ctx(build);
4124
50
4125
50
  mv = isl_multi_val_zero(isl_space_set_alloc(ctx, 0, n));
4126
50
4127
50
  fixed = 1;
4128
60
  for (i = 0; 
i < n60
;
++i10
)
{57
4129
57
    isl_val *r, *m;
4130
57
4131
57
    map = isl_map_from_domain_and_range(
4132
57
          isl_set_copy(domain[order[base]].set),
4133
57
          isl_set_copy(domain[order[i]].set));
4134
77
    for (d = 0; 
d < depth77
;
++d20
)
4135
20
      map = isl_map_equate(map, isl_dim_in, d,
4136
20
                isl_dim_out, d);
4137
57
    deltas = isl_map_deltas(map);
4138
57
    res = isl_set_dim_residue_class_val(deltas, depth, &m, &r);
4139
57
    isl_set_free(deltas);
4140
57
    if (res < 0)
4141
0
      break;
4142
57
4143
57
    
if (57
i == 057
)
4144
50
      gcd = m;
4145
57
    else
4146
7
      gcd = isl_val_gcd(gcd, m);
4147
57
    if (
isl_val_is_one(gcd)57
)
{47
4148
47
      isl_val_free(r);
4149
47
      break;
4150
47
    }
4151
10
    mv = isl_multi_val_set_val(mv, i, r);
4152
10
4153
10
    res = dim_is_fixed(domain[order[i]].set, depth);
4154
10
    if (res < 0)
4155
0
      break;
4156
10
    
if (10
res10
)
4157
0
      continue;
4158
10
4159
10
    
if (10
fixed && 10
i > base7
)
{3
4160
3
      isl_val *a, *b;
4161
3
      a = isl_multi_val_get_val(mv, i);
4162
3
      b = isl_multi_val_get_val(mv, base);
4163
3
      if (isl_val_ne(a, b))
4164
3
        fixed = 0;
4165
3
      isl_val_free(a);
4166
3
      isl_val_free(b);
4167
3
    }
4168
10
  }
4169
50
4170
50
  if (
res < 0 || 50
!gcd50
)
{0
4171
0
    isl_ast_build_free(build);
4172
0
    list = NULL;
4173
50
  } else 
if (50
i < n || 50
fixed3
||
isl_val_is_zero(gcd)3
)
{47
4174
47
    list = generate_shifted_component_from_list(domain,
4175
47
                  order, n, build);
4176
3
  } else {
4177
3
    list = generate_shift_component(domain, order, n, gcd, mv,
4178
3
            build);
4179
3
  }
4180
50
4181
50
  isl_val_free(gcd);
4182
50
  isl_multi_val_free(mv);
4183
50
4184
50
  return list;
4185
0
error:
4186
0
  isl_ast_build_free(build);
4187
0
  return NULL;
4188
50
}
4189
4190
/* Store both "map" itself and its domain in the
4191
 * structure pointed to by *next and advance to the next array element.
4192
 */
4193
static isl_stat extract_domain(__isl_take isl_map *map, void *user)
4194
205
{
4195
205
  struct isl_set_map_pair **next = user;
4196
205
4197
205
  (*next)->map = isl_map_copy(map);
4198
205
  (*next)->set = isl_map_domain(map);
4199
205
  (*next)++;
4200
205
4201
205
  return isl_stat_ok;
4202
205
}
4203
4204
static int after_in_tree(__isl_keep isl_union_map *umap,
4205
  __isl_keep isl_schedule_node *node);
4206
4207
/* Is any domain element of "umap" scheduled after any of
4208
 * the corresponding image elements by the tree rooted at
4209
 * the child of "node"?
4210
 */
4211
static int after_in_child(__isl_keep isl_union_map *umap,
4212
  __isl_keep isl_schedule_node *node)
4213
22
{
4214
22
  isl_schedule_node *child;
4215
22
  int after;
4216
22
4217
22
  child = isl_schedule_node_get_child(node, 0);
4218
22
  after = after_in_tree(umap, child);
4219
22
  isl_schedule_node_free(child);
4220
22
4221
22
  return after;
4222
22
}
4223
4224
/* Is any domain element of "umap" scheduled after any of
4225
 * the corresponding image elements by the tree rooted at
4226
 * the band node "node"?
4227
 *
4228
 * We first check if any domain element is scheduled after any
4229
 * of the corresponding image elements by the band node itself.
4230
 * If not, we restrict "map" to those pairs of element that
4231
 * are scheduled together by the band node and continue with
4232
 * the child of the band node.
4233
 * If there are no such pairs then the map passed to after_in_child
4234
 * will be empty causing it to return 0.
4235
 */
4236
static int after_in_band(__isl_keep isl_union_map *umap,
4237
  __isl_keep isl_schedule_node *node)
4238
0
{
4239
0
  isl_multi_union_pw_aff *mupa;
4240
0
  isl_union_map *partial, *test, *gt, *universe, *umap1, *umap2;
4241
0
  isl_union_set *domain, *range;
4242
0
  isl_space *space;
4243
0
  int empty;
4244
0
  int after;
4245
0
4246
0
  if (isl_schedule_node_band_n_member(node) == 0)
4247
0
    return after_in_child(umap, node);
4248
0
4249
0
  mupa = isl_schedule_node_band_get_partial_schedule(node);
4250
0
  space = isl_multi_union_pw_aff_get_space(mupa);
4251
0
  partial = isl_union_map_from_multi_union_pw_aff(mupa);
4252
0
  test = isl_union_map_copy(umap);
4253
0
  test = isl_union_map_apply_domain(test, isl_union_map_copy(partial));
4254
0
  test = isl_union_map_apply_range(test, isl_union_map_copy(partial));
4255
0
  gt = isl_union_map_from_map(isl_map_lex_gt(space));
4256
0
  test = isl_union_map_intersect(test, gt);
4257
0
  empty = isl_union_map_is_empty(test);
4258
0
  isl_union_map_free(test);
4259
0
4260
0
  if (
empty < 0 || 0
!empty0
)
{0
4261
0
    isl_union_map_free(partial);
4262
0
    return empty < 0 ? 
-10
:
10
;
4263
0
  }
4264
0
4265
0
  universe = isl_union_map_universe(isl_union_map_copy(umap));
4266
0
  domain = isl_union_map_domain(isl_union_map_copy(universe));
4267
0
  range = isl_union_map_range(universe);
4268
0
  umap1 = isl_union_map_copy(partial);
4269
0
  umap1 = isl_union_map_intersect_domain(umap1, domain);
4270
0
  umap2 = isl_union_map_intersect_domain(partial, range);
4271
0
  test = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
4272
0
  test = isl_union_map_intersect(test, isl_union_map_copy(umap));
4273
0
  after = after_in_child(test, node);
4274
0
  isl_union_map_free(test);
4275
0
  return after;
4276
0
}
4277
4278
/* Is any domain element of "umap" scheduled after any of
4279
 * the corresponding image elements by the tree rooted at
4280
 * the context node "node"?
4281
 *
4282
 * The context constraints apply to the schedule domain,
4283
 * so we cannot apply them directly to "umap", which contains
4284
 * pairs of statement instances.  Instead, we add them
4285
 * to the range of the prefix schedule for both domain and
4286
 * range of "umap".
4287
 */
4288
static int after_in_context(__isl_keep isl_union_map *umap,
4289
  __isl_keep isl_schedule_node *node)
4290
0
{
4291
0
  isl_union_map *prefix, *universe, *umap1, *umap2;
4292
0
  isl_union_set *domain, *range;
4293
0
  isl_set *context;
4294
0
  int after;
4295
0
4296
0
  umap = isl_union_map_copy(umap);
4297
0
  context = isl_schedule_node_context_get_context(node);
4298
0
  prefix = isl_schedule_node_get_prefix_schedule_union_map(node);
4299
0
  universe = isl_union_map_universe(isl_union_map_copy(umap));
4300
0
  domain = isl_union_map_domain(isl_union_map_copy(universe));
4301
0
  range = isl_union_map_range(universe);
4302
0
  umap1 = isl_union_map_copy(prefix);
4303
0
  umap1 = isl_union_map_intersect_domain(umap1, domain);
4304
0
  umap2 = isl_union_map_intersect_domain(prefix, range);
4305
0
  umap1 = isl_union_map_intersect_range(umap1,
4306
0
              isl_union_set_from_set(context));
4307
0
  umap1 = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
4308
0
  umap = isl_union_map_intersect(umap, umap1);
4309
0
4310
0
  after = after_in_child(umap, node);
4311
0
4312
0
  isl_union_map_free(umap);
4313
0
4314
0
  return after;
4315
0
}
4316
4317
/* Is any domain element of "umap" scheduled after any of
4318
 * the corresponding image elements by the tree rooted at
4319
 * the expansion node "node"?
4320
 *
4321
 * We apply the expansion to domain and range of "umap" and
4322
 * continue with its child.
4323
 */
4324
static int after_in_expansion(__isl_keep isl_union_map *umap,
4325
  __isl_keep isl_schedule_node *node)
4326
0
{
4327
0
  isl_union_map *expansion;
4328
0
  int after;
4329
0
4330
0
  expansion = isl_schedule_node_expansion_get_expansion(node);
4331
0
  umap = isl_union_map_copy(umap);
4332
0
  umap = isl_union_map_apply_domain(umap, isl_union_map_copy(expansion));
4333
0
  umap = isl_union_map_apply_range(umap, expansion);
4334
0
4335
0
  after = after_in_child(umap, node);
4336
0
4337
0
  isl_union_map_free(umap);
4338
0
4339
0
  return after;
4340
0
}
4341
4342
/* Is any domain element of "umap" scheduled after any of
4343
 * the corresponding image elements by the tree rooted at
4344
 * the extension node "node"?
4345
 *
4346
 * Since the extension node may add statement instances before or
4347
 * after the pairs of statement instances in "umap", we return 1
4348
 * to ensure that these pairs are not broken up.
4349
 */
4350
static int after_in_extension(__isl_keep isl_union_map *umap,
4351
  __isl_keep isl_schedule_node *node)
4352
0
{
4353
0
  return 1;
4354
0
}
4355
4356
/* Is any domain element of "umap" scheduled after any of
4357
 * the corresponding image elements by the tree rooted at
4358
 * the filter node "node"?
4359
 *
4360
 * We intersect domain and range of "umap" with the filter and
4361
 * continue with its child.
4362
 */
4363
static int after_in_filter(__isl_keep isl_union_map *umap,
4364
  __isl_keep isl_schedule_node *node)
4365
22
{
4366
22
  isl_union_set *filter;
4367
22
  int after;
4368
22
4369
22
  umap = isl_union_map_copy(umap);
4370
22
  filter = isl_schedule_node_filter_get_filter(node);
4371
22
  umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(filter));
4372
22
  umap = isl_union_map_intersect_range(umap, filter);
4373
22
4374
22
  after = after_in_child(umap, node);
4375
22
4376
22
  isl_union_map_free(umap);
4377
22
4378
22
  return after;
4379
22
}
4380
4381
/* Is any domain element of "umap" scheduled after any of
4382
 * the corresponding image elements by the tree rooted at
4383
 * the set node "node"?
4384
 *
4385
 * This is only the case if this condition holds in any
4386
 * of the (filter) children of the set node.
4387
 * In particular, if the domain and the range of "umap"
4388
 * are contained in different children, then the condition
4389
 * does not hold.
4390
 */
4391
static int after_in_set(__isl_keep isl_union_map *umap,
4392
  __isl_keep isl_schedule_node *node)
4393
8
{
4394
8
  int i, n;
4395
8
4396
8
  n = isl_schedule_node_n_children(node);
4397
30
  for (i = 0; 
i < n30
;
++i22
)
{22
4398
22
    isl_schedule_node *child;
4399
22
    int after;
4400
22
4401
22
    child = isl_schedule_node_get_child(node, i);
4402
22
    after = after_in_tree(umap, child);
4403
22
    isl_schedule_node_free(child);
4404
22
4405
22
    if (
after < 0 || 22
after22
)
4406
0
      return after;
4407
22
  }
4408
8
4409
8
  return 0;
4410
8
}
4411
4412
/* Return the filter of child "i" of "node".
4413
 */
4414
static __isl_give isl_union_set *child_filter(
4415
  __isl_keep isl_schedule_node *node, int i)
4416
26
{
4417
26
  isl_schedule_node *child;
4418
26
  isl_union_set *filter;
4419
26
4420
26
  child = isl_schedule_node_get_child(node, i);
4421
26
  filter = isl_schedule_node_filter_get_filter(child);
4422
26
  isl_schedule_node_free(child);
4423
26
4424
26
  return filter;
4425
26
}
4426
4427
/* Is any domain element of "umap" scheduled after any of
4428
 * the corresponding image elements by the tree rooted at
4429
 * the sequence node "node"?
4430
 *
4431
 * This happens in particular if any domain element is
4432
 * contained in a later child than one containing a range element or
4433
 * if the condition holds within a given child in the sequence.
4434
 * The later part of the condition is checked by after_in_set.
4435
 */
4436
static int after_in_sequence(__isl_keep isl_union_map *umap,
4437
  __isl_keep isl_schedule_node *node)
4438
12
{
4439
12
  int i, j, n;
4440
12
  isl_union_map *umap_i;
4441
12
  int empty, after = 0;
4442
12
4443
12
  n = isl_schedule_node_n_children(node);
4444
27
  for (i = 1; 
i < n27
;
++i15
)
{19
4445
19
    isl_union_set *filter_i;
4446
19
4447
19
    umap_i = isl_union_map_copy(umap);
4448
19
    filter_i = child_filter(node, i);
4449
19
    umap_i = isl_union_map_intersect_domain(umap_i, filter_i);
4450
19
    empty = isl_union_map_is_empty(umap_i);
4451
19
    if (empty < 0)
4452
0
      goto error;
4453
19
    
if (19
empty19
)
{13
4454
13
      isl_union_map_free(umap_i);
4455
13
      continue;
4456
13
    }
4457
19
4458
9
    
for (j = 0; 6
j < i9
;
++j3
)
{7
4459
7
      isl_union_set *filter_j;
4460
7
      isl_union_map *umap_ij;
4461
7
4462
7
      umap_ij = isl_union_map_copy(umap_i);
4463
7
      filter_j = child_filter(node, j);
4464
7
      umap_ij = isl_union_map_intersect_range(umap_ij,
4465
7
                filter_j);
4466
7
      empty = isl_union_map_is_empty(umap_ij);
4467
7
      isl_union_map_free(umap_ij);
4468
7
4469
7
      if (empty < 0)
4470
0
        goto error;
4471
7
      
if (7
!empty7
)
4472
4
        after = 1;
4473
7
      if (after)
4474
4
        break;
4475
7
    }
4476
6
4477
6
    isl_union_map_free(umap_i);
4478
6
    if (after)
4479
4
      break;
4480
6
  }
4481
12
4482
12
  
if (12
after < 0 || 12
after12
)
4483
4
    return after;
4484
12
4485
8
  return after_in_set(umap, node);
4486
0
error:
4487
0
  isl_union_map_free(umap_i);
4488
0
  return -1;
4489
12
}
4490
4491
/* Is any domain element of "umap" scheduled after any of
4492
 * the corresponding image elements by the tree rooted at "node"?
4493
 *
4494
 * If "umap" is empty, then clearly there is no such element.
4495
 * Otherwise, consider the different types of nodes separately.
4496
 */
4497
static int after_in_tree(__isl_keep isl_union_map *umap,
4498
  __isl_keep isl_schedule_node *node)
4499
56
{
4500
56
  int empty;
4501
56
  enum isl_schedule_node_type type;
4502
56
4503
56
  empty = isl_union_map_is_empty(umap);
4504
56
  if (empty < 0)
4505
0
    return -1;
4506
56
  
if (56
empty56
)
4507
22
    return 0;
4508
34
  
if (34
!node34
)
4509
0
    return -1;
4510
34
4511
34
  type = isl_schedule_node_get_type(node);
4512
34
  switch (type) {
4513
0
  case isl_schedule_node_error:
4514
0
    return -1;
4515
0
  case isl_schedule_node_leaf:
4516
0
    return 0;
4517
0
  case isl_schedule_node_band:
4518
0
    return after_in_band(umap, node);
4519
0
  case isl_schedule_node_domain:
4520
0
    isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
4521
0
      "unexpected internal domain node", return -1);
4522
0
  case isl_schedule_node_context:
4523
0
    return after_in_context(umap, node);
4524
0
  case isl_schedule_node_expansion:
4525
0
    return after_in_expansion(umap, node);
4526
0
  case isl_schedule_node_extension:
4527
0
    return after_in_extension(umap, node);
4528
22
  case isl_schedule_node_filter:
4529
22
    return after_in_filter(umap, node);
4530
0
  case isl_schedule_node_guard:
4531
0
  case isl_schedule_node_mark:
4532
0
    return after_in_child(umap, node);
4533
0
  case isl_schedule_node_set:
4534
0
    return after_in_set(umap, node);
4535
12
  case isl_schedule_node_sequence:
4536
12
    return after_in_sequence(umap, node);
4537
34
  }
4538
34
4539
0
  return 1;
4540
34
}
4541
4542
/* Is any domain element of "map1" scheduled after any domain
4543
 * element of "map2" by the subtree underneath the current band node,
4544
 * while at the same time being scheduled together by the current
4545
 * band node, i.e., by "map1" and "map2?
4546
 *
4547
 * If the child of the current band node is a leaf, then
4548
 * no element can be scheduled after any other element.
4549
 *
4550
 * Otherwise, we construct a relation between domain elements
4551
 * of "map1" and domain elements of "map2" that are scheduled
4552
 * together and then check if the subtree underneath the current
4553
 * band node determines their relative order.
4554
 */
4555
static int after_in_subtree(__isl_keep isl_ast_build *build,
4556
  __isl_keep isl_map *map1, __isl_keep isl_map *map2)
4557
12
{
4558
12
  isl_schedule_node *node;
4559
12
  isl_map *map;
4560
12
  isl_union_map *umap;
4561
12
  int after;
4562
12
4563
12
  node = isl_ast_build_get_schedule_node(build);
4564
12
  if (!node)
4565
0
    return -1;
4566
12
  node = isl_schedule_node_child(node, 0);
4567
12
  if (
isl_schedule_node_get_type(node) == isl_schedule_node_leaf12
)
{0
4568
0
    isl_schedule_node_free(node);
4569
0
    return 0;
4570
0
  }
4571
12
  map = isl_map_copy(map2);
4572
12
  map = isl_map_apply_domain(map, isl_map_copy(map1));
4573
12
  umap = isl_union_map_from_map(map);
4574
12
  after = after_in_tree(umap, node);
4575
12
  isl_union_map_free(umap);
4576
12
  isl_schedule_node_free(node);
4577
12
  return after;
4578
12
}
4579
4580
/* Internal data for any_scheduled_after.
4581
 *
4582
 * "build" is the build in which the AST is constructed.
4583
 * "depth" is the number of loops that have already been generated
4584
 * "group_coscheduled" is a local copy of options->ast_build_group_coscheduled
4585
 * "domain" is an array of set-map pairs corresponding to the different
4586
 * iteration domains.  The set is the schedule domain, i.e., the domain
4587
 * of the inverse schedule, while the map is the inverse schedule itself.
4588
 */
4589
struct isl_any_scheduled_after_data {
4590
  isl_ast_build *build;
4591
  int depth;
4592
  int group_coscheduled;
4593
  struct isl_set_map_pair *domain;
4594
};
4595
4596
/* Is any element of domain "i" scheduled after any element of domain "j"
4597
 * (for a common iteration of the first data->depth loops)?
4598
 *
4599
 * data->domain[i].set contains the domain of the inverse schedule
4600
 * for domain "i", i.e., elements in the schedule domain.
4601
 *
4602
 * If we are inside a band of a schedule tree and there is a pair
4603
 * of elements in the two domains that is schedule together by
4604
 * the current band, then we check if any element of "i" may be schedule
4605
 * after element of "j" by the descendants of the band node.
4606
 *
4607
 * If data->group_coscheduled is set, then we also return 1 if there
4608
 * is any pair of elements in the two domains that are scheduled together.
4609
 */
4610
static isl_bool any_scheduled_after(int i, int j, void *user)
4611
265
{
4612
265
  struct isl_any_scheduled_after_data *data = user;
4613
265
  int dim = isl_set_dim(data->domain[i].set, isl_dim_set);
4614
265
  int pos;
4615
265
4616
297
  for (pos = data->depth; 
pos < dim297
;
++pos32
)
{285
4617
285
    int follows;
4618
285
4619
285
    follows = isl_set_follows_at(data->domain[i].set,
4620
285
            data->domain[j].set, pos);
4621
285
4622
285
    if (follows < -1)
4623
0
      return isl_bool_error;
4624
285
    
if (285
follows > 0285
)
4625
206
      return isl_bool_true;
4626
79
    
if (79
follows < 079
)
4627
47
      return isl_bool_false;
4628
79
  }
4629
265
4630
12
  
if (12
isl_ast_build_has_schedule_node(data->build)12
)
{12
4631
12
    int after;
4632
12
4633
12
    after = after_in_subtree(data->build, data->domain[i].map,
4634
12
              data->domain[j].map);
4635
12
    if (
after < 0 || 12
after12
)
4636
4
      return after;
4637
12
  }
4638
12
4639
8
  return data->group_coscheduled;
4640
12
}
4641
4642
/* Look for independent components at the current depth and generate code
4643
 * for each component separately.  The resulting lists of grafts are
4644
 * merged in an attempt to combine grafts with identical guards.
4645
 *
4646
 * Code for two domains can be generated separately if all the elements
4647
 * of one domain are scheduled before (or together with) all the elements
4648
 * of the other domain.  We therefore consider the graph with as nodes
4649
 * the domains and an edge between two nodes if any element of the first
4650
 * node is scheduled after any element of the second node.
4651
 * If the ast_build_group_coscheduled is set, then we also add an edge if
4652
 * there is any pair of elements in the two domains that are scheduled
4653
 * together.
4654
 * Code is then generated (by generate_component)
4655
 * for each of the strongly connected components in this graph
4656
 * in their topological order.
4657
 *
4658
 * Since the test is performed on the domain of the inverse schedules of
4659
 * the different domains, we precompute these domains and store
4660
 * them in data.domain.
4661
 */
4662
static __isl_give isl_ast_graft_list *generate_components(
4663
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
4664
78
{
4665
78
  int i;
4666
78
  isl_ctx *ctx = isl_ast_build_get_ctx(build);
4667
78
  int n = isl_union_map_n_map(executed);
4668
78
  struct isl_any_scheduled_after_data data;
4669
78
  struct isl_set_map_pair *next;
4670
78
  struct isl_tarjan_graph *g = NULL;
4671
78
  isl_ast_graft_list *list = NULL;
4672
78
  int n_domain = 0;
4673
78
4674
78
  data.domain = isl_calloc_array(ctx, struct isl_set_map_pair, n);
4675
78
  if (!data.domain)
4676
0
    goto error;
4677
78
  n_domain = n;
4678
78
4679
78
  next = data.domain;
4680
78
  if (isl_union_map_foreach_map(executed, &extract_domain, &next) < 0)
4681
0
    goto error;
4682
78
4683
78
  
if (78
!build78
)
4684
0
    goto error;
4685
78
  data.build = build;
4686
78
  data.depth = isl_ast_build_get_depth(build);
4687
78
  data.group_coscheduled = isl_options_get_ast_build_group_coscheduled(ctx);
4688
78
  g = isl_tarjan_graph_init(ctx, n, &any_scheduled_after, &data);
4689
78
  if (!g)
4690
0
    goto error;
4691
78
4692
78
  list = isl_ast_graft_list_alloc(ctx, 0);
4693
78
4694
78
  i = 0;
4695
187
  while (
list && 187
n187
)
{109
4696
109
    isl_ast_graft_list *list_c;
4697
109
    int first = i;
4698
109
4699
109
    if (g->order[i] == -1)
4700
0
      isl_die(ctx, isl_error_internal, "cannot happen",
4701
109
        goto error);
4702
109
    ++i; --n;
4703
205
    while (
g->order[i] != -1205
)
{96
4704
96
      ++i; --n;
4705
96
    }
4706
109
4707
109
    list_c = generate_component(data.domain,
4708
109
              g->order + first, i - first,
4709
109
              isl_ast_build_copy(build));
4710
109
    list = isl_ast_graft_list_merge(list, list_c, build);
4711
109
4712
109
    ++i;
4713
109
  }
4714
78
4715
78
  
if (78
078
)
4716
0
error:    list = isl_ast_graft_list_free(list);
4717
78
  isl_tarjan_graph_free(g);
4718
283
  for (i = 0; 
i < n_domain283
;
++i205
)
{205
4719
205
    isl_map_free(data.domain[i].map);
4720
205
    isl_set_free(data.domain[i].set);
4721
205
  }
4722
78
  free(data.domain);
4723
78
  isl_union_map_free(executed);
4724
78
  isl_ast_build_free(build);
4725
78
4726
78
  return list;
4727
78
}
4728
4729
/* Generate code for the next level (and all inner levels).
4730
 *
4731
 * If "executed" is empty, i.e., no code needs to be generated,
4732
 * then we return an empty list.
4733
 *
4734
 * If we have already generated code for all loop levels, then we pass
4735
 * control to generate_inner_level.
4736
 *
4737
 * If "executed" lives in a single space, i.e., if code needs to be
4738
 * generated for a single domain, then there can only be a single
4739
 * component and we go directly to generate_shifted_component.
4740
 * Otherwise, we call generate_components to detect the components
4741
 * and to call generate_component on each of them separately.
4742
 */
4743
static __isl_give isl_ast_graft_list *generate_next_level(
4744
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
4745
2.73k
{
4746
2.73k
  int depth;
4747
2.73k
4748
2.73k
  if (
!build || 2.73k
!executed2.73k
)
4749
0
    goto error;
4750
2.73k
4751
2.73k
  
if (2.73k
isl_union_map_is_empty(executed)2.73k
)
{0
4752
0
    isl_ctx *ctx = isl_ast_build_get_ctx(build);
4753
0
    isl_union_map_free(executed);
4754
0
    isl_ast_build_free(build);
4755
0
    return isl_ast_graft_list_alloc(ctx, 0);
4756
0
  }
4757
2.73k
4758
2.73k
  depth = isl_ast_build_get_depth(build);
4759
2.73k
  if (depth >= isl_ast_build_dim(build, isl_dim_set))
4760
1.27k
    return generate_inner_level(executed, build);
4761
2.73k
4762
1.46k
  
if (1.46k
isl_union_map_n_map(executed) == 11.46k
)
4763
1.38k
    return generate_shifted_component(executed, build);
4764
1.46k
4765
78
  return generate_components(executed, build);
4766
0
error:
4767
0
  isl_union_map_free(executed);
4768
0
  isl_ast_build_free(build);
4769
0
  return NULL;
4770
1.46k
}
4771
4772
/* Internal data structure used by isl_ast_build_node_from_schedule_map.
4773
 * internal, executed and build are the inputs to generate_code.
4774
 * list collects the output.
4775
 */
4776
struct isl_generate_code_data {
4777
  int internal;
4778
  isl_union_map *executed;
4779
  isl_ast_build *build;
4780
4781
  isl_ast_graft_list *list;
4782
};
4783
4784
/* Given an inverse schedule in terms of the external build schedule, i.e.,
4785
 *
4786
 *  [E -> S] -> D
4787
 *
4788
 * with E the external build schedule and S the additional schedule "space",
4789
 * reformulate the inverse schedule in terms of the internal schedule domain,
4790
 * i.e., return
4791
 *
4792
 *  [I -> S] -> D
4793
 *
4794
 * We first obtain a mapping
4795
 *
4796
 *  I -> E
4797
 *
4798
 * take the inverse and the product with S -> S, resulting in
4799
 *
4800
 *  [I -> S] -> [E -> S]
4801
 *
4802
 * Applying the map to the input produces the desired result.
4803
 */
4804
static __isl_give isl_union_map *internal_executed(
4805
  __isl_take isl_union_map *executed, __isl_keep isl_space *space,
4806
  __isl_keep isl_ast_build *build)
4807
3
{
4808
3
  isl_map *id, *proj;
4809
3
4810
3
  proj = isl_ast_build_get_schedule_map(build);
4811
3
  proj = isl_map_reverse(proj);
4812
3
  space = isl_space_map_from_set(isl_space_copy(space));
4813
3
  id = isl_map_identity(space);
4814
3
  proj = isl_map_product(proj, id);
4815
3
  executed = isl_union_map_apply_domain(executed,
4816
3
            isl_union_map_from_map(proj));
4817
3
  return executed;
4818
3
}
4819
4820
/* Generate an AST that visits the elements in the range of data->executed
4821
 * in the relative order specified by the corresponding domain element(s)
4822
 * for those domain elements that belong to "set".
4823
 * Add the result to data->list.
4824
 *
4825
 * The caller ensures that "set" is a universe domain.
4826
 * "space" is the space of the additional part of the schedule.
4827
 * It is equal to the space of "set" if build->domain is parametric.
4828
 * Otherwise, it is equal to the range of the wrapped space of "set".
4829
 *
4830
 * If the build space is not parametric and
4831
 * if isl_ast_build_node_from_schedule_map
4832
 * was called from an outside user (data->internal not set), then
4833
 * the (inverse) schedule refers to the external build domain and needs to
4834
 * be transformed to refer to the internal build domain.
4835
 *
4836
 * If the build space is parametric, then we add some of the parameter
4837
 * constraints to the executed relation.  Adding these constraints
4838
 * allows for an earlier detection of conflicts in some cases.
4839
 * However, we do not want to divide the executed relation into
4840
 * more disjuncts than necessary.  We therefore approximate
4841
 * the constraints on the parameters by a single disjunct set.
4842
 *
4843
 * The build is extended to include the additional part of the schedule.
4844
 * If the original build space was not parametric, then the options
4845
 * in data->build refer only to the additional part of the schedule
4846
 * and they need to be adjusted to refer to the complete AST build
4847
 * domain.
4848
 *
4849
 * After having adjusted inverse schedule and build, we start generating
4850
 * code with the outer loop of the current code generation
4851
 * in generate_next_level.
4852
 *
4853
 * If the original build space was not parametric, we undo the embedding
4854
 * on the resulting isl_ast_node_list so that it can be used within
4855
 * the outer AST build.
4856
 */
4857
static isl_stat generate_code_in_space(struct isl_generate_code_data *data,
4858
  __isl_take isl_set *set, __isl_take isl_space *space)
4859
25
{
4860
25
  isl_union_map *executed;
4861
25
  isl_ast_build *build;
4862
25
  isl_ast_graft_list *list;
4863
25
  int embed;
4864
25
4865
25
  executed = isl_union_map_copy(data->executed);
4866
25
  executed = isl_union_map_intersect_domain(executed,
4867
25
             isl_union_set_from_set(set));
4868
25
4869
25
  embed = !isl_set_is_params(data->build->domain);
4870
25
  if (
embed && 25
!data->internal19
)
4871
3
    executed = internal_executed(executed, space, data->build);
4872
25
  if (
!embed25
)
{6
4873
6
    isl_set *domain;
4874
6
    domain = isl_ast_build_get_domain(data->build);
4875
6
    domain = isl_set_from_basic_set(isl_set_simple_hull(domain));
4876
6
    executed = isl_union_map_intersect_params(executed, domain);
4877
6
  }
4878
25
4879
25
  build = isl_ast_build_copy(data->build);
4880
25
  build = isl_ast_build_product(build, space);
4881
25
4882
25
  list = generate_next_level(executed, build);
4883
25
4884
25
  list = isl_ast_graft_list_unembed(list, embed);
4885
25
4886
25
  data->list = isl_ast_graft_list_concat(data->list, list);
4887
25
4888
25
  return isl_stat_ok;
4889
25
}
4890
4891
/* Generate an AST that visits the elements in the range of data->executed
4892
 * in the relative order specified by the corresponding domain element(s)
4893
 * for those domain elements that belong to "set".
4894
 * Add the result to data->list.
4895
 *
4896
 * The caller ensures that "set" is a universe domain.
4897
 *
4898
 * If the build space S is not parametric, then the space of "set"
4899
 * need to be a wrapped relation with S as domain.  That is, it needs
4900
 * to be of the form
4901
 *
4902
 *  [S -> T]
4903
 *
4904
 * Check this property and pass control to generate_code_in_space
4905
 * passing along T.
4906
 * If the build space is not parametric, then T is the space of "set".
4907
 */
4908
static isl_stat generate_code_set(__isl_take isl_set *set, void *user)
4909
25
{
4910
25
  struct isl_generate_code_data *data = user;
4911
25
  isl_space *space, *build_space;
4912
25
  int is_domain;
4913
25
4914
25
  space = isl_set_get_space(set);
4915
25
4916
25
  if (isl_set_is_params(data->build->domain))
4917
6
    return generate_code_in_space(data, set, space);
4918
25
4919
19
  build_space = isl_ast_build_get_space(data->build, data->internal);
4920
19
  space = isl_space_unwrap(space);
4921
19
  is_domain = isl_space_is_domain(build_space, space);
4922
19
  isl_space_free(build_space);
4923
19
  space = isl_space_range(space);
4924
19
4925
19
  if (is_domain < 0)
4926
0
    goto error;
4927
19
  
if (19
!is_domain19
)
4928
0
    isl_die(isl_set_get_ctx(set), isl_error_invalid,
4929
19
      "invalid nested schedule space", goto error);
4930
19
4931
19
  return generate_code_in_space(data, set, space);
4932
0
error:
4933
0
  isl_set_free(set);
4934
0
  isl_space_free(space);
4935
0
  return isl_stat_error;
4936
19
}
4937
4938
/* Generate an AST that visits the elements in the range of "executed"
4939
 * in the relative order specified by the corresponding domain element(s).
4940
 *
4941
 * "build" is an isl_ast_build that has either been constructed by
4942
 * isl_ast_build_from_context or passed to a callback set by
4943
 * isl_ast_build_set_create_leaf.
4944
 * In the first case, the space of the isl_ast_build is typically
4945
 * a parametric space, although this is currently not enforced.
4946
 * In the second case, the space is never a parametric space.
4947
 * If the space S is not parametric, then the domain space(s) of "executed"
4948
 * need to be wrapped relations with S as domain.
4949
 *
4950
 * If the domain of "executed" consists of several spaces, then an AST
4951
 * is generated for each of them (in arbitrary order) and the results
4952
 * are concatenated.
4953
 *
4954
 * If "internal" is set, then the domain "S" above refers to the internal
4955
 * schedule domain representation.  Otherwise, it refers to the external
4956
 * representation, as returned by isl_ast_build_get_schedule_space.
4957
 *
4958
 * We essentially run over all the spaces in the domain of "executed"
4959
 * and call generate_code_set on each of them.
4960
 */
4961
static __isl_give isl_ast_graft_list *generate_code(
4962
  __isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
4963
  int internal)
4964
25
{
4965
25
  isl_ctx *ctx;
4966
25
  struct isl_generate_code_data data = { 0 };
4967
25
  isl_space *space;
4968
25
  isl_union_set *schedule_domain;
4969
25
  isl_union_map *universe;
4970
25
4971
25
  if (!build)
4972
0
    goto error;
4973
25
  space = isl_ast_build_get_space(build, 1);
4974
25
  space = isl_space_align_params(space,
4975
25
            isl_union_map_get_space(executed));
4976
25
  space = isl_space_align_params(space,
4977
25
            isl_union_map_get_space(build->options));
4978
25
  build = isl_ast_build_align_params(build, isl_space_copy(space));
4979
25
  executed = isl_union_map_align_params(executed, space);
4980
25
  if (
!executed || 25
!build25
)
4981
0
    goto error;
4982
25
4983
25
  ctx = isl_ast_build_get_ctx(build);
4984
25
4985
25
  data.internal = internal;
4986
25
  data.executed = executed;
4987
25
  data.build = build;
4988
25
  data.list = isl_ast_graft_list_alloc(ctx, 0);
4989
25
4990
25
  universe = isl_union_map_universe(isl_union_map_copy(executed));
4991
25
  schedule_domain = isl_union_map_domain(universe);
4992
25
  if (isl_union_set_foreach_set(schedule_domain, &generate_code_set,
4993
25
          &data) < 0)
4994
0
    data.list = isl_ast_graft_list_free(data.list);
4995
25
4996
25
  isl_union_set_free(schedule_domain);
4997
25
  isl_union_map_free(executed);
4998
25
4999
25
  isl_ast_build_free(build);
5000
25
  return data.list;
5001
0
error:
5002
0
  isl_union_map_free(executed);
5003
0
  isl_ast_build_free(build);
5004
0
  return NULL;
5005
25
}
5006
5007
/* Generate an AST that visits the elements in the domain of "schedule"
5008
 * in the relative order specified by the corresponding image element(s).
5009
 *
5010
 * "build" is an isl_ast_build that has either been constructed by
5011
 * isl_ast_build_from_context or passed to a callback set by
5012
 * isl_ast_build_set_create_leaf.
5013
 * In the first case, the space of the isl_ast_build is typically
5014
 * a parametric space, although this is currently not enforced.
5015
 * In the second case, the space is never a parametric space.
5016
 * If the space S is not parametric, then the range space(s) of "schedule"
5017
 * need to be wrapped relations with S as domain.
5018
 *
5019
 * If the range of "schedule" consists of several spaces, then an AST
5020
 * is generated for each of them (in arbitrary order) and the results
5021
 * are concatenated.
5022
 *
5023
 * We first initialize the local copies of the relevant options.
5024
 * We do this here rather than when the isl_ast_build is created
5025
 * because the options may have changed between the construction
5026
 * of the isl_ast_build and the call to isl_generate_code.
5027
 *
5028
 * The main computation is performed on an inverse schedule (with
5029
 * the schedule domain in the domain and the elements to be executed
5030
 * in the range) called "executed".
5031
 */
5032
__isl_give isl_ast_node *isl_ast_build_node_from_schedule_map(
5033
  __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule)
5034
9
{
5035
9
  isl_ast_graft_list *list;
5036
9
  isl_ast_node *node;
5037
9
  isl_union_map *executed;
5038
9
5039
9
  build = isl_ast_build_copy(build);
5040
9
  build = isl_ast_build_set_single_valued(build, 0);
5041
9
  schedule = isl_union_map_coalesce(schedule);
5042
9
  schedule = isl_union_map_remove_redundancies(schedule);
5043
9
  executed = isl_union_map_reverse(schedule);
5044
9
  list = generate_code(executed, isl_ast_build_copy(build), 0);
5045
9
  node = isl_