| /* |
| * Copyright 2013 Ecole Normale Superieure |
| * Copyright 2015 Sven Verdoolaege |
| * |
| * Use of this software is governed by the MIT license |
| * |
| * Written by Sven Verdoolaege, |
| * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France |
| */ |
| |
| #include <string.h> |
| |
| #include <isl/space.h> |
| #include <isl/constraint.h> |
| #include <isl/val.h> |
| #include <isl/aff.h> |
| #include <isl/set.h> |
| #include <isl/map.h> |
| #include <isl/union_set.h> |
| #include <isl/union_map.h> |
| |
| #include "hybrid.h" |
| #include "schedule.h" |
| |
| /* The hybrid tiling implemented in this file is based on |
| * Grosser et al., "Hybrid Hexagonal/Classical Tiling for GPUs". |
| */ |
| |
| /* Bounds on relative dependence distances in input to hybrid tiling. |
| * upper is an upper bound on the relative dependence distances |
| * in the first space dimension |
| * -lower is a lower bound on the relative dependence distances |
| * in all space dimensions. |
| * |
| * In particular, |
| * |
| * d_i >= -lower_i d_0 |
| * and |
| * d_1 <= upper d_0 |
| * |
| * for each dependence distance vector d, where d_1 is the component |
| * corresponding to the first space dimension. |
| * |
| * upper and lower are always non-negative. |
| * Some of the values may be NaN if no bound could be found. |
| */ |
| struct ppcg_ht_bounds { |
| isl_val *upper; |
| isl_multi_val *lower; |
| }; |
| |
| /* Free "bounds" along with all its fields. |
| */ |
| __isl_null ppcg_ht_bounds *ppcg_ht_bounds_free( |
| __isl_take ppcg_ht_bounds *bounds) |
| { |
| if (!bounds) |
| return NULL; |
| isl_val_free(bounds->upper); |
| isl_multi_val_free(bounds->lower); |
| free(bounds); |
| |
| return NULL; |
| } |
| |
| /* Create a ppcg_ht_bounds object for a band living in "space". |
| * The bounds are initialized to NaN. |
| */ |
| __isl_give ppcg_ht_bounds *ppcg_ht_bounds_alloc(__isl_take isl_space *space) |
| { |
| int i, n; |
| isl_ctx *ctx; |
| ppcg_ht_bounds *bounds; |
| |
| if (!space) |
| return NULL; |
| |
| ctx = isl_space_get_ctx(space); |
| bounds = isl_alloc_type(ctx, struct ppcg_ht_bounds); |
| if (!bounds) |
| goto error; |
| bounds->upper = isl_val_nan(ctx); |
| bounds->lower = isl_multi_val_zero(space); |
| n = isl_multi_val_dim(bounds->lower, isl_dim_set); |
| for (i = 0; i < n; ++i) { |
| isl_val *v = isl_val_copy(bounds->upper); |
| bounds->lower = isl_multi_val_set_val(bounds->lower, i, v); |
| } |
| |
| if (!bounds->lower || !bounds->upper) |
| return ppcg_ht_bounds_free(bounds); |
| |
| return bounds; |
| error: |
| isl_space_free(space); |
| return NULL; |
| } |
| |
| void ppcg_ht_bounds_dump(__isl_keep ppcg_ht_bounds *bounds) |
| { |
| if (!bounds) |
| return; |
| |
| fprintf(stderr, "lower: "); |
| isl_multi_val_dump(bounds->lower); |
| fprintf(stderr, "upper: "); |
| isl_val_dump(bounds->upper); |
| } |
| |
| /* Return the upper bound on the relative dependence distances |
| * in the first space dimension. |
| */ |
| __isl_give isl_val *ppcg_ht_bounds_get_upper(__isl_keep ppcg_ht_bounds *bounds) |
| { |
| if (!bounds) |
| return NULL; |
| return isl_val_copy(bounds->upper); |
| } |
| |
| /* Replace the upper bound on the relative dependence distances |
| * in the first space dimension by "upper". |
| */ |
| __isl_give ppcg_ht_bounds *ppcg_ht_bounds_set_upper( |
| __isl_take ppcg_ht_bounds *bounds, __isl_take isl_val *upper) |
| { |
| if (!bounds || !upper) |
| goto error; |
| isl_val_free(bounds->upper); |
| bounds->upper = upper; |
| return bounds; |
| error: |
| ppcg_ht_bounds_free(bounds); |
| isl_val_free(upper); |
| return NULL; |
| } |
| |
| /* Return the lower bound on the relative dependence distances |
| * in space dimension "pos". |
| */ |
| __isl_give isl_val *ppcg_ht_bounds_get_lower(__isl_keep ppcg_ht_bounds *bounds, |
| int pos) |
| { |
| if (!bounds) |
| return NULL; |
| return isl_multi_val_get_val(bounds->lower, pos); |
| } |
| |
| /* Replace the lower bound on the relative dependence distances |
| * in space dimension "pos" by "lower". |
| */ |
| __isl_give ppcg_ht_bounds *ppcg_ht_bounds_set_lower( |
| __isl_take ppcg_ht_bounds *bounds, int pos, __isl_take isl_val *lower) |
| { |
| if (!bounds || !lower) |
| goto error; |
| bounds->lower = isl_multi_val_set_val(bounds->lower, pos, lower); |
| if (!bounds->lower) |
| return ppcg_ht_bounds_free(bounds); |
| return bounds; |
| error: |
| ppcg_ht_bounds_free(bounds); |
| isl_val_free(lower); |
| return NULL; |
| } |
| |
| /* Can the bounds on relative dependence distances recorded in "bounds" |
| * be used to perform hybrid tiling? |
| * In particular, have appropriate lower and upper bounds been found? |
| * Any NaN indicates that no corresponding bound was found. |
| */ |
| isl_bool ppcg_ht_bounds_is_valid(__isl_keep ppcg_ht_bounds *bounds) |
| { |
| isl_bool is_nan; |
| int i, n; |
| |
| if (!bounds) |
| return isl_bool_error; |
| is_nan = isl_val_is_nan(bounds->upper); |
| if (is_nan < 0) |
| return isl_bool_error; |
| if (is_nan) |
| return isl_bool_false; |
| |
| n = isl_multi_val_dim(bounds->lower, isl_dim_set); |
| for (i = 0; i < n; ++i) { |
| isl_val *v; |
| |
| v = isl_multi_val_get_val(bounds->lower, i); |
| is_nan = isl_val_is_nan(v); |
| if (is_nan < 0) |
| return isl_bool_error; |
| if (is_nan) |
| return isl_bool_false; |
| isl_val_free(v); |
| } |
| |
| return isl_bool_true; |
| } |
| |
| /* Structure that represents the basic hexagonal tiling, |
| * along with information that is needed to perform the hybrid tiling. |
| * |
| * "bounds" are the bounds on the dependence distances that |
| * define the hexagonal shape and the required skewing in the remaining |
| * space dimensions. |
| * |
| * "input_node" points to the input pair of band nodes. |
| * "input_schedule" is the partial schedule of this input pair of band nodes. |
| * The space of this schedule is [P -> C], where P is the space |
| * of the parent node and C is the space of the child node. |
| * |
| * "space_sizes" represent the total size of a tile for the space |
| * dimensions, i.e., those corresponding to the child node. |
| * The space of "space_sizes" is C. |
| * If S_0 is the original tile size in the first space dimension, |
| * then the first entry of "space_sizes" is equal to |
| * W = 2*S_0 + floor(d_l h) + floor(d_u h). |
| * The remaining entries are the same as in the original tile sizes. |
| * |
| * The basic hexagonal tiling "hex" is defined |
| * in a "ts" (time-space) space and corresponds to the phase-1 tiles. |
| * "time_tile" maps the "ts" space to outer time tile. |
| * Is is equal to ts[t, s] -> floor(t/(2 * S_t)), with S_t the original tile |
| * size corresponding to the parent node. |
| * "local_time" maps the "ts" space to the time dimension inside each tile. |
| * It is equal to ts[t, s] -> t mod (2 S_t), with S_t the original tile |
| * size corresponding to the parent node. |
| * "shift_space" shifts the tiles at time tile T = floor(t/(2 S_t)) |
| * in the space dimension such that they align to a multiple of W. |
| * It is equal to ts[t, s] -> s + (-(2 * shift_s)*T) % W, |
| * with shift_s = S_0 + floor(d_u h). |
| * "shift_phase" is the shift taken to go from phase 0 to phase 1 |
| * It is equal to ts[t, s] -> ts[t + S_t, s + shift_s], |
| * with shift_s = S_0 + floor(d_u h). |
| * |
| * "project_ts" projects the space of the input schedule to the ts-space. |
| * It is equal to [P[t] -> C[s_0, ...]] -> ts[t, s_0]. |
| */ |
| struct ppcg_ht_tiling { |
| int ref; |
| |
| ppcg_ht_bounds *bounds; |
| isl_schedule_node *input_node; |
| isl_multi_union_pw_aff *input_schedule; |
| |
| isl_multi_val *space_sizes; |
| |
| isl_aff *time_tile; |
| isl_aff *local_time; |
| isl_aff *shift_space; |
| isl_multi_aff *shift_phase; |
| isl_set *hex; |
| |
| isl_multi_aff *project_ts; |
| }; |
| typedef struct ppcg_ht_tiling ppcg_ht_tiling; |
| |
| /* Return the space of the pair of band nodes that form the input |
| * to the hybrid tiling. |
| * In particular, return the space [P -> C], where P is the space |
| * of the parent node and C is the space of the child node. |
| */ |
| __isl_give isl_space *ppcg_ht_tiling_get_input_space( |
| __isl_keep ppcg_ht_tiling *tile) |
| { |
| if (!tile) |
| return NULL; |
| |
| return isl_multi_union_pw_aff_get_space(tile->input_schedule); |
| } |
| |
| /* Remove a reference to "tile" and free "tile" along with all its fields |
| * as soon as the reference count drops to zero. |
| */ |
| static __isl_null ppcg_ht_tiling *ppcg_ht_tiling_free( |
| __isl_take ppcg_ht_tiling *tiling) |
| { |
| if (!tiling) |
| return NULL; |
| if (--tiling->ref > 0) |
| return NULL; |
| |
| ppcg_ht_bounds_free(tiling->bounds); |
| isl_schedule_node_free(tiling->input_node); |
| isl_multi_union_pw_aff_free(tiling->input_schedule); |
| isl_multi_val_free(tiling->space_sizes); |
| isl_aff_free(tiling->time_tile); |
| isl_aff_free(tiling->local_time); |
| isl_aff_free(tiling->shift_space); |
| isl_multi_aff_free(tiling->shift_phase); |
| isl_set_free(tiling->hex); |
| isl_multi_aff_free(tiling->project_ts); |
| free(tiling); |
| |
| return NULL; |
| } |
| |
| /* Return a new reference to "tiling". |
| */ |
| __isl_give ppcg_ht_tiling *ppcg_ht_tiling_copy( |
| __isl_keep ppcg_ht_tiling *tiling) |
| { |
| if (!tiling) |
| return NULL; |
| |
| tiling->ref++; |
| return tiling; |
| } |
| |
| /* Return the isl_ctx to which "tiling" belongs. |
| */ |
| isl_ctx *ppcg_ht_tiling_get_ctx(__isl_keep ppcg_ht_tiling *tiling) |
| { |
| if (!tiling) |
| return NULL; |
| |
| return isl_multi_union_pw_aff_get_ctx(tiling->input_schedule); |
| } |
| |
| /* Representation of one of the two phases of hybrid tiling. |
| * |
| * "tiling" points to the shared tiling data. |
| * |
| * "time_tile", "local_time" and "shift_space" are equal to the corresponding |
| * fields of "tiling", pulled back to the input space. |
| * In case of phase 0, these expressions have also been moved |
| * from phase 1 to phase 0. |
| * |
| * "domain" contains the hexagonal tiling of this phase. |
| * |
| * "space_shift" is the shift that should be added to the space band |
| * in order to be able to apply rectangular tiling to the space. |
| * For phase 1, it is equal to |
| * |
| * [P[t] -> C[s_0, s_i]] -> C[(-(2 * shift_s)*T) % W, dl_i * u] |
| * |
| * with shift_s = S_0 + floor(d_u h), |
| * T equal to "time_tile" and u equal to "local_time". |
| * For phase 0, it is equal to |
| * |
| * [P[t] -> C[s_0, s_i]] -> C[shift_s + (-(2 * shift_s)*T) % W, dl_i * u] |
| * |
| * "space_tile" is the space tiling. It is equal to |
| * |
| * [P[t] -> C[s]] -> C[floor((s + space_shift)/space_size] |
| */ |
| struct ppcg_ht_phase { |
| ppcg_ht_tiling *tiling; |
| |
| isl_aff *time_tile; |
| isl_aff *local_time; |
| isl_aff *shift_space; |
| isl_set *domain; |
| |
| isl_multi_aff *space_shift; |
| isl_multi_aff *space_tile; |
| }; |
| |
| /* Free "phase" along with all its fields. |
| */ |
| static __isl_null ppcg_ht_phase *ppcg_ht_phase_free( |
| __isl_take ppcg_ht_phase *phase) |
| { |
| if (!phase) |
| return NULL; |
| |
| ppcg_ht_tiling_free(phase->tiling); |
| isl_aff_free(phase->time_tile); |
| isl_aff_free(phase->local_time); |
| isl_aff_free(phase->shift_space); |
| isl_set_free(phase->domain); |
| isl_multi_aff_free(phase->space_shift); |
| isl_multi_aff_free(phase->space_tile); |
| free(phase); |
| |
| return NULL; |
| } |
| |
| /* Wrapper around ppcg_ht_phase_free for use as an argument |
| * to isl_id_set_free_user. |
| */ |
| static void ppcg_ht_phase_free_wrap(void *user) |
| { |
| ppcg_ht_phase *phase = user; |
| |
| ppcg_ht_phase_free(phase); |
| } |
| |
| /* Return the domain of hybrid tiling phase "phase". |
| */ |
| static __isl_give isl_set *ppcg_ht_phase_get_domain(ppcg_ht_phase *phase) |
| { |
| if (!phase) |
| return NULL; |
| |
| return isl_set_copy(phase->domain); |
| } |
| |
| /* Return the space of the pair of band nodes that form the input |
| * to the hybrid tiling of which "phase" is a phase. |
| * In particular, return the space [P -> C], where P is the space |
| * of the parent node and C is the space of the child node. |
| */ |
| static __isl_give isl_space *ppcg_ht_phase_get_input_space( |
| __isl_keep ppcg_ht_phase *phase) |
| { |
| if (!phase) |
| return NULL; |
| |
| return ppcg_ht_tiling_get_input_space(phase->tiling); |
| } |
| |
| /* Construct the lower left constraint of the hexagonal tile, i.e., |
| * |
| * du a - b <= (2h+1) du - duh |
| * -du a + b + (2h+1) du - duh >= 0 |
| * |
| * where duh = floor(du * h). |
| * |
| * This constraint corresponds to (6) in |
| * "Hybrid Hexagonal/Classical Tiling for GPUs". |
| */ |
| static __isl_give isl_constraint *hex_lower_left(__isl_take isl_local_space *ls, |
| __isl_keep isl_val *h, __isl_keep isl_val *du, __isl_keep isl_val *duh) |
| { |
| isl_val *v; |
| isl_aff *aff; |
| |
| v = isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h), 2), 1); |
| v = isl_val_mul(v, isl_val_copy(du)); |
| v = isl_val_sub(v, isl_val_copy(duh)); |
| aff = isl_aff_val_on_domain(ls, v); |
| v = isl_val_neg(isl_val_copy(du)); |
| aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, v); |
| aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, 1); |
| |
| return isl_inequality_from_aff(aff); |
| } |
| |
| /* Construct the lower constraint of the hexagonal tile, i.e., |
| * |
| * a <= 2h+1 |
| * -a + 2h+1 >= 0 |
| * |
| * This constraint corresponds to (7) in |
| * "Hybrid Hexagonal/Classical Tiling for GPUs". |
| */ |
| static __isl_give isl_constraint *hex_lower(__isl_take isl_local_space *ls, |
| __isl_keep isl_val *h) |
| { |
| isl_val *v; |
| isl_aff *aff; |
| |
| v = isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h), 2), 1); |
| aff = isl_aff_val_on_domain(ls, v); |
| aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 0, -1); |
| |
| return isl_inequality_from_aff(aff); |
| } |
| |
| /* Construct the lower right constraint of the hexagonal tile, i.e., |
| * |
| * dl a + b <= (2h+1) dl + duh + (s0-1) |
| * -dl a - b + (2h+1) dl + duh + (s0-1) >= 0 |
| * |
| * where duh = floor(du * h). |
| * |
| * This constraint corresponds to (8) in |
| * "Hybrid Hexagonal/Classical Tiling for GPUs". |
| */ |
| static __isl_give isl_constraint *hex_lower_right( |
| __isl_take isl_local_space *ls, __isl_keep isl_val *h, |
| __isl_keep isl_val *s0, __isl_keep isl_val *dl, __isl_keep isl_val *duh) |
| { |
| isl_val *v; |
| isl_aff *aff; |
| |
| v = isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h), 2), 1); |
| v = isl_val_mul(v, isl_val_copy(dl)); |
| v = isl_val_add(v, isl_val_copy(duh)); |
| v = isl_val_add(v, isl_val_copy(s0)); |
| v = isl_val_sub_ui(v, 1); |
| aff = isl_aff_val_on_domain(ls, v); |
| v = isl_val_neg(isl_val_copy(dl)); |
| aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, v); |
| aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, -1); |
| |
| return isl_inequality_from_aff(aff); |
| } |
| |
| /* Construct the upper left constraint of the hexagonal tile, i.e., |
| * |
| * dl a + b >= h dl - (d - 1)/d with d = den(dl) |
| * dl a + b - h dl + (d - 1)/d >= 0 |
| * |
| * This constraint corresponds to (10) in |
| * "Hybrid Hexagonal/Classical Tiling for GPUs". |
| */ |
| static __isl_give isl_constraint *hex_upper_left(__isl_take isl_local_space *ls, |
| __isl_keep isl_val *h, __isl_keep isl_val *dl) |
| { |
| isl_val *v, *d; |
| isl_aff *aff; |
| |
| d = isl_val_get_den_val(dl); |
| v = isl_val_sub_ui(isl_val_copy(d), 1); |
| v = isl_val_div(v, d); |
| v = isl_val_sub(v, isl_val_mul(isl_val_copy(h), isl_val_copy(dl))); |
| aff = isl_aff_val_on_domain(ls, v); |
| aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, isl_val_copy(dl)); |
| aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, 1); |
| |
| return isl_inequality_from_aff(aff); |
| } |
| |
| /* Construct the upper right constraint of the hexagonal tile, i.e., |
| * |
| * du a - b >= du h - duh - (s0-1) - dlh - (d - 1)/d with d = den(du) |
| * du a - b - du h + duh + (s0-1) + dlh + (d - 1)/d >= 0 |
| * |
| * where dlh = floor(dl * h) and duh = floor(du * h). |
| * |
| * This constraint corresponds to (12) in |
| * "Hybrid Hexagonal/Classical Tiling for GPUs". |
| */ |
| static __isl_give isl_constraint *hex_upper_right( |
| __isl_take isl_local_space *ls, __isl_keep isl_val *h, |
| __isl_keep isl_val *s0, __isl_keep isl_val *du, |
| __isl_keep isl_val *dlh, __isl_keep isl_val *duh) |
| { |
| isl_val *v, *d; |
| isl_aff *aff; |
| |
| d = isl_val_get_den_val(du); |
| v = isl_val_sub_ui(isl_val_copy(d), 1); |
| v = isl_val_div(v, d); |
| v = isl_val_sub(v, isl_val_mul(isl_val_copy(h), isl_val_copy(du))); |
| v = isl_val_add(v, isl_val_copy(duh)); |
| v = isl_val_add(v, isl_val_copy(dlh)); |
| v = isl_val_add(v, isl_val_copy(s0)); |
| v = isl_val_sub_ui(v, 1); |
| aff = isl_aff_val_on_domain(ls, v); |
| aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, isl_val_copy(du)); |
| aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, -1); |
| |
| return isl_inequality_from_aff(aff); |
| } |
| |
| /* Construct the uppper constraint of the hexagonal tile, i.e., |
| * |
| * a >= 0 |
| * |
| * This constraint corresponds to (13) in |
| * "Hybrid Hexagonal/Classical Tiling for GPUs". |
| */ |
| static __isl_give isl_constraint *hex_upper(__isl_take isl_local_space *ls) |
| { |
| isl_aff *aff; |
| |
| aff = isl_aff_var_on_domain(ls, isl_dim_set, 0); |
| |
| return isl_inequality_from_aff(aff); |
| } |
| |
| /* Construct the basic hexagonal tile shape. |
| * "space" is the 2D space in which the hexagon should be constructed. |
| * h is st-1, with st the tile size in the time dimension |
| * s0 is the tile size in the space dimension |
| * dl is a bound on the negative relative dependence distances, i.e., |
| * |
| * d_s >= -dl d_t |
| * |
| * du is a bound on the positive relative dependence distances, i.e., |
| * |
| * d_s <= du d_t |
| * |
| * with (d_t,d_s) any dependence distance vector. |
| * dlh = floor(dl * h) |
| * duh = floor(du * h) |
| * |
| * The shape of the hexagon is as follows: |
| * |
| * 0 dlh dlh+s0-1 |
| * ______ __ |
| * 0 / \_ / |
| * / \_ / |
| * h / \ ______ / |
| * h+1 \_ // \\_ |
| * \_ // \\_ |
| * 2h+1 \______// \\ |
| * 0 duh duh+s0-1 |
| * duh+s0-1+dlh |
| * duh+s0-1+dlh+1+s0+1 |
| * |
| * The next hexagon is shifted by duh + dlh + 2 * s0. |
| * |
| * The slope of the "/" constraints is dl. |
| * The slope of the "\_" constraints is du. |
| */ |
| static __isl_give isl_set *compute_hexagon(__isl_take isl_space *space, |
| __isl_keep isl_val *h, __isl_keep isl_val *s0, |
| __isl_keep isl_val *dl, __isl_keep isl_val *du, |
| __isl_keep isl_val *dlh, __isl_keep isl_val *duh) |
| { |
| isl_local_space *ls; |
| isl_constraint *c; |
| isl_basic_set *bset; |
| |
| ls = isl_local_space_from_space(space); |
| |
| c = hex_lower_left(isl_local_space_copy(ls), h, du, duh); |
| bset = isl_basic_set_from_constraint(c); |
| |
| c = hex_lower(isl_local_space_copy(ls), h); |
| bset = isl_basic_set_add_constraint(bset, c); |
| |
| c = hex_lower_right(isl_local_space_copy(ls), h, s0, dl, duh); |
| bset = isl_basic_set_add_constraint(bset, c); |
| |
| c = hex_upper_left(isl_local_space_copy(ls), h, dl); |
| bset = isl_basic_set_add_constraint(bset, c); |
| |
| c = hex_upper_right(isl_local_space_copy(ls), h, s0, du, dlh, duh); |
| bset = isl_basic_set_add_constraint(bset, c); |
| |
| c = hex_upper(ls); |
| bset = isl_basic_set_add_constraint(bset, c); |
| |
| return isl_set_from_basic_set(bset); |
| } |
| |
| /* Name of the ts-space. |
| */ |
| static const char *ts_space_name = "ts"; |
| |
| /* Construct and return the space ts[t, s]. |
| */ |
| static __isl_give isl_space *construct_ts_space(isl_ctx *ctx) |
| { |
| isl_space *s; |
| |
| s = isl_space_set_alloc(ctx, 0, 2); |
| s = isl_space_set_tuple_name(s, isl_dim_set, ts_space_name); |
| |
| return s; |
| } |
| |
| /* Name of the local ts-space. |
| */ |
| static const char *local_ts_space_name = "local_ts"; |
| |
| /* Construct and return the space local_ts[t, s]. |
| */ |
| static __isl_give isl_space *construct_local_ts_space(isl_ctx *ctx) |
| { |
| isl_space *s; |
| |
| s = isl_space_set_alloc(ctx, 0, 2); |
| s = isl_space_set_tuple_name(s, isl_dim_set, local_ts_space_name); |
| |
| return s; |
| } |
| |
| /* Compute the total size of a tile for the space dimensions, |
| * i.e., those corresponding to the child node |
| * of the input pattern. |
| * If S_0 is the original tile size in the first space dimension, |
| * then the first entry of "space_sizes" is equal to |
| * W = 2*S_0 + floor(d_l h) + floor(d_u h). |
| * The remaining entries are the same as in the original tile sizes. |
| * "tile_sizes" contains the original tile sizes, including |
| * the tile size corresponding to the parent node. |
| * "dlh" is equal to floor(d_l h). |
| * "duh" is equal to floor(d_u h). |
| */ |
| static __isl_give isl_multi_val *compute_space_sizes( |
| __isl_keep isl_multi_val *tile_sizes, |
| __isl_keep isl_val *dlh, __isl_keep isl_val *duh) |
| { |
| isl_val *size; |
| isl_multi_val *space_sizes; |
| |
| space_sizes = isl_multi_val_copy(tile_sizes); |
| space_sizes = isl_multi_val_factor_range(space_sizes); |
| size = isl_multi_val_get_val(space_sizes, 0); |
| size = isl_val_mul_ui(size, 2); |
| size = isl_val_add(size, isl_val_copy(duh)); |
| size = isl_val_add(size, isl_val_copy(dlh)); |
| space_sizes = isl_multi_val_set_val(space_sizes, 0, size); |
| |
| return space_sizes; |
| } |
| |
| /* Compute the offset of phase 1 with respect to phase 0 |
| * in the ts-space ("space"). |
| * In particular, return |
| * |
| * ts[st, s0 + duh] |
| */ |
| static __isl_give isl_multi_val *compute_phase_shift( |
| __isl_keep isl_space *space, __isl_keep isl_val *st, |
| __isl_keep isl_val *s0, __isl_keep isl_val *duh) |
| { |
| isl_val *v; |
| isl_multi_val *phase_shift; |
| |
| phase_shift = isl_multi_val_zero(isl_space_copy(space)); |
| phase_shift = isl_multi_val_set_val(phase_shift, 0, isl_val_copy(st)); |
| v = isl_val_add(isl_val_copy(duh), isl_val_copy(s0)); |
| phase_shift = isl_multi_val_set_val(phase_shift, 1, v); |
| |
| return phase_shift; |
| } |
| |
| /* Return the function |
| * |
| * ts[t, s] -> floor(t/(2 * st)) |
| * |
| * representing the time tile. |
| * "space" is the space ts[t, s]. |
| */ |
| static __isl_give isl_aff *compute_time_tile(__isl_keep isl_space *space, |
| __isl_keep isl_val *st) |
| { |
| isl_val *v; |
| isl_aff *t; |
| isl_local_space *ls; |
| |
| ls = isl_local_space_from_space(isl_space_copy(space)); |
| t = isl_aff_var_on_domain(ls, isl_dim_set, 0); |
| v = isl_val_mul_ui(isl_val_copy(st), 2); |
| t = isl_aff_floor(isl_aff_scale_down_val(t, v)); |
| |
| return t; |
| } |
| |
| /* Compute a shift in the space dimension for tiles |
| * at time tile T = floor(t/(2 * S_t)) |
| * such that they align to a multiple of the total space tile dimension W. |
| * In particular, compute |
| * |
| * ts[t, s] -> s + (-(2 * shift_s)*T) % W |
| * |
| * where shift_s is the shift of phase 1 with respect to phase 0 |
| * in the space dimension (the first element of "phase_shift"). |
| * W is stored in the first element of "space_sizes". |
| * "time_tile" is the function |
| * |
| * ts[t, s] -> floor(t/(2 * S_T)) |
| * |
| * Since phase 1 is shifted by shift_s with respect to phase 0, |
| * the next line of phase 0 (at T+1) is shifted by 2*shift_s |
| * with respect to the previous line (at T). |
| * A shift of -(2 * shift_s)*T therefore allows the basic pattern |
| * (which starts at 0) to be applied. |
| * However, this shift will be used to obtain the tile coordinate |
| * in the first space dimension and if the original values |
| * in the space dimension are non-negative, then the shift should |
| * not make them negative. Moreover, the shift should be as minimal |
| * as possible. |
| * Since the pattern repeats itself with a period of W in the space |
| * dimension, the shift can be replaced by (-(2 * shift_s)*T) % W. |
| */ |
| static __isl_give isl_aff *compute_shift_space(__isl_keep isl_aff *time_tile, |
| __isl_keep isl_multi_val *space_sizes, |
| __isl_keep isl_multi_val *phase_shift) |
| { |
| isl_val *v; |
| isl_aff *s, *t; |
| isl_local_space *ls; |
| |
| ls = isl_local_space_from_space(isl_aff_get_domain_space(time_tile)); |
| t = isl_aff_copy(time_tile); |
| v = isl_val_mul_ui(isl_multi_val_get_val(phase_shift, 1), 2); |
| v = isl_val_neg(v); |
| t = isl_aff_scale_val(t, v); |
| v = isl_multi_val_get_val(space_sizes, 0); |
| t = isl_aff_mod_val(t, v); |
| s = isl_aff_var_on_domain(ls, isl_dim_set, 1); |
| s = isl_aff_add(s, t); |
| |
| return s; |
| } |
| |
| /* Give the phase_shift ts[S_t, S_0 + floor(d_u h)], |
| * compute a function that applies the shift, i.e., |
| * |
| * ts[t, s] -> ts[t + S_t, s + S_0 + floor(d_u h)], |
| */ |
| static __isl_give isl_multi_aff *compute_shift_phase( |
| __isl_keep isl_multi_val *phase_shift) |
| { |
| isl_space *space; |
| isl_multi_aff *shift; |
| |
| space = isl_multi_val_get_space(phase_shift); |
| shift = isl_multi_aff_multi_val_on_space(space, |
| isl_multi_val_copy(phase_shift)); |
| space = isl_multi_aff_get_space(shift); |
| shift = isl_multi_aff_add(shift, isl_multi_aff_identity(space)); |
| |
| return shift; |
| } |
| |
| /* Compute a mapping from the ts-space to the local coordinates |
| * within each tile. In particular, compute |
| * |
| * ts[t, s] -> local_ts[t % (2 S_t), (s + (-(2 * shift_s)*T) % W) % W] |
| * |
| * "ts" is the space ts[t, s] |
| * "local_ts" is the space local_ts[t, s] |
| * "shift_space" is equal to ts[t, s] -> s + (-(2 * shift_s)*T) % W |
| * "st" is the tile size in the time dimension S_t. |
| * The first element of "space_sizes" is equal to W. |
| */ |
| static __isl_give isl_multi_aff *compute_localize( |
| __isl_keep isl_space *local_ts, __isl_keep isl_aff *shift_space, |
| __isl_keep isl_val *st, __isl_keep isl_multi_val *space_sizes) |
| { |
| isl_val *v; |
| isl_space *space; |
| isl_aff *s, *t; |
| isl_multi_aff *localize; |
| |
| space = isl_aff_get_domain_space(shift_space); |
| local_ts = isl_space_copy(local_ts); |
| space = isl_space_map_from_domain_and_range(space, local_ts); |
| localize = isl_multi_aff_identity(space); |
| t = isl_multi_aff_get_aff(localize, 0); |
| v = isl_val_mul_ui(isl_val_copy(st), 2); |
| t = isl_aff_mod_val(t, v); |
| localize = isl_multi_aff_set_aff(localize, 0, t); |
| s = isl_aff_copy(shift_space); |
| v = isl_multi_val_get_val(space_sizes, 0); |
| s = isl_aff_mod_val(s, v); |
| localize = isl_multi_aff_set_aff(localize, 1, s); |
| |
| return localize; |
| } |
| |
| /* Set the project_ts field of "tiling". |
| * |
| * This field projects the space of the input schedule to the ts-space. |
| * It is equal to [P[t] -> C[s_0, ...]] -> ts[t, s_0]. |
| */ |
| static __isl_give ppcg_ht_tiling *ppcg_ht_tiling_set_project_ts( |
| __isl_take ppcg_ht_tiling *tiling) |
| { |
| int n; |
| isl_space *space; |
| isl_multi_aff *project; |
| |
| if (!tiling) |
| return NULL; |
| |
| space = ppcg_ht_tiling_get_input_space(tiling); |
| n = isl_space_dim(space, isl_dim_set); |
| project = isl_multi_aff_project_out_map(space, isl_dim_set, 2, n - 2); |
| project = isl_multi_aff_set_tuple_name(project, |
| isl_dim_out, ts_space_name); |
| if (!project) |
| return ppcg_ht_tiling_free(tiling); |
| |
| tiling->project_ts = project; |
| |
| return tiling; |
| } |
| |
| /* Construct a hybrid tiling description from bounds on the dependence |
| * distances "bounds". |
| * "input_node" points to the original parent node. |
| * "input_schedule" is the combined schedule of the parent and child |
| * node in the input. |
| * "tile_sizes" are the original, user specified tile sizes. |
| */ |
| static __isl_give ppcg_ht_tiling *ppcg_ht_bounds_construct_tiling( |
| __isl_take ppcg_ht_bounds *bounds, |
| __isl_keep isl_schedule_node *input_node, |
| __isl_keep isl_multi_union_pw_aff *input_schedule, |
| __isl_keep isl_multi_val *tile_sizes) |
| { |
| isl_ctx *ctx; |
| ppcg_ht_tiling *tiling; |
| isl_multi_val *space_sizes, *phase_shift; |
| isl_aff *time_tile, *shift_space; |
| isl_multi_aff *localize; |
| isl_val *h, *duh, *dlh; |
| isl_val *st, *s0, *du, *dl; |
| isl_space *ts, *local_ts; |
| |
| if (!bounds || !input_node || !input_schedule || !tile_sizes) |
| goto error; |
| |
| ctx = isl_multi_union_pw_aff_get_ctx(input_schedule); |
| tiling = isl_calloc_type(ctx, struct ppcg_ht_tiling); |
| if (!tiling) |
| goto error; |
| tiling->ref = 1; |
| |
| st = isl_multi_val_get_val(tile_sizes, 0); |
| h = isl_val_sub_ui(isl_val_copy(st), 1); |
| s0 = isl_multi_val_get_val(tile_sizes, 1); |
| du = ppcg_ht_bounds_get_upper(bounds); |
| dl = ppcg_ht_bounds_get_lower(bounds, 0); |
| |
| duh = isl_val_floor(isl_val_mul(isl_val_copy(du), isl_val_copy(h))); |
| dlh = isl_val_floor(isl_val_mul(isl_val_copy(dl), isl_val_copy(h))); |
| |
| ts = construct_ts_space(ctx); |
| local_ts = construct_local_ts_space(ctx); |
| |
| space_sizes = compute_space_sizes(tile_sizes, dlh, duh); |
| phase_shift = compute_phase_shift(ts, st, s0, duh); |
| time_tile = compute_time_tile(ts, st); |
| shift_space = compute_shift_space(time_tile, space_sizes, phase_shift); |
| localize = compute_localize(local_ts, shift_space, st, space_sizes); |
| isl_space_free(ts); |
| |
| tiling->input_node = isl_schedule_node_copy(input_node); |
| tiling->input_schedule = isl_multi_union_pw_aff_copy(input_schedule); |
| tiling->space_sizes = space_sizes; |
| tiling->bounds = bounds; |
| tiling->local_time = isl_multi_aff_get_aff(localize, 0); |
| tiling->hex = compute_hexagon(local_ts, h, s0, dl, du, dlh, duh); |
| tiling->hex = isl_set_preimage_multi_aff(tiling->hex, localize); |
| tiling->time_tile = time_tile; |
| tiling->shift_space = shift_space; |
| tiling->shift_phase = compute_shift_phase(phase_shift); |
| isl_multi_val_free(phase_shift); |
| |
| isl_val_free(duh); |
| isl_val_free(dlh); |
| isl_val_free(du); |
| isl_val_free(dl); |
| isl_val_free(s0); |
| isl_val_free(st); |
| isl_val_free(h); |
| |
| if (!tiling->input_schedule || !tiling->local_time || !tiling->hex || |
| !tiling->shift_space || !tiling->shift_phase) |
| return ppcg_ht_tiling_free(tiling); |
| |
| tiling = ppcg_ht_tiling_set_project_ts(tiling); |
| |
| return tiling; |
| error: |
| ppcg_ht_bounds_free(bounds); |
| return NULL; |
| } |
| |
| /* Are all members of the band node "node" coincident? |
| */ |
| static isl_bool all_coincident(__isl_keep isl_schedule_node *node) |
| { |
| int i, n; |
| |
| n = isl_schedule_node_band_n_member(node); |
| for (i = 0; i < n; ++i) { |
| isl_bool c; |
| |
| c = isl_schedule_node_band_member_get_coincident(node, i); |
| if (c < 0 || !c) |
| return c; |
| } |
| |
| return isl_bool_true; |
| } |
| |
| /* Does "node" satisfy the properties of the inner node in the input |
| * pattern for hybrid tiling? |
| * That is, is it a band node with only coincident members, of which |
| * there is at least one? |
| */ |
| static isl_bool has_child_properties(__isl_keep isl_schedule_node *node) |
| { |
| if (!node) |
| return isl_bool_error; |
| if (isl_schedule_node_get_type(node) != isl_schedule_node_band) |
| return isl_bool_false; |
| if (isl_schedule_node_band_n_member(node) < 1) |
| return isl_bool_false; |
| return all_coincident(node); |
| } |
| |
| /* Does "node" satisfy the properties of the outer node in the input |
| * pattern for hybrid tiling? |
| * That is, is it a band node with a single member? |
| */ |
| static isl_bool has_parent_properties(__isl_keep isl_schedule_node *node) |
| { |
| if (!node) |
| return isl_bool_error; |
| if (isl_schedule_node_get_type(node) != isl_schedule_node_band) |
| return isl_bool_false; |
| if (isl_schedule_node_band_n_member(node) != 1) |
| return isl_bool_false; |
| return isl_bool_true; |
| } |
| |
| /* Does the parent of "node" satisfy the input patttern for hybrid tiling? |
| * That is, does "node" satisfy the properties of the inner node and |
| * does the parent of "node" satisfy the properties of the outer node? |
| */ |
| isl_bool ppcg_ht_parent_has_input_pattern(__isl_keep isl_schedule_node *node) |
| { |
| isl_bool has_pattern; |
| |
| has_pattern = has_child_properties(node); |
| if (has_pattern < 0 || !has_pattern) |
| return has_pattern; |
| |
| node = isl_schedule_node_copy(node); |
| node = isl_schedule_node_parent(node); |
| has_pattern = has_parent_properties(node); |
| isl_schedule_node_free(node); |
| |
| return has_pattern; |
| } |
| |
| /* Does "node" satisfy the input patttern for hybrid tiling? |
| * That is, does "node" satisfy the properties of the outer node and |
| * does the child of "node" satisfy the properties of the inner node? |
| */ |
| isl_bool ppcg_ht_has_input_pattern(__isl_keep isl_schedule_node *node) |
| { |
| isl_bool has_pattern; |
| |
| has_pattern = has_parent_properties(node); |
| if (has_pattern < 0 || !has_pattern) |
| return has_pattern; |
| |
| node = isl_schedule_node_get_child(node, 0); |
| has_pattern = has_child_properties(node); |
| isl_schedule_node_free(node); |
| |
| return has_pattern; |
| } |
| |
| /* Check that "node" satisfies the input pattern for hybrid tiling. |
| * Error out if it does not. |
| */ |
| static isl_stat check_input_pattern(__isl_keep isl_schedule_node *node) |
| { |
| isl_bool has_pattern; |
| |
| has_pattern = ppcg_ht_has_input_pattern(node); |
| if (has_pattern < 0) |
| return isl_stat_error; |
| if (!has_pattern) |
| isl_die(isl_schedule_node_get_ctx(node), isl_error_invalid, |
| "invalid input pattern for hybrid tiling", |
| return isl_stat_error); |
| |
| return isl_stat_ok; |
| } |
| |
| /* Extract the input schedule from "node", i.e., the product |
| * of the partial schedules of the parent and child nodes |
| * in the input pattern. |
| */ |
| static __isl_give isl_multi_union_pw_aff *extract_input_schedule( |
| __isl_keep isl_schedule_node *node) |
| { |
| isl_multi_union_pw_aff *partial, *partial2; |
| |
| partial = isl_schedule_node_band_get_partial_schedule(node); |
| node = isl_schedule_node_get_child(node, 0); |
| partial2 = isl_schedule_node_band_get_partial_schedule(node); |
| isl_schedule_node_free(node); |
| |
| return isl_multi_union_pw_aff_range_product(partial, partial2); |
| } |
| |
| /* Collect all dependences from "scop" that are relevant for performing |
| * hybrid tiling on "node" and its child and map them to the schedule |
| * space of this pair of nodes. |
| * |
| * In case live range reordering is not used, |
| * the flow and the false dependences are collected. |
| * In case live range reordering is used, |
| * the flow and the forced dependences are collected, as well |
| * as the order dependences that are adjacent to non-local |
| * flow dependences. |
| * |
| * In all cases, only dependences that map to the same instance |
| * of the outer part of the schedule are considered. |
| */ |
| static __isl_give isl_map *collect_deps(struct ppcg_scop *scop, |
| __isl_keep isl_schedule_node *node) |
| { |
| isl_space *space; |
| isl_multi_union_pw_aff *prefix, *partial; |
| isl_union_map *flow, *other, *dep, *umap; |
| isl_map *map; |
| |
| prefix = isl_schedule_node_get_prefix_schedule_multi_union_pw_aff(node); |
| partial = extract_input_schedule(node); |
| space = isl_multi_union_pw_aff_get_space(partial); |
| |
| flow = isl_union_map_copy(scop->dep_flow); |
| flow = isl_union_map_eq_at_multi_union_pw_aff(flow, |
| isl_multi_union_pw_aff_copy(prefix)); |
| if (!scop->options->live_range_reordering) { |
| other = isl_union_map_copy(scop->dep_false); |
| other = isl_union_map_eq_at_multi_union_pw_aff(other, prefix); |
| } else { |
| isl_union_map *local, *non_local, *order, *adj; |
| isl_union_set *domain, *range; |
| |
| other = isl_union_map_copy(scop->dep_forced); |
| other = isl_union_map_eq_at_multi_union_pw_aff(other, |
| isl_multi_union_pw_aff_copy(prefix)); |
| local = isl_union_map_copy(flow); |
| local = isl_union_map_eq_at_multi_union_pw_aff(local, |
| isl_multi_union_pw_aff_copy(partial)); |
| non_local = isl_union_map_copy(flow); |
| non_local = isl_union_map_subtract(non_local, local); |
| |
| order = isl_union_map_copy(scop->dep_order); |
| order = isl_union_map_eq_at_multi_union_pw_aff(order, prefix); |
| adj = isl_union_map_copy(order); |
| domain = isl_union_map_domain(isl_union_map_copy(non_local)); |
| domain = isl_union_set_coalesce(domain); |
| adj = isl_union_map_intersect_range(adj, domain); |
| other = isl_union_map_union(other, adj); |
| |
| adj = order; |
| range = isl_union_map_range(non_local); |
| range = isl_union_set_coalesce(range); |
| adj = isl_union_map_intersect_domain(adj, range); |
| other = isl_union_map_union(other, adj); |
| } |
| dep = isl_union_map_union(flow, other); |
| |
| umap = isl_union_map_from_multi_union_pw_aff(partial); |
| dep = isl_union_map_apply_domain(dep, isl_union_map_copy(umap)); |
| dep = isl_union_map_apply_range(dep, umap); |
| |
| space = isl_space_map_from_set(space); |
| map = isl_union_map_extract_map(dep, space); |
| isl_union_map_free(dep); |
| |
| map = isl_map_coalesce(map); |
| |
| return map; |
| } |
| |
| /* Given a constraint of the form |
| * |
| * a i_0 + b i_1 >= 0 |
| * or |
| * a i_0 + b i_1 = 0 |
| * |
| * use it to update one or both of the non-negative bounds |
| * in "list" = (min, max) such that |
| * |
| * i_1 >= -min i_0 |
| * and |
| * i_1 <= max i_0 |
| * |
| * If b = 0, then the constraint cannot be used. |
| * Otherwise, the constraint is equivalent to |
| * |
| * sgn(b) i_1 >= - a/abs(b) i_0 |
| * i.e., |
| * i_1 >= - a/abs(b) i_0 |
| * or |
| * i_1 <= a/abs(b) i_0 |
| * |
| * Set the first or second element of "list" to max(0, a/abs(b)), |
| * according to the sign of "b". Or set both in case the constraint |
| * is an equality, taking into account the sign change. |
| */ |
| static __isl_give isl_val_list *list_set_min_max(__isl_take isl_val_list *list, |
| __isl_keep isl_constraint *c) |
| { |
| isl_val *a, *b; |
| int sign; |
| int pos; |
| isl_bool eq, is_zero, is_neg; |
| |
| eq = isl_constraint_is_equality(c); |
| if (eq < 0) |
| return isl_val_list_free(list); |
| |
| b = isl_constraint_get_coefficient_val(c, isl_dim_set, 1); |
| is_zero = isl_val_is_zero(b); |
| if (is_zero == isl_bool_true) { |
| isl_val_free(b); |
| return list; |
| } |
| a = isl_constraint_get_coefficient_val(c, isl_dim_set, 0); |
| sign = isl_val_sgn(b); |
| b = isl_val_abs(b); |
| a = isl_val_div(a, b); |
| |
| if (eq) |
| b = isl_val_copy(a); |
| |
| pos = sign > 0 ? 0 : 1; |
| is_neg = isl_val_is_neg(a); |
| if (is_neg == isl_bool_true) |
| a = isl_val_set_si(a, 0); |
| list = isl_val_list_set_val(list, pos, a); |
| |
| if (!eq) |
| return is_neg < 0 ? isl_val_list_free(list) : list; |
| |
| pos = 1 - pos; |
| a = isl_val_neg(b); |
| is_neg = isl_val_is_neg(a); |
| if (is_neg == isl_bool_true) |
| a = isl_val_set_si(a, 0); |
| list = isl_val_list_set_val(list, pos, a); |
| |
| return is_neg < 0 ? isl_val_list_free(list) : list; |
| } |
| |
| /* If constraint "c" passes through the origin, then try and use it |
| * to update the non-negative bounds in "list" = (min, max) such that |
| * |
| * i_1 >= -min i_0 |
| * and |
| * i_1 <= max i_0 |
| */ |
| static isl_stat set_min_max(__isl_take isl_constraint *c, void *user) |
| { |
| isl_val *v; |
| isl_val_list **list = user; |
| isl_bool is_zero; |
| |
| v = isl_constraint_get_constant_val(c); |
| is_zero = isl_val_is_zero(v); |
| isl_val_free(v); |
| |
| if (is_zero == isl_bool_true) |
| *list = list_set_min_max(*list, c); |
| |
| isl_constraint_free(c); |
| return is_zero < 0 ? isl_stat_error : isl_stat_ok; |
| } |
| |
| /* Given a set of dependence distance vectors "dist", compute |
| * pair of non-negative bounds min and max such that |
| * |
| * d_pos >= -min d_0 |
| * and |
| * d_pos <= max d_0 |
| * |
| * and return the pair (min, max). |
| * If no bound can be found in either direction, then the bound |
| * is replaced by NaN. |
| * |
| * The dependence distances are first projected onto the (d_0, d_pos). |
| * Then the zero dependence distance is added and the convex hull is computed. |
| * Finally, the bounds are extracted from the constraints of the convex hull |
| * that pass through the origin. |
| */ |
| static __isl_give isl_val_list *min_max_dist(__isl_keep isl_set *dist, int pos) |
| { |
| isl_space *space; |
| isl_basic_set *hull; |
| int dim; |
| isl_ctx *ctx; |
| isl_val *nan; |
| isl_val_list *list; |
| |
| ctx = isl_set_get_ctx(dist); |
| nan = isl_val_nan(ctx); |
| list = isl_val_list_alloc(ctx, 2); |
| list = isl_val_list_add(list, isl_val_copy(nan)); |
| list = isl_val_list_add(list, nan); |
| |
| dist = isl_set_copy(dist); |
| dim = isl_set_dim(dist, isl_dim_set); |
| if (dist && pos >= dim) |
| isl_die(ctx, isl_error_internal, "position out of bounds", |
| dist = isl_set_free(dist)); |
| dist = isl_set_project_out(dist, isl_dim_set, pos + 1, dim - (pos + 1)); |
| dist = isl_set_project_out(dist, isl_dim_set, 1, pos - 1); |
| |
| space = isl_set_get_space(dist); |
| dist = isl_set_union(dist, isl_set_from_point(isl_point_zero(space))); |
| dist = isl_set_remove_divs(dist); |
| hull = isl_set_convex_hull(dist); |
| |
| if (isl_basic_set_foreach_constraint(hull, &set_min_max, &list) < 0) |
| list = isl_val_list_free(list); |
| isl_basic_set_free(hull); |
| |
| return list; |
| } |
| |
| /* Given a schedule node "node" that, together with its child, |
| * satisfies the input pattern for hybrid tiling, compute bounds |
| * on the relative dependence distances of the child node with |
| * respect to the parent node. These bounds are needed to |
| * construct a hybrid tiling. |
| * |
| * First all relevant dependences are collected and mapped |
| * to the schedule space of the pair of nodes. Then, the |
| * dependence distances are computed in this space. |
| * |
| * These dependence distances are then projected onto a two-dimensional |
| * space consisting of the single schedule dimension of the outer node |
| * and one of the schedule dimensions of the inner node. |
| * The maximal and minimal relative dependence distances are extracted |
| * from these projections. |
| * This process is repeated for each of the schedule dimensions |
| * of the inner node. For the first dimension, both minimal and |
| * maximal relative dependence distances are stored in the result. |
| * For the other dimensions, only the minimal relative dependence |
| * distance is stored. |
| */ |
| __isl_give ppcg_ht_bounds *ppcg_ht_compute_bounds(struct ppcg_scop *scop, |
| __isl_keep isl_schedule_node *node) |
| { |
| ppcg_ht_bounds *bnd; |
| isl_space *space; |
| isl_map *map; |
| isl_set *dist; |
| isl_val_list *pair; |
| isl_schedule_node *child; |
| int n; |
| int i, dim; |
| |
| if (!scop || !node || check_input_pattern(node) < 0) |
| return NULL; |
| |
| child = isl_schedule_node_get_child(node, 0); |
| space = isl_schedule_node_band_get_space(child); |
| dim = isl_schedule_node_band_n_member(child); |
| isl_schedule_node_free(child); |
| bnd = ppcg_ht_bounds_alloc(space); |
| if (!bnd) |
| return NULL; |
| |
| map = collect_deps(scop, node); |
| |
| dist = isl_map_deltas(map); |
| n = isl_set_dim(dist, isl_dim_param); |
| dist = isl_set_project_out(dist, isl_dim_param, 0, n); |
| |
| pair = min_max_dist(dist, 1); |
| bnd = ppcg_ht_bounds_set_lower(bnd, 0, isl_val_list_get_val(pair, 0)); |
| bnd = ppcg_ht_bounds_set_upper(bnd, isl_val_list_get_val(pair, 1)); |
| isl_val_list_free(pair); |
| |
| for (i = 1; i < dim; ++i) { |
| pair = min_max_dist(dist, 1 + i); |
| bnd = ppcg_ht_bounds_set_lower(bnd, i, |
| isl_val_list_get_val(pair, 0)); |
| isl_val_list_free(pair); |
| } |
| |
| isl_set_free(dist); |
| |
| return bnd; |
| } |
| |
| /* Check if all the fields of "phase" are valid, freeing "phase" |
| * if they are not. |
| */ |
| static __isl_give ppcg_ht_phase *check_phase(__isl_take ppcg_ht_phase *phase) |
| { |
| if (!phase) |
| return NULL; |
| |
| if (!phase->tiling || !phase->local_time || |
| !phase->shift_space || !phase->domain) |
| return ppcg_ht_phase_free(phase); |
| |
| return phase; |
| } |
| |
| /* Construct a ppcg_ht_phase object, that simply copies |
| * information from "tiling". |
| * That is, the result is defined over the "ts" space and |
| * corresponds to phase 1. |
| */ |
| static __isl_give ppcg_ht_phase *construct_phase( |
| __isl_keep ppcg_ht_tiling *tiling) |
| { |
| isl_ctx *ctx; |
| ppcg_ht_phase *phase; |
| |
| if (!tiling) |
| return NULL; |
| |
| ctx = ppcg_ht_tiling_get_ctx(tiling); |
| phase = isl_calloc_type(ctx, struct ppcg_ht_phase); |
| if (!phase) |
| return NULL; |
| phase->tiling = ppcg_ht_tiling_copy(tiling); |
| phase->time_tile = isl_aff_copy(tiling->time_tile); |
| phase->local_time = isl_aff_copy(tiling->local_time); |
| phase->shift_space = isl_aff_copy(tiling->shift_space); |
| phase->domain = isl_set_copy(tiling->hex); |
| |
| return check_phase(phase); |
| } |
| |
| /* Align the parameters of the elements of "phase" to those of "space". |
| */ |
| static __isl_give ppcg_ht_phase *phase_align_params( |
| __isl_take ppcg_ht_phase *phase, __isl_take isl_space *space) |
| { |
| if (!phase) |
| goto error; |
| |
| phase->time_tile = isl_aff_align_params(phase->time_tile, |
| isl_space_copy(space)); |
| phase->local_time = isl_aff_align_params(phase->local_time, |
| isl_space_copy(space)); |
| phase->shift_space = isl_aff_align_params(phase->shift_space, |
| isl_space_copy(space)); |
| phase->domain = isl_set_align_params(phase->domain, space); |
| |
| return check_phase(phase); |
| error: |
| isl_space_free(space); |
| return NULL; |
| } |
| |
| /* Pull back "phase" over "ma". |
| * That is, take a phase defined over the range of "ma" and |
| * turn it into a phase defined over the domain of "ma". |
| */ |
| static __isl_give ppcg_ht_phase *pullback_phase(__isl_take ppcg_ht_phase *phase, |
| __isl_take isl_multi_aff *ma) |
| { |
| phase = phase_align_params(phase, isl_multi_aff_get_space(ma)); |
| if (!phase) |
| goto error; |
| |
| phase->time_tile = isl_aff_pullback_multi_aff(phase->time_tile, |
| isl_multi_aff_copy(ma)); |
| phase->local_time = isl_aff_pullback_multi_aff(phase->local_time, |
| isl_multi_aff_copy(ma)); |
| phase->shift_space = isl_aff_pullback_multi_aff(phase->shift_space, |
| isl_multi_aff_copy(ma)); |
| phase->domain = isl_set_preimage_multi_aff(phase->domain, ma); |
| |
| return check_phase(phase); |
| error: |
| isl_multi_aff_free(ma); |
| return NULL; |
| } |
| |
| /* Pullback "phase" over phase->tiling->shift_phase, which shifts |
| * phase 0 to phase 1. The pullback therefore takes a phase 1 |
| * description and turns it into a phase 0 description. |
| */ |
| static __isl_give ppcg_ht_phase *shift_phase(__isl_take ppcg_ht_phase *phase) |
| { |
| ppcg_ht_tiling *tiling; |
| |
| if (!phase) |
| return NULL; |
| |
| tiling = phase->tiling; |
| return pullback_phase(phase, isl_multi_aff_copy(tiling->shift_phase)); |
| } |
| |
| /* Take a "phase" defined over the ts-space and plug in the projection |
| * from the input schedule space to the ts-space. |
| * The result is then defined over this input schedule space. |
| */ |
| static __isl_give ppcg_ht_phase *lift_phase(__isl_take ppcg_ht_phase *phase) |
| { |
| ppcg_ht_tiling *tiling; |
| |
| if (!phase) |
| return NULL; |
| |
| tiling = phase->tiling; |
| return pullback_phase(phase, isl_multi_aff_copy(tiling->project_ts)); |
| } |
| |
| /* Compute the shift that should be added to the space band |
| * in order to be able to apply rectangular tiling to the space. |
| * Store the shift in phase->space_shift. |
| * |
| * In the first dimension, it is equal to shift_space - s. |
| * For phase 1, this results in |
| * |
| * (-(2 * shift_s)*T) % W |
| * |
| * In phase 0, the "s" in shift_space has been replaced by "s + shift_s", |
| * so the result is |
| * |
| * shift_s + (-(2 * shift_s)*T) % W |
| * |
| * In the other dimensions, the shift is equal to |
| * |
| * dl_i * local_time. |
| */ |
| static __isl_give ppcg_ht_phase *compute_space_shift( |
| __isl_take ppcg_ht_phase *phase) |
| { |
| int i, n; |
| isl_space *space; |
| isl_local_space *ls; |
| isl_aff *aff, *s; |
| isl_multi_aff *space_shift; |
| |
| if (!phase) |
| return NULL; |
| |
| space = ppcg_ht_phase_get_input_space(phase); |
| space = isl_space_unwrap(space); |
| space = isl_space_range_map(space); |
| |
| space_shift = isl_multi_aff_zero(space); |
| aff = isl_aff_copy(phase->shift_space); |
| ls = isl_local_space_from_space(isl_aff_get_domain_space(aff)); |
| s = isl_aff_var_on_domain(ls, isl_dim_set, 1); |
| aff = isl_aff_sub(aff, s); |
| space_shift = isl_multi_aff_set_aff(space_shift, 0, aff); |
| |
| n = isl_multi_aff_dim(space_shift, isl_dim_out); |
| for (i = 1; i < n; ++i) { |
| isl_val *v; |
| isl_aff *time; |
| |
| v = ppcg_ht_bounds_get_lower(phase->tiling->bounds, i); |
| time = isl_aff_copy(phase->local_time); |
| time = isl_aff_scale_val(time, v); |
| space_shift = isl_multi_aff_set_aff(space_shift, i, time); |
| } |
| |
| if (!space_shift) |
| return ppcg_ht_phase_free(phase); |
| phase->space_shift = space_shift; |
| return phase; |
| } |
| |
| /* Compute the space tiling and store the result in phase->space_tile. |
| * The space tiling is of the form |
| * |
| * [P[t] -> C[s]] -> C[floor((s + space_shift)/space_size] |
| */ |
| static __isl_give ppcg_ht_phase *compute_space_tile( |
| __isl_take ppcg_ht_phase *phase) |
| { |
| isl_space *space; |
| isl_multi_val *space_sizes; |
| isl_multi_aff *space_shift; |
| isl_multi_aff *tile; |
| |
| if (!phase) |
| return NULL; |
| |
| space = ppcg_ht_phase_get_input_space(phase); |
| space = isl_space_unwrap(space); |
| tile = isl_multi_aff_range_map(space); |
| space_shift = isl_multi_aff_copy(phase->space_shift); |
| tile = isl_multi_aff_add(space_shift, tile); |
| space_sizes = isl_multi_val_copy(phase->tiling->space_sizes); |
| tile = isl_multi_aff_scale_down_multi_val(tile, space_sizes); |
| tile = isl_multi_aff_floor(tile); |
| |
| if (!tile) |
| return ppcg_ht_phase_free(phase); |
| phase->space_tile = tile; |
| return phase; |
| } |
| |
| /* Construct a representation for one of the two phase for hybrid tiling |
| * "tiling". If "shift" is not set, then the phase is constructed |
| * directly from the hexagonal tile shape in "tiling", which represents |
| * the phase-1 tiles. If "shift" is set, then this tile shape is shifted |
| * back over tiling->shift_phase to obtain the phase-0 tiles. |
| * |
| * First copy data from "tiling", then optionally shift the phase and |
| * finally move the tiling from the "ts" space of "tiling" to |
| * the space of the input pattern. |
| * |
| * After the basic phase has been computed, also compute |
| * the corresponding space shift. |
| */ |
| static __isl_give ppcg_ht_phase *ppcg_ht_tiling_compute_phase( |
| __isl_keep ppcg_ht_tiling *tiling, int shift) |
| { |
| ppcg_ht_phase *phase; |
| |
| phase = construct_phase(tiling); |
| if (shift) |
| phase = shift_phase(phase); |
| phase = lift_phase(phase); |
| |
| phase = compute_space_shift(phase); |
| phase = compute_space_tile(phase); |
| |
| return phase; |
| } |
| |
| /* Consruct a function that is equal to the time tile of "phase0" |
| * on the domain of "phase0" and equal to the time tile of "phase1" |
| * on the domain of "phase1". |
| * The two domains are assumed to form a partition of the input |
| * schedule space. |
| */ |
| static __isl_give isl_pw_multi_aff *combine_time_tile( |
| __isl_keep ppcg_ht_phase *phase0, __isl_keep ppcg_ht_phase *phase1) |
| { |
| isl_aff *T; |
| isl_pw_aff *time, *time1; |
| |
| if (!phase0 || !phase1) |
| return NULL; |
| |
| T = isl_aff_copy(phase0->time_tile); |
| time = isl_pw_aff_alloc(ppcg_ht_phase_get_domain(phase0), T); |
| |
| T = isl_aff_copy(phase1->time_tile); |
| time1 = isl_pw_aff_alloc(ppcg_ht_phase_get_domain(phase1), T); |
| |
| time = isl_pw_aff_union_add(time, time1); |
| |
| return isl_pw_multi_aff_from_pw_aff(time); |
| } |
| |
| /* Name used in mark nodes that contain a pointer to a ppcg_ht_phase. |
| */ |
| static char *ppcg_phase_name = "phase"; |
| |
| /* Does "id" contain a pointer to a ppcg_ht_phase? |
| * That is, is it called "phase"? |
| */ |
| static isl_bool is_phase_id(__isl_keep isl_id *id) |
| { |
| const char *name; |
| |
| name = isl_id_get_name(id); |
| if (!name) |
| return isl_bool_error; |
| |
| return !strcmp(name, ppcg_phase_name); |
| } |
| |
| /* Given a mark node with an identifier that points to a ppcg_ht_phase, |
| * extract this ppcg_ht_phase pointer. |
| */ |
| __isl_keep ppcg_ht_phase *ppcg_ht_phase_extract_from_mark( |
| __isl_keep isl_schedule_node *node) |
| { |
| isl_bool is_phase; |
| isl_id *id; |
| void *p; |
| |
| if (!node) |
| return NULL; |
| if (isl_schedule_node_get_type(node) != isl_schedule_node_mark) |
| isl_die(isl_schedule_node_get_ctx(node), isl_error_internal, |
| "not a phase mark", return NULL); |
| |
| id = isl_schedule_node_mark_get_id(node); |
| is_phase = is_phase_id(id); |
| p = isl_id_get_user(id); |
| isl_id_free(id); |
| |
| if (is_phase < 0) |
| return NULL; |
| if (!is_phase) |
| isl_die(isl_schedule_node_get_ctx(node), isl_error_internal, |
| "not a phase mark", return NULL); |
| |
| return p; |
| } |
| |
| /* Insert a mark node at "node" holding a pointer to "phase". |
| */ |
| static __isl_give isl_schedule_node *insert_phase( |
| __isl_take isl_schedule_node *node, __isl_take ppcg_ht_phase *phase) |
| { |
| isl_ctx *ctx; |
| isl_id *id; |
| |
| if (!node) |
| goto error; |
| ctx = isl_schedule_node_get_ctx(node); |
| id = isl_id_alloc(ctx, ppcg_phase_name, phase); |
| if (!id) |
| goto error; |
| id = isl_id_set_free_user(id, &ppcg_ht_phase_free_wrap); |
| node = isl_schedule_node_insert_mark(node, id); |
| |
| return node; |
| error: |
| ppcg_ht_phase_free(phase); |
| isl_schedule_node_free(node); |
| return NULL; |
| } |
| |
| /* Construct a mapping from the elements of the original pair of bands |
| * to which tiling was applied that belong to a tile of "phase" |
| * to that tile, preserving the values for the outer bands. |
| * |
| * The mapping is of the form |
| * |
| * [[outer] -> [P -> C]] -> [[outer] -> [tile]] |
| * |
| * where tile is defined by a concatenation of the time_tile and |
| * the space_tile. |
| */ |
| static __isl_give isl_map *construct_tile_map(__isl_keep ppcg_ht_phase *phase) |
| { |
| int depth; |
| isl_space *space; |
| isl_multi_aff *ma; |
| isl_multi_aff *tiling; |
| isl_map *el2tile; |
| |
| depth = isl_schedule_node_get_schedule_depth( |
| phase->tiling->input_node); |
| space = isl_aff_get_space(phase->time_tile); |
| space = isl_space_params(space); |
| space = isl_space_set_from_params(space); |
| space = isl_space_add_dims(space, isl_dim_set, depth); |
| space = isl_space_map_from_set(space); |
| ma = isl_multi_aff_identity(space); |
| |
| tiling = isl_multi_aff_flat_range_product( |
| isl_multi_aff_from_aff(isl_aff_copy(phase->time_tile)), |
| isl_multi_aff_copy(phase->space_tile)); |
| el2tile = isl_map_from_multi_aff(tiling); |
| el2tile = isl_map_intersect_domain(el2tile, |
| isl_set_copy(phase->domain)); |
| el2tile = isl_map_product(isl_map_from_multi_aff(ma), el2tile); |
| |
| return el2tile; |
| } |
| |
| /* Return a description of the full tiles of "phase" at the point |
| * in the original schedule tree where the tiling was applied. |
| * |
| * First construct a mapping from the input schedule dimensions |
| * up to an including the original pair of bands to which hybrid tiling |
| * was applied to schedule dimensions in which this original pair |
| * has been replaced by the tiles. |
| * This mapping is of the form |
| * |
| * [[outer] -> [P -> C]] -> [[outer] -> [tile]] |
| * |
| * Apply this mapping to the set of all values for the input |
| * schedule dimensions and then apply its inverse. |
| * The result is the set of values for the input schedule dimensions |
| * that would map to any of the tiles. Subtracting from this set |
| * the set of values that are actually executed produces the set |
| * of values that belong to a tile but that are not executed. |
| * Mapping these back to the tiles produces a description of |
| * the partial tiles. Subtracting these from the set of all tiles |
| * produces a description of the full tiles in the form |
| * |
| * [[outer] -> [tile]] |
| */ |
| static __isl_give isl_set *compute_full_tile(__isl_keep ppcg_ht_phase *phase) |
| { |
| isl_schedule_node *node; |
| isl_union_set *domain; |
| isl_union_map *prefix, *schedule; |
| isl_set *all, *partial, *all_el; |
| isl_map *tile2el, *el2tile; |
| isl_multi_union_pw_aff *mupa; |
| |
| el2tile = construct_tile_map(phase); |
| tile2el = isl_map_reverse(isl_map_copy(el2tile)); |
| |
| node = phase->tiling->input_node; |
| prefix = isl_schedule_node_get_prefix_schedule_union_map(node); |
| domain = isl_schedule_node_get_domain(node); |
| mupa = isl_multi_union_pw_aff_copy(phase->tiling->input_schedule); |
| schedule = isl_union_map_from_multi_union_pw_aff(mupa); |
| schedule = isl_union_map_range_product(prefix, schedule); |
| all_el = isl_set_from_union_set(isl_union_set_apply(domain, schedule)); |
| all_el = isl_set_coalesce(all_el); |
| |
| all = isl_set_apply(isl_set_copy(all_el), isl_map_copy(el2tile)); |
| |
| partial = isl_set_copy(all); |
| partial = isl_set_apply(partial, tile2el); |
| partial = isl_set_subtract(partial, all_el); |
| partial = isl_set_apply(partial, el2tile); |
| |
| return isl_set_subtract(all, partial); |
| } |
| |
| /* Copy the AST loop types of the non-isolated part to those |
| * of the isolated part. |
| */ |
| static __isl_give isl_schedule_node *set_isolate_loop_type( |
| __isl_take isl_schedule_node *node) |
| { |
| int i, n; |
| |
| n = isl_schedule_node_band_n_member(node); |
| for (i = 0; i < n; ++i) { |
| enum isl_ast_loop_type type; |
| |
| type = isl_schedule_node_band_member_get_ast_loop_type(node, i); |
| node = isl_schedule_node_band_member_set_isolate_ast_loop_type( |
| node, i, type); |
| } |
| |
| return node; |
| } |
| |
| /* If options->isolate_full_tiles is set, then mark the full tiles |
| * in "node" for isolation. The full tiles are derived from "phase". |
| * "node" may point to a part of the tiling, e.g., the space tiling. |
| * |
| * The full tiles are originally computed in the form |
| * |
| * [[outer] -> [tile]] |
| * |
| * However, the band that "node" points to may only contain |
| * subset of the tile dimensions. |
| * The description above is therefore treated as |
| * |
| * [[outer] -> [before; this; after]] |
| * |
| * before is of size "pos"; this is of size "dim"; and |
| * after is of size "out - pos - dim". |
| * The after part is first project out. Then the range is split |
| * into a before and this part and finally the before part is moved |
| * to the domain, resulting in |
| * |
| * [[outer; before] -> [this]] |
| * |
| * This description is then used as the isolate option. |
| * |
| * The AST loop type for the isolated part is set to be the same |
| * as that of the non-isolated part. |
| */ |
| static __isl_give isl_schedule_node *ppcg_ht_phase_isolate_full_tile_node( |
| __isl_keep ppcg_ht_phase *phase, __isl_take isl_schedule_node *node, |
| struct ppcg_options *options) |
| { |
| int in, out, pos, depth, dim; |
| isl_space *space; |
| isl_multi_aff *ma1, *ma2; |
| isl_set *tile; |
| isl_map *map; |
| isl_set *set; |
| isl_union_set *opt; |
| |
| if (!options->isolate_full_tiles) |
| return node; |
| |
| depth = isl_schedule_node_get_schedule_depth(node); |
| dim = isl_schedule_node_band_n_member(node); |
| |
| tile = compute_full_tile(phase); |
| map = isl_set_unwrap(tile); |
| in = isl_map_dim(map, isl_dim_in); |
| out = isl_map_dim(map, isl_dim_out); |
| pos = depth - in; |
| map = isl_map_project_out(map, isl_dim_out, pos + dim, |
| out - (pos + dim)); |
| space = isl_space_range(isl_map_get_space(map)); |
| ma1 = isl_multi_aff_project_out_map(isl_space_copy(space), |
| isl_dim_set, pos, dim); |
| ma2 = isl_multi_aff_project_out_map(space, isl_dim_set, 0, pos); |
| ma1 = isl_multi_aff_range_product(ma1, ma2); |
| map = isl_map_apply_range(map, isl_map_from_multi_aff(ma1)); |
| map = isl_map_uncurry(map); |
| map = isl_map_flatten_domain(map); |
| set = isl_map_wrap(map); |
| set = isl_set_set_tuple_name(set, "isolate"); |
| |
| opt = isl_schedule_node_band_get_ast_build_options(node); |
| opt = isl_union_set_add_set(opt, set); |
| node = isl_schedule_node_band_set_ast_build_options(node, opt); |
| node = set_isolate_loop_type(node); |
| |
| return node; |
| } |
| |
| /* Insert a band node for performing the space tiling for "phase" at "node". |
| * In particular, insert a band node with partial schedule |
| * |
| * [P[t] -> C[s]] -> C[floor((s + space_shift)/space_size)] |
| * |
| * pulled back over the input schedule. |
| * "options" determines whether full tiles should be separated |
| * from partial tiles. |
| * |
| * The first tile dimension iterates over the hexagons in the same |
| * phase, which are independent by construction. The first dimension |
| * is therefore marked coincident. |
| * All dimensions are also marked for being generated as atomic loops |
| * because separation is usually not desirable on tile loops. |
| */ |
| static __isl_give isl_schedule_node *insert_space_tiling( |
| __isl_keep ppcg_ht_phase *phase, __isl_take isl_schedule_node *node, |
| struct ppcg_options *options) |
| { |
| isl_multi_aff *space_tile; |
| isl_multi_union_pw_aff *mupa; |
| |
| if (!phase) |
| return isl_schedule_node_free(node); |
| |
| space_tile = isl_multi_aff_copy(phase->space_tile); |
| mupa = isl_multi_union_pw_aff_copy(phase->tiling->input_schedule); |
| mupa = isl_multi_union_pw_aff_apply_multi_aff(mupa, space_tile); |
| node = isl_schedule_node_insert_partial_schedule(node, mupa); |
| node = ppcg_set_schedule_node_type(node, isl_ast_loop_atomic); |
| node = ppcg_ht_phase_isolate_full_tile_node(phase, node, options); |
| node = isl_schedule_node_band_member_set_coincident(node, 0, 1); |
| |
| return node; |
| } |
| |
| /* Given a pointer "node" to (a copy of) the original child node |
| * in the input pattern, adjust its partial schedule such that |
| * it starts at zero within each tile. |
| * |
| * That is, replace "s" by (s + space_shift) % space_sizes. |
| */ |
| __isl_give isl_schedule_node *ppcg_ht_phase_shift_space_point( |
| __isl_keep ppcg_ht_phase *phase, __isl_take isl_schedule_node *node) |
| { |
| isl_multi_val *space_sizes; |
| isl_multi_aff *space_shift; |
| isl_multi_union_pw_aff *mupa; |
| |
| space_shift = isl_multi_aff_copy(phase->space_shift); |
| mupa = isl_multi_union_pw_aff_copy(phase->tiling->input_schedule); |
| mupa = isl_multi_union_pw_aff_apply_multi_aff(mupa, space_shift); |
| node = isl_schedule_node_band_shift(node, mupa); |
| space_sizes = isl_multi_val_copy(phase->tiling->space_sizes); |
| node = isl_schedule_node_band_mod(node, space_sizes); |
| |
| return node; |
| } |
| |
| /* Does |
| * |
| * s0 > delta + 2 * {delta * h} - 1 |
| * |
| * hold? |
| */ |
| static isl_bool wide_enough(__isl_keep isl_val *s0, __isl_keep isl_val *delta, |
| __isl_keep isl_val *h) |
| { |
| isl_val *v, *v2; |
| isl_bool ok; |
| |
| v = isl_val_mul(isl_val_copy(delta), isl_val_copy(h)); |
| v2 = isl_val_floor(isl_val_copy(v)); |
| v = isl_val_sub(v, v2); |
| v = isl_val_mul_ui(v, 2); |
| v = isl_val_add(v, isl_val_copy(delta)); |
| v = isl_val_sub_ui(v, 1); |
| ok = isl_val_gt(s0, v); |
| isl_val_free(v); |
| |
| return ok; |
| } |
| |
| /* Is the tile size specified by "sizes" wide enough in the first space |
| * dimension, i.e., the base of the hexagon? This ensures that, |
| * after hybrid tiling using "bounds" and these sizes, |
| * neighboring hexagons in the same phase are far enough apart |
| * that they do not depend on each other. |
| * The test is only meaningful if the bounds are valid. |
| * |
| * Let st be (half) the size in the time dimension and s0 the base |
| * size in the first space dimension. Let delta be the dependence |
| * distance in either positive or negative direction. In principle, |
| * it should be enough to have s0 + 1 > delta, i.e., s0 >= delta. |
| * However, in case of fractional delta, the tile is not extended |
| * with delta * (st - 1), but instead with floor(delta * (st - 1)). |
| * The condition therefore needs to be adjusted to |
| * |
| * s0 + 1 > delta + 2 {delta * (st - 1)} |
| * |
| * (with {} the fractional part) to account for the two slanted sides. |
| * The condition in the paper "Hybrid Hexagonal/Classical Tiling for GPUs" |
| * translates to |
| * |
| * s0 >= delta + {delta * (st - 1)} |
| * |
| * Since 1 > frac(delta * (st - 1)), this condition implies |
| * the condition above. |
| * |
| * The condition is checked for both directions. |
| */ |
| isl_bool ppcg_ht_bounds_supports_sizes(__isl_keep ppcg_ht_bounds *bounds, |
| __isl_keep isl_multi_val *sizes) |
| { |
| isl_val *s0, *h; |
| isl_val *delta; |
| isl_bool ok; |
| |
| ok = ppcg_ht_bounds_is_valid(bounds); |
| if (ok < 0 || !ok) |
| return ok; |
| |
| h = isl_val_sub_ui(isl_multi_val_get_val(sizes, 0), 1); |
| s0 = isl_multi_val_get_val(sizes, 1); |
| |
| delta = ppcg_ht_bounds_get_lower(bounds, 0); |
| ok = wide_enough(s0, delta, h); |
| isl_val_free(delta); |
| |
| delta = ppcg_ht_bounds_get_upper(bounds); |
| if (ok == isl_bool_true) |
| ok = wide_enough(s0, delta, h); |
| isl_val_free(delta); |
| |
| isl_val_free(s0); |
| isl_val_free(h); |
| |
| return ok; |
| } |
| |
| /* Check that the tile will be wide enough in the first space |
| * dimension, i.e., the base of the hexagon. This ensures that |
| * neighboring hexagons in the same phase are far enough apart |
| * that they do not depend on each other. |
| * |
| * Error out if the condition fails to hold. |
| */ |
| static isl_stat check_width(__isl_keep ppcg_ht_bounds *bounds, |
| __isl_keep isl_multi_val *sizes) |
| { |
| isl_bool ok; |
| |
| ok = ppcg_ht_bounds_supports_sizes(bounds, sizes); |
| |
| if (ok < 0) |
| return isl_stat_error; |
| if (!ok) |
| isl_die(isl_multi_val_get_ctx(sizes), isl_error_invalid, |
| "base of hybrid tiling hexagon not sufficiently wide", |
| return isl_stat_error); |
| |
| return isl_stat_ok; |
| } |
| |
| /* Given valid bounds on the relative dependence distances for |
| * the pair of nested nodes that "node" point to, as well as sufficiently |
| * wide tile sizes "sizes", insert the corresponding time and space tiling |
| * at "node", along with a pair of phase nodes that can be used |
| * to make further changes. |
| * The space of "sizes" should be the product of the spaces |
| * of the schedules of the pair of parent and child nodes. |
| * "options" determines whether full tiles should be separated |
| * from partial tiles. |
| * |
| * In particular, given an input of the form |
| * |
| * P - C - ... |
| * |
| * the output has the form |
| * |
| * /- F0 - M0 - CT0 - P - C - ... |
| * PT - seq |
| * \- F1 - M1 - CT1 - P - C - ... |
| * |
| * PT is the global time tiling. Within each of these tiles, |
| * two phases are executed in order. Within each phase, the schedule |
| * space is further subdivided into tiles through CT0 and CT1. |
| * The first dimension of each of these iterates over the hexagons |
| * within a phase and these are independent by construction. |
| * The F0 and F1 filters filter the statement instances that belong |
| * to the corresponding phase. The M0 and M1 marks contain a pointer |
| * to a ppcg_ht_phase object that can be used to perform further changes. |
| * |
| * After checking that input satisfies the requirements, |
| * a data structure is constructed that represents the tiling and |
| * two additional data structures are constructed for the two phases |
| * of the tiling. These are then used to define the filters F0 and F1 and |
| * combined to construct the time tiling PT. |
| * Then the time tiling node PT is inserted, followed by |
| * the sequence with the two filters, the CT space tiling nodes and |
| * the phase markers M. |
| */ |
| __isl_give isl_schedule_node *ppcg_ht_bounds_insert_tiling( |
| __isl_take ppcg_ht_bounds *bounds, __isl_take isl_multi_val *sizes, |
| __isl_take isl_schedule_node *node, struct ppcg_options *options) |
| { |
| isl_ctx *ctx; |
| isl_union_set *phase0; |
| isl_union_set *phase1; |
| isl_multi_union_pw_aff *input, *dom_time; |
| isl_union_pw_multi_aff *upma; |
| isl_pw_multi_aff *time; |
| isl_union_set_list *phases; |
| ppcg_ht_tiling *tiling; |
| ppcg_ht_phase *phase_0; |
| ppcg_ht_phase *phase_1; |
| |
| if (!node || !sizes || !bounds) |
| goto error; |
| if (check_input_pattern(node) < 0 || check_width(bounds, sizes) < 0) |
| goto error; |
| |
| ctx = isl_schedule_node_get_ctx(node); |
| |
| input = extract_input_schedule(node); |
| |
| tiling = ppcg_ht_bounds_construct_tiling(bounds, node, input, sizes); |
| phase_0 = ppcg_ht_tiling_compute_phase(tiling, 1); |
| phase_1 = ppcg_ht_tiling_compute_phase(tiling, 0); |
| time = combine_time_tile(phase_0, phase_1); |
| ppcg_ht_tiling_free(tiling); |
| |
| upma = isl_union_pw_multi_aff_from_multi_union_pw_aff( |
| isl_multi_union_pw_aff_copy(input)); |
| phase0 = isl_union_set_from_set(ppcg_ht_phase_get_domain(phase_0)); |
| phase0 = isl_union_set_preimage_union_pw_multi_aff(phase0, |
| isl_union_pw_multi_aff_copy(upma)); |
| phase1 = isl_union_set_from_set(ppcg_ht_phase_get_domain(phase_1)); |
| phase1 = isl_union_set_preimage_union_pw_multi_aff(phase1, upma); |
| |
| phases = isl_union_set_list_alloc(ctx, 2); |
| phases = isl_union_set_list_add(phases, phase0); |
| phases = isl_union_set_list_add(phases, phase1); |
| |
| dom_time = isl_multi_union_pw_aff_apply_pw_multi_aff(input, time); |
| node = isl_schedule_node_insert_partial_schedule(node, dom_time); |
| |
| node = isl_schedule_node_child(node, 0); |
| |
| node = isl_schedule_node_insert_sequence(node, phases); |
| node = isl_schedule_node_child(node, 0); |
| node = isl_schedule_node_child(node, 0); |
| node = insert_space_tiling(phase_0, node, options); |
| node = insert_phase(node, phase_0); |
| node = isl_schedule_node_parent(node); |
| node = isl_schedule_node_next_sibling(node); |
| node = isl_schedule_node_child(node, 0); |
| node = insert_space_tiling(phase_1, node, options); |
| node = insert_phase(node, phase_1); |
| node = isl_schedule_node_parent(node); |
| node = isl_schedule_node_parent(node); |
| |
| node = isl_schedule_node_parent(node); |
| |
| isl_multi_val_free(sizes); |
| return node; |
| error: |
| isl_multi_val_free(sizes); |
| isl_schedule_node_free(node); |
| ppcg_ht_bounds_free(bounds); |
| return NULL; |
| } |
| |
| /* Given a branch "node" that contains a sequence node with two phases |
| * of hybrid tiling as input, call "fn" on each of the two phase marker |
| * nodes. |
| * |
| * That is, the input is as follows |
| * |
| * /- F0 - M0 - ... |
| * ... - seq |
| * \- F1 - M1 - ... |
| * |
| * and "fn" is called on M0 and on M1. |
| */ |
| __isl_give isl_schedule_node *hybrid_tile_foreach_phase( |
| __isl_take isl_schedule_node *node, |
| __isl_give isl_schedule_node *(*fn)(__isl_take isl_schedule_node *node, |
| void *user), void *user) |
| { |
| int depth0, depth; |
| |
| depth0 = isl_schedule_node_get_tree_depth(node); |
| |
| while (node && |
| isl_schedule_node_get_type(node) != isl_schedule_node_sequence) |
| node = isl_schedule_node_child(node, 0); |
| |
| node = isl_schedule_node_child(node, 0); |
| node = isl_schedule_node_child(node, 0); |
| if (!node) |
| return NULL; |
| node = fn(node, user); |
| node = isl_schedule_node_parent(node); |
| node = isl_schedule_node_next_sibling(node); |
| node = isl_schedule_node_child(node, 0); |
| if (!node) |
| return NULL; |
| node = fn(node, user); |
| node = isl_schedule_node_parent(node); |
| node = isl_schedule_node_parent(node); |
| |
| depth = isl_schedule_node_get_tree_depth(node); |
| node = isl_schedule_node_ancestor(node, depth - depth0); |
| |
| return node; |
| } |
| |
| /* This function is called on each of the two phase marks |
| * in a hybrid tiling tree. |
| * Drop the phase mark at "node". |
| */ |
| static __isl_give isl_schedule_node *drop_phase_mark( |
| __isl_take isl_schedule_node *node, void *user) |
| { |
| isl_id *id; |
| isl_bool is_phase; |
| |
| if (isl_schedule_node_get_type(node) != isl_schedule_node_mark) |
| return node; |
| |
| id = isl_schedule_node_mark_get_id(node); |
| is_phase = is_phase_id(id); |
| isl_id_free(id); |
| |
| if (is_phase < 0) |
| return isl_schedule_node_free(node); |
| if (is_phase) |
| node = isl_schedule_node_delete(node); |
| |
| return node; |
| } |
| |
| /* Given a branch "node" that contains a sequence node with two phases |
| * of hybrid tiling as input, remove the two phase marker nodes. |
| * |
| * That is, the input is as follows |
| * |
| * /- F0 - M0 - ... |
| * ... - seq |
| * \- F1 - M1 - ... |
| * |
| * and the output is |
| * |
| * /- F0 - ... |
| * ... - seq |
| * \- F1 - ... |
| */ |
| __isl_give isl_schedule_node *hybrid_tile_drop_phase_marks( |
| __isl_take isl_schedule_node *node) |
| { |
| return hybrid_tile_foreach_phase(node, &drop_phase_mark, NULL); |
| } |