1565 lines
48 KiB
C
1565 lines
48 KiB
C
/*
|
|
* Copyright 2015 Sven Verdoolaege
|
|
*
|
|
* Use of this software is governed by the MIT license
|
|
*
|
|
* Written by Sven Verdoolaege
|
|
*/
|
|
|
|
#include "isl_map_private.h"
|
|
|
|
#include <isl/id.h>
|
|
#include <isl/schedule_node.h>
|
|
#include <isl/union_set.h>
|
|
|
|
#include "isl_mat_private.h"
|
|
#include "isl_scheduler_clustering.h"
|
|
#include "isl_scheduler_scc.h"
|
|
#include "isl_seq.h"
|
|
#include "isl_tarjan.h"
|
|
|
|
/* Initialize the clustering data structure "c" from "graph".
|
|
*
|
|
* In particular, allocate memory, extract the SCCs from "graph"
|
|
* into c->scc, initialize scc_cluster and construct
|
|
* a band of schedule rows for each SCC.
|
|
* Within each SCC, there is only one SCC by definition.
|
|
* Each SCC initially belongs to a cluster containing only that SCC.
|
|
*/
|
|
static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
|
|
struct isl_sched_graph *graph)
|
|
{
|
|
int i;
|
|
|
|
c->n = graph->scc;
|
|
c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
|
|
c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
|
|
c->scc_cluster = isl_calloc_array(ctx, int, c->n);
|
|
c->scc_node = isl_calloc_array(ctx, int, c->n);
|
|
c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
|
|
if (!c->scc || !c->cluster ||
|
|
!c->scc_cluster || !c->scc_node || !c->scc_in_merge)
|
|
return isl_stat_error;
|
|
|
|
for (i = 0; i < c->n; ++i) {
|
|
if (isl_sched_graph_extract_sub_graph(ctx, graph,
|
|
&isl_sched_node_scc_exactly,
|
|
&isl_sched_edge_scc_exactly,
|
|
i, &c->scc[i]) < 0)
|
|
return isl_stat_error;
|
|
c->scc[i].scc = 1;
|
|
if (isl_sched_graph_compute_maxvar(&c->scc[i]) < 0)
|
|
return isl_stat_error;
|
|
if (isl_schedule_node_compute_wcc_band(ctx, &c->scc[i]) < 0)
|
|
return isl_stat_error;
|
|
c->scc_cluster[i] = i;
|
|
}
|
|
|
|
return isl_stat_ok;
|
|
}
|
|
|
|
/* Free all memory allocated for "c".
|
|
*/
|
|
static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
|
|
{
|
|
int i;
|
|
|
|
if (c->scc)
|
|
for (i = 0; i < c->n; ++i)
|
|
isl_sched_graph_free(ctx, &c->scc[i]);
|
|
free(c->scc);
|
|
if (c->cluster)
|
|
for (i = 0; i < c->n; ++i)
|
|
isl_sched_graph_free(ctx, &c->cluster[i]);
|
|
free(c->cluster);
|
|
free(c->scc_cluster);
|
|
free(c->scc_node);
|
|
free(c->scc_in_merge);
|
|
}
|
|
|
|
/* Should we refrain from merging the cluster in "graph" with
|
|
* any other cluster?
|
|
* In particular, is its current schedule band empty and incomplete.
|
|
*/
|
|
static int bad_cluster(struct isl_sched_graph *graph)
|
|
{
|
|
return graph->n_row < graph->maxvar &&
|
|
graph->n_total_row == graph->band_start;
|
|
}
|
|
|
|
/* Is "edge" a proximity edge with a non-empty dependence relation?
|
|
*/
|
|
static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge)
|
|
{
|
|
if (!isl_sched_edge_is_proximity(edge))
|
|
return isl_bool_false;
|
|
return isl_bool_not(isl_map_plain_is_empty(edge->map));
|
|
}
|
|
|
|
/* Return the index of an edge in "graph" that can be used to merge
|
|
* two clusters in "c".
|
|
* Return graph->n_edge if no such edge can be found.
|
|
* Return -1 on error.
|
|
*
|
|
* In particular, return a proximity edge between two clusters
|
|
* that is not marked "no_merge" and such that neither of the
|
|
* two clusters has an incomplete, empty band.
|
|
*
|
|
* If there are multiple such edges, then try and find the most
|
|
* appropriate edge to use for merging. In particular, pick the edge
|
|
* with the greatest weight. If there are multiple of those,
|
|
* then pick one with the shortest distance between
|
|
* the two cluster representatives.
|
|
*/
|
|
static int find_proximity(struct isl_sched_graph *graph,
|
|
struct isl_clustering *c)
|
|
{
|
|
int i, best = graph->n_edge, best_dist, best_weight;
|
|
|
|
for (i = 0; i < graph->n_edge; ++i) {
|
|
struct isl_sched_edge *edge = &graph->edge[i];
|
|
int dist, weight;
|
|
isl_bool prox;
|
|
|
|
prox = is_non_empty_proximity(edge);
|
|
if (prox < 0)
|
|
return -1;
|
|
if (!prox)
|
|
continue;
|
|
if (edge->no_merge)
|
|
continue;
|
|
if (bad_cluster(&c->scc[edge->src->scc]) ||
|
|
bad_cluster(&c->scc[edge->dst->scc]))
|
|
continue;
|
|
dist = c->scc_cluster[edge->dst->scc] -
|
|
c->scc_cluster[edge->src->scc];
|
|
if (dist == 0)
|
|
continue;
|
|
weight = edge->weight;
|
|
if (best < graph->n_edge) {
|
|
if (best_weight > weight)
|
|
continue;
|
|
if (best_weight == weight && best_dist <= dist)
|
|
continue;
|
|
}
|
|
best = i;
|
|
best_dist = dist;
|
|
best_weight = weight;
|
|
}
|
|
|
|
return best;
|
|
}
|
|
|
|
/* Internal data structure used in mark_merge_sccs.
|
|
*
|
|
* "graph" is the dependence graph in which a strongly connected
|
|
* component is constructed.
|
|
* "scc_cluster" maps each SCC index to the cluster to which it belongs.
|
|
* "src" and "dst" are the indices of the nodes that are being merged.
|
|
*/
|
|
struct isl_mark_merge_sccs_data {
|
|
struct isl_sched_graph *graph;
|
|
int *scc_cluster;
|
|
int src;
|
|
int dst;
|
|
};
|
|
|
|
/* Check whether the cluster containing node "i" depends on the cluster
|
|
* containing node "j". If "i" and "j" belong to the same cluster,
|
|
* then they are taken to depend on each other to ensure that
|
|
* the resulting strongly connected component consists of complete
|
|
* clusters. Furthermore, if "i" and "j" are the two nodes that
|
|
* are being merged, then they are taken to depend on each other as well.
|
|
* Otherwise, check if there is a (conditional) validity dependence
|
|
* from node[j] to node[i], forcing node[i] to follow node[j].
|
|
*/
|
|
static isl_bool cluster_follows(int i, int j, void *user)
|
|
{
|
|
struct isl_mark_merge_sccs_data *data = user;
|
|
struct isl_sched_graph *graph = data->graph;
|
|
int *scc_cluster = data->scc_cluster;
|
|
|
|
if (data->src == i && data->dst == j)
|
|
return isl_bool_true;
|
|
if (data->src == j && data->dst == i)
|
|
return isl_bool_true;
|
|
if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc])
|
|
return isl_bool_true;
|
|
|
|
return isl_sched_graph_has_validity_edge(graph, &graph->node[j],
|
|
&graph->node[i]);
|
|
}
|
|
|
|
/* Mark all SCCs that belong to either of the two clusters in "c"
|
|
* connected by the edge in "graph" with index "edge", or to any
|
|
* of the intermediate clusters.
|
|
* The marking is recorded in c->scc_in_merge.
|
|
*
|
|
* The given edge has been selected for merging two clusters,
|
|
* meaning that there is at least a proximity edge between the two nodes.
|
|
* However, there may also be (indirect) validity dependences
|
|
* between the two nodes. When merging the two clusters, all clusters
|
|
* containing one or more of the intermediate nodes along the
|
|
* indirect validity dependences need to be merged in as well.
|
|
*
|
|
* First collect all such nodes by computing the strongly connected
|
|
* component (SCC) containing the two nodes connected by the edge, where
|
|
* the two nodes are considered to depend on each other to make
|
|
* sure they end up in the same SCC. Similarly, each node is considered
|
|
* to depend on every other node in the same cluster to ensure
|
|
* that the SCC consists of complete clusters.
|
|
*
|
|
* Then the original SCCs that contain any of these nodes are marked
|
|
* in c->scc_in_merge.
|
|
*/
|
|
static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
|
|
int edge, struct isl_clustering *c)
|
|
{
|
|
struct isl_mark_merge_sccs_data data;
|
|
struct isl_tarjan_graph *g;
|
|
int i;
|
|
|
|
for (i = 0; i < c->n; ++i)
|
|
c->scc_in_merge[i] = 0;
|
|
|
|
data.graph = graph;
|
|
data.scc_cluster = c->scc_cluster;
|
|
data.src = graph->edge[edge].src - graph->node;
|
|
data.dst = graph->edge[edge].dst - graph->node;
|
|
|
|
g = isl_tarjan_graph_component(ctx, graph->n, data.dst,
|
|
&cluster_follows, &data);
|
|
if (!g)
|
|
goto error;
|
|
|
|
i = g->op;
|
|
if (i < 3)
|
|
isl_die(ctx, isl_error_internal,
|
|
"expecting at least two nodes in component",
|
|
goto error);
|
|
if (g->order[--i] != -1)
|
|
isl_die(ctx, isl_error_internal,
|
|
"expecting end of component marker", goto error);
|
|
|
|
for (--i; i >= 0 && g->order[i] != -1; --i) {
|
|
int scc = graph->node[g->order[i]].scc;
|
|
c->scc_in_merge[scc] = 1;
|
|
}
|
|
|
|
isl_tarjan_graph_free(g);
|
|
return isl_stat_ok;
|
|
error:
|
|
isl_tarjan_graph_free(g);
|
|
return isl_stat_error;
|
|
}
|
|
|
|
/* Construct the identifier "cluster_i".
|
|
*/
|
|
static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
|
|
{
|
|
char name[40];
|
|
|
|
snprintf(name, sizeof(name), "cluster_%d", i);
|
|
return isl_id_alloc(ctx, name, NULL);
|
|
}
|
|
|
|
/* Construct the space of the cluster with index "i" containing
|
|
* the strongly connected component "scc".
|
|
*
|
|
* In particular, construct a space called cluster_i with dimension equal
|
|
* to the number of schedule rows in the current band of "scc".
|
|
*/
|
|
static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
|
|
{
|
|
int nvar;
|
|
isl_space *space;
|
|
isl_id *id;
|
|
|
|
nvar = scc->n_total_row - scc->band_start;
|
|
space = isl_space_copy(scc->node[0].space);
|
|
space = isl_space_params(space);
|
|
space = isl_space_set_from_params(space);
|
|
space = isl_space_add_dims(space, isl_dim_set, nvar);
|
|
id = cluster_id(isl_space_get_ctx(space), i);
|
|
space = isl_space_set_tuple_id(space, isl_dim_set, id);
|
|
|
|
return space;
|
|
}
|
|
|
|
/* Collect the domain of the graph for merging clusters.
|
|
*
|
|
* In particular, for each cluster with first SCC "i", construct
|
|
* a set in the space called cluster_i with dimension equal
|
|
* to the number of schedule rows in the current band of the cluster.
|
|
*/
|
|
static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
|
|
struct isl_sched_graph *graph, struct isl_clustering *c)
|
|
{
|
|
int i;
|
|
isl_space *space;
|
|
isl_union_set *domain;
|
|
|
|
space = isl_space_params_alloc(ctx, 0);
|
|
domain = isl_union_set_empty(space);
|
|
|
|
for (i = 0; i < graph->scc; ++i) {
|
|
isl_space *space;
|
|
|
|
if (!c->scc_in_merge[i])
|
|
continue;
|
|
if (c->scc_cluster[i] != i)
|
|
continue;
|
|
space = cluster_space(&c->scc[i], i);
|
|
domain = isl_union_set_add_set(domain, isl_set_universe(space));
|
|
}
|
|
|
|
return domain;
|
|
}
|
|
|
|
/* Construct a map from the original instances to the corresponding
|
|
* cluster instance in the current bands of the clusters in "c".
|
|
*/
|
|
static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
|
|
struct isl_sched_graph *graph, struct isl_clustering *c)
|
|
{
|
|
int i, j;
|
|
isl_space *space;
|
|
isl_union_map *cluster_map;
|
|
|
|
space = isl_space_params_alloc(ctx, 0);
|
|
cluster_map = isl_union_map_empty(space);
|
|
for (i = 0; i < graph->scc; ++i) {
|
|
int start, n;
|
|
isl_id *id;
|
|
|
|
if (!c->scc_in_merge[i])
|
|
continue;
|
|
|
|
id = cluster_id(ctx, c->scc_cluster[i]);
|
|
start = c->scc[i].band_start;
|
|
n = c->scc[i].n_total_row - start;
|
|
for (j = 0; j < c->scc[i].n; ++j) {
|
|
isl_multi_aff *ma;
|
|
isl_map *map;
|
|
struct isl_sched_node *node = &c->scc[i].node[j];
|
|
|
|
ma = isl_sched_node_extract_partial_schedule_multi_aff(
|
|
node, start, n);
|
|
ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out,
|
|
isl_id_copy(id));
|
|
map = isl_map_from_multi_aff(ma);
|
|
cluster_map = isl_union_map_add_map(cluster_map, map);
|
|
}
|
|
isl_id_free(id);
|
|
}
|
|
|
|
return cluster_map;
|
|
}
|
|
|
|
/* Add "umap" to the schedule constraints "sc" of all types of "edge"
|
|
* that are not isl_edge_condition or isl_edge_conditional_validity.
|
|
*/
|
|
static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
|
|
struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
|
|
__isl_take isl_schedule_constraints *sc)
|
|
{
|
|
enum isl_edge_type t;
|
|
|
|
if (!sc)
|
|
return NULL;
|
|
|
|
for (t = isl_edge_first; t <= isl_edge_last; ++t) {
|
|
if (t == isl_edge_condition ||
|
|
t == isl_edge_conditional_validity)
|
|
continue;
|
|
if (!isl_sched_edge_has_type(edge, t))
|
|
continue;
|
|
sc = isl_schedule_constraints_add(sc, t,
|
|
isl_union_map_copy(umap));
|
|
}
|
|
|
|
return sc;
|
|
}
|
|
|
|
/* Add schedule constraints of types isl_edge_condition and
|
|
* isl_edge_conditional_validity to "sc" by applying "umap" to
|
|
* the domains of the wrapped relations in domain and range
|
|
* of the corresponding tagged constraints of "edge".
|
|
*/
|
|
static __isl_give isl_schedule_constraints *add_conditional_constraints(
|
|
struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
|
|
__isl_take isl_schedule_constraints *sc)
|
|
{
|
|
enum isl_edge_type t;
|
|
isl_union_map *tagged;
|
|
|
|
for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) {
|
|
if (!isl_sched_edge_has_type(edge, t))
|
|
continue;
|
|
if (t == isl_edge_condition)
|
|
tagged = isl_union_map_copy(edge->tagged_condition);
|
|
else
|
|
tagged = isl_union_map_copy(edge->tagged_validity);
|
|
tagged = isl_union_map_zip(tagged);
|
|
tagged = isl_union_map_apply_domain(tagged,
|
|
isl_union_map_copy(umap));
|
|
tagged = isl_union_map_zip(tagged);
|
|
sc = isl_schedule_constraints_add(sc, t, tagged);
|
|
if (!sc)
|
|
return NULL;
|
|
}
|
|
|
|
return sc;
|
|
}
|
|
|
|
/* Given a mapping "cluster_map" from the original instances to
|
|
* the cluster instances, add schedule constraints on the clusters
|
|
* to "sc" corresponding to the original constraints represented by "edge".
|
|
*
|
|
* For non-tagged dependence constraints, the cluster constraints
|
|
* are obtained by applying "cluster_map" to the edge->map.
|
|
*
|
|
* For tagged dependence constraints, "cluster_map" needs to be applied
|
|
* to the domains of the wrapped relations in domain and range
|
|
* of the tagged dependence constraints. Pick out the mappings
|
|
* from these domains from "cluster_map" and construct their product.
|
|
* This mapping can then be applied to the pair of domains.
|
|
*/
|
|
static __isl_give isl_schedule_constraints *collect_edge_constraints(
|
|
struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map,
|
|
__isl_take isl_schedule_constraints *sc)
|
|
{
|
|
isl_union_map *umap;
|
|
isl_space *space;
|
|
isl_union_set *uset;
|
|
isl_union_map *umap1, *umap2;
|
|
|
|
if (!sc)
|
|
return NULL;
|
|
|
|
umap = isl_union_map_from_map(isl_map_copy(edge->map));
|
|
umap = isl_union_map_apply_domain(umap,
|
|
isl_union_map_copy(cluster_map));
|
|
umap = isl_union_map_apply_range(umap,
|
|
isl_union_map_copy(cluster_map));
|
|
sc = add_non_conditional_constraints(edge, umap, sc);
|
|
isl_union_map_free(umap);
|
|
|
|
if (!sc ||
|
|
(!isl_sched_edge_is_condition(edge) &&
|
|
!isl_sched_edge_is_conditional_validity(edge)))
|
|
return sc;
|
|
|
|
space = isl_space_domain(isl_map_get_space(edge->map));
|
|
uset = isl_union_set_from_set(isl_set_universe(space));
|
|
umap1 = isl_union_map_copy(cluster_map);
|
|
umap1 = isl_union_map_intersect_domain(umap1, uset);
|
|
space = isl_space_range(isl_map_get_space(edge->map));
|
|
uset = isl_union_set_from_set(isl_set_universe(space));
|
|
umap2 = isl_union_map_copy(cluster_map);
|
|
umap2 = isl_union_map_intersect_domain(umap2, uset);
|
|
umap = isl_union_map_product(umap1, umap2);
|
|
|
|
sc = add_conditional_constraints(edge, umap, sc);
|
|
|
|
isl_union_map_free(umap);
|
|
return sc;
|
|
}
|
|
|
|
/* Given a mapping "cluster_map" from the original instances to
|
|
* the cluster instances, add schedule constraints on the clusters
|
|
* to "sc" corresponding to all edges in "graph" between nodes that
|
|
* belong to SCCs that are marked for merging in "scc_in_merge".
|
|
*/
|
|
static __isl_give isl_schedule_constraints *collect_constraints(
|
|
struct isl_sched_graph *graph, int *scc_in_merge,
|
|
__isl_keep isl_union_map *cluster_map,
|
|
__isl_take isl_schedule_constraints *sc)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < graph->n_edge; ++i) {
|
|
struct isl_sched_edge *edge = &graph->edge[i];
|
|
|
|
if (!scc_in_merge[edge->src->scc])
|
|
continue;
|
|
if (!scc_in_merge[edge->dst->scc])
|
|
continue;
|
|
sc = collect_edge_constraints(edge, cluster_map, sc);
|
|
}
|
|
|
|
return sc;
|
|
}
|
|
|
|
/* Construct a dependence graph for scheduling clusters with respect
|
|
* to each other and store the result in "merge_graph".
|
|
* In particular, the nodes of the graph correspond to the schedule
|
|
* dimensions of the current bands of those clusters that have been
|
|
* marked for merging in "c".
|
|
*
|
|
* First construct an isl_schedule_constraints object for this domain
|
|
* by transforming the edges in "graph" to the domain.
|
|
* Then initialize a dependence graph for scheduling from these
|
|
* constraints.
|
|
*/
|
|
static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
|
|
struct isl_clustering *c, struct isl_sched_graph *merge_graph)
|
|
{
|
|
isl_union_set *domain;
|
|
isl_union_map *cluster_map;
|
|
isl_schedule_constraints *sc;
|
|
isl_stat r;
|
|
|
|
domain = collect_domain(ctx, graph, c);
|
|
sc = isl_schedule_constraints_on_domain(domain);
|
|
if (!sc)
|
|
return isl_stat_error;
|
|
cluster_map = collect_cluster_map(ctx, graph, c);
|
|
sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc);
|
|
isl_union_map_free(cluster_map);
|
|
|
|
r = isl_sched_graph_init(merge_graph, sc);
|
|
|
|
isl_schedule_constraints_free(sc);
|
|
|
|
return r;
|
|
}
|
|
|
|
/* Compute the maximal number of remaining schedule rows that still need
|
|
* to be computed for the nodes that belong to clusters with the maximal
|
|
* dimension for the current band (i.e., the band that is to be merged).
|
|
* Only clusters that are about to be merged are considered.
|
|
* "maxvar" is the maximal dimension for the current band.
|
|
* "c" contains information about the clusters.
|
|
*
|
|
* Return the maximal number of remaining schedule rows or
|
|
* isl_size_error on error.
|
|
*/
|
|
static isl_size compute_maxvar_max_slack(int maxvar, struct isl_clustering *c)
|
|
{
|
|
int i, j;
|
|
int max_slack;
|
|
|
|
max_slack = 0;
|
|
for (i = 0; i < c->n; ++i) {
|
|
int nvar;
|
|
struct isl_sched_graph *scc;
|
|
|
|
if (!c->scc_in_merge[i])
|
|
continue;
|
|
scc = &c->scc[i];
|
|
nvar = scc->n_total_row - scc->band_start;
|
|
if (nvar != maxvar)
|
|
continue;
|
|
for (j = 0; j < scc->n; ++j) {
|
|
struct isl_sched_node *node = &scc->node[j];
|
|
int slack;
|
|
|
|
if (isl_sched_node_update_vmap(node) < 0)
|
|
return isl_size_error;
|
|
slack = node->nvar - node->rank;
|
|
if (slack > max_slack)
|
|
max_slack = slack;
|
|
}
|
|
}
|
|
|
|
return max_slack;
|
|
}
|
|
|
|
/* If there are any clusters where the dimension of the current band
|
|
* (i.e., the band that is to be merged) is smaller than "maxvar" and
|
|
* if there are any nodes in such a cluster where the number
|
|
* of remaining schedule rows that still need to be computed
|
|
* is greater than "max_slack", then return the smallest current band
|
|
* dimension of all these clusters. Otherwise return the original value
|
|
* of "maxvar". Return isl_size_error in case of any error.
|
|
* Only clusters that are about to be merged are considered.
|
|
* "c" contains information about the clusters.
|
|
*/
|
|
static isl_size limit_maxvar_to_slack(int maxvar, int max_slack,
|
|
struct isl_clustering *c)
|
|
{
|
|
int i, j;
|
|
|
|
for (i = 0; i < c->n; ++i) {
|
|
int nvar;
|
|
struct isl_sched_graph *scc;
|
|
|
|
if (!c->scc_in_merge[i])
|
|
continue;
|
|
scc = &c->scc[i];
|
|
nvar = scc->n_total_row - scc->band_start;
|
|
if (nvar >= maxvar)
|
|
continue;
|
|
for (j = 0; j < scc->n; ++j) {
|
|
struct isl_sched_node *node = &scc->node[j];
|
|
int slack;
|
|
|
|
if (isl_sched_node_update_vmap(node) < 0)
|
|
return isl_size_error;
|
|
slack = node->nvar - node->rank;
|
|
if (slack > max_slack) {
|
|
maxvar = nvar;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return maxvar;
|
|
}
|
|
|
|
/* Adjust merge_graph->maxvar based on the number of remaining schedule rows
|
|
* that still need to be computed. In particular, if there is a node
|
|
* in a cluster where the dimension of the current band is smaller
|
|
* than merge_graph->maxvar, but the number of remaining schedule rows
|
|
* is greater than that of any node in a cluster with the maximal
|
|
* dimension for the current band (i.e., merge_graph->maxvar),
|
|
* then adjust merge_graph->maxvar to the (smallest) current band dimension
|
|
* of those clusters. Without this adjustment, the total number of
|
|
* schedule dimensions would be increased, resulting in a skewed view
|
|
* of the number of coincident dimensions.
|
|
* "c" contains information about the clusters.
|
|
*
|
|
* If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
|
|
* then there is no point in attempting any merge since it will be rejected
|
|
* anyway. Set merge_graph->maxvar to zero in such cases.
|
|
*/
|
|
static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx,
|
|
struct isl_sched_graph *merge_graph, struct isl_clustering *c)
|
|
{
|
|
isl_size max_slack, maxvar;
|
|
|
|
max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c);
|
|
if (max_slack < 0)
|
|
return isl_stat_error;
|
|
maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c);
|
|
if (maxvar < 0)
|
|
return isl_stat_error;
|
|
|
|
if (maxvar < merge_graph->maxvar) {
|
|
if (isl_options_get_schedule_maximize_band_depth(ctx))
|
|
merge_graph->maxvar = 0;
|
|
else
|
|
merge_graph->maxvar = maxvar;
|
|
}
|
|
|
|
return isl_stat_ok;
|
|
}
|
|
|
|
/* Return the number of coincident dimensions in the current band of "graph",
|
|
* where the nodes of "graph" are assumed to be scheduled by a single band.
|
|
*/
|
|
static int get_n_coincident(struct isl_sched_graph *graph)
|
|
{
|
|
int i;
|
|
|
|
for (i = graph->band_start; i < graph->n_total_row; ++i)
|
|
if (!graph->node[0].coincident[i])
|
|
break;
|
|
|
|
return i - graph->band_start;
|
|
}
|
|
|
|
/* Should the clusters be merged based on the cluster schedule
|
|
* in the current (and only) band of "merge_graph", given that
|
|
* coincidence should be maximized?
|
|
*
|
|
* If the number of coincident schedule dimensions in the merged band
|
|
* would be less than the maximal number of coincident schedule dimensions
|
|
* in any of the merged clusters, then the clusters should not be merged.
|
|
*/
|
|
static isl_bool ok_to_merge_coincident(struct isl_clustering *c,
|
|
struct isl_sched_graph *merge_graph)
|
|
{
|
|
int i;
|
|
int n_coincident;
|
|
int max_coincident;
|
|
|
|
max_coincident = 0;
|
|
for (i = 0; i < c->n; ++i) {
|
|
if (!c->scc_in_merge[i])
|
|
continue;
|
|
n_coincident = get_n_coincident(&c->scc[i]);
|
|
if (n_coincident > max_coincident)
|
|
max_coincident = n_coincident;
|
|
}
|
|
|
|
n_coincident = get_n_coincident(merge_graph);
|
|
|
|
return isl_bool_ok(n_coincident >= max_coincident);
|
|
}
|
|
|
|
/* Return the transformation on "node" expressed by the current (and only)
|
|
* band of "merge_graph" applied to the clusters in "c".
|
|
*
|
|
* First find the representation of "node" in its SCC in "c" and
|
|
* extract the transformation expressed by the current band.
|
|
* Then extract the transformation applied by "merge_graph"
|
|
* to the cluster to which this SCC belongs.
|
|
* Combine the two to obtain the complete transformation on the node.
|
|
*
|
|
* Note that the range of the first transformation is an anonymous space,
|
|
* while the domain of the second is named "cluster_X". The range
|
|
* of the former therefore needs to be adjusted before the two
|
|
* can be combined.
|
|
*/
|
|
static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx,
|
|
struct isl_sched_node *node, struct isl_clustering *c,
|
|
struct isl_sched_graph *merge_graph)
|
|
{
|
|
struct isl_sched_node *scc_node, *cluster_node;
|
|
int start, n;
|
|
isl_id *id;
|
|
isl_space *space;
|
|
isl_multi_aff *ma, *ma2;
|
|
|
|
scc_node = isl_sched_graph_find_node(ctx, &c->scc[node->scc],
|
|
node->space);
|
|
if (scc_node && !isl_sched_graph_is_node(&c->scc[node->scc], scc_node))
|
|
isl_die(ctx, isl_error_internal, "unable to find node",
|
|
return NULL);
|
|
start = c->scc[node->scc].band_start;
|
|
n = c->scc[node->scc].n_total_row - start;
|
|
ma = isl_sched_node_extract_partial_schedule_multi_aff(scc_node,
|
|
start, n);
|
|
space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]);
|
|
cluster_node = isl_sched_graph_find_node(ctx, merge_graph, space);
|
|
if (cluster_node && !isl_sched_graph_is_node(merge_graph, cluster_node))
|
|
isl_die(ctx, isl_error_internal, "unable to find cluster",
|
|
space = isl_space_free(space));
|
|
id = isl_space_get_tuple_id(space, isl_dim_set);
|
|
ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id);
|
|
isl_space_free(space);
|
|
n = merge_graph->n_total_row;
|
|
ma2 = isl_sched_node_extract_partial_schedule_multi_aff(cluster_node,
|
|
0, n);
|
|
ma = isl_multi_aff_pullback_multi_aff(ma2, ma);
|
|
|
|
return isl_map_from_multi_aff(ma);
|
|
}
|
|
|
|
/* Give a set of distances "set", are they bounded by a small constant
|
|
* in direction "pos"?
|
|
* In practice, check if they are bounded by 2 by checking that there
|
|
* are no elements with a value greater than or equal to 3 or
|
|
* smaller than or equal to -3.
|
|
*/
|
|
static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos)
|
|
{
|
|
isl_bool bounded;
|
|
isl_set *test;
|
|
|
|
if (!set)
|
|
return isl_bool_error;
|
|
|
|
test = isl_set_copy(set);
|
|
test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3);
|
|
bounded = isl_set_is_empty(test);
|
|
isl_set_free(test);
|
|
|
|
if (bounded < 0 || !bounded)
|
|
return bounded;
|
|
|
|
test = isl_set_copy(set);
|
|
test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3);
|
|
bounded = isl_set_is_empty(test);
|
|
isl_set_free(test);
|
|
|
|
return bounded;
|
|
}
|
|
|
|
/* Does the set "set" have a fixed (but possible parametric) value
|
|
* at dimension "pos"?
|
|
*/
|
|
static isl_bool has_single_value(__isl_keep isl_set *set, int pos)
|
|
{
|
|
isl_size n;
|
|
isl_bool single;
|
|
|
|
n = isl_set_dim(set, isl_dim_set);
|
|
if (n < 0)
|
|
return isl_bool_error;
|
|
set = isl_set_copy(set);
|
|
set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1));
|
|
set = isl_set_project_out(set, isl_dim_set, 0, pos);
|
|
single = isl_set_is_singleton(set);
|
|
isl_set_free(set);
|
|
|
|
return single;
|
|
}
|
|
|
|
/* Does "map" have a fixed (but possible parametric) value
|
|
* at dimension "pos" of either its domain or its range?
|
|
*/
|
|
static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos)
|
|
{
|
|
isl_set *set;
|
|
isl_bool single;
|
|
|
|
set = isl_map_domain(isl_map_copy(map));
|
|
single = has_single_value(set, pos);
|
|
isl_set_free(set);
|
|
|
|
if (single < 0 || single)
|
|
return single;
|
|
|
|
set = isl_map_range(isl_map_copy(map));
|
|
single = has_single_value(set, pos);
|
|
isl_set_free(set);
|
|
|
|
return single;
|
|
}
|
|
|
|
/* Does the edge "edge" from "graph" have bounded dependence distances
|
|
* in the merged graph "merge_graph" of a selection of clusters in "c"?
|
|
*
|
|
* Extract the complete transformations of the source and destination
|
|
* nodes of the edge, apply them to the edge constraints and
|
|
* compute the differences. Finally, check if these differences are bounded
|
|
* in each direction.
|
|
*
|
|
* If the dimension of the band is greater than the number of
|
|
* dimensions that can be expected to be optimized by the edge
|
|
* (based on its weight), then also allow the differences to be unbounded
|
|
* in the remaining dimensions, but only if either the source or
|
|
* the destination has a fixed value in that direction.
|
|
* This allows a statement that produces values that are used by
|
|
* several instances of another statement to be merged with that
|
|
* other statement.
|
|
* However, merging such clusters will introduce an inherently
|
|
* large proximity distance inside the merged cluster, meaning
|
|
* that proximity distances will no longer be optimized in
|
|
* subsequent merges. These merges are therefore only allowed
|
|
* after all other possible merges have been tried.
|
|
* The first time such a merge is encountered, the weight of the edge
|
|
* is replaced by a negative weight. The second time (i.e., after
|
|
* all merges over edges with a non-negative weight have been tried),
|
|
* the merge is allowed.
|
|
*/
|
|
static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge,
|
|
struct isl_sched_graph *graph, struct isl_clustering *c,
|
|
struct isl_sched_graph *merge_graph)
|
|
{
|
|
int i, n_slack;
|
|
isl_size n;
|
|
isl_bool bounded;
|
|
isl_map *map, *t;
|
|
isl_set *dist;
|
|
|
|
map = isl_map_copy(edge->map);
|
|
t = extract_node_transformation(ctx, edge->src, c, merge_graph);
|
|
map = isl_map_apply_domain(map, t);
|
|
t = extract_node_transformation(ctx, edge->dst, c, merge_graph);
|
|
map = isl_map_apply_range(map, t);
|
|
dist = isl_map_deltas(isl_map_copy(map));
|
|
|
|
bounded = isl_bool_true;
|
|
n = isl_set_dim(dist, isl_dim_set);
|
|
if (n < 0)
|
|
goto error;
|
|
n_slack = n - edge->weight;
|
|
if (edge->weight < 0)
|
|
n_slack -= graph->max_weight + 1;
|
|
for (i = 0; i < n; ++i) {
|
|
isl_bool bounded_i, singular_i;
|
|
|
|
bounded_i = distance_is_bounded(dist, i);
|
|
if (bounded_i < 0)
|
|
goto error;
|
|
if (bounded_i)
|
|
continue;
|
|
if (edge->weight >= 0)
|
|
bounded = isl_bool_false;
|
|
n_slack--;
|
|
if (n_slack < 0)
|
|
break;
|
|
singular_i = has_singular_src_or_dst(map, i);
|
|
if (singular_i < 0)
|
|
goto error;
|
|
if (singular_i)
|
|
continue;
|
|
bounded = isl_bool_false;
|
|
break;
|
|
}
|
|
if (!bounded && i >= n && edge->weight >= 0)
|
|
edge->weight -= graph->max_weight + 1;
|
|
isl_map_free(map);
|
|
isl_set_free(dist);
|
|
|
|
return bounded;
|
|
error:
|
|
isl_map_free(map);
|
|
isl_set_free(dist);
|
|
return isl_bool_error;
|
|
}
|
|
|
|
/* Should the clusters be merged based on the cluster schedule
|
|
* in the current (and only) band of "merge_graph"?
|
|
* "graph" is the original dependence graph, while "c" records
|
|
* which SCCs are involved in the latest merge.
|
|
*
|
|
* In particular, is there at least one proximity constraint
|
|
* that is optimized by the merge?
|
|
*
|
|
* A proximity constraint is considered to be optimized
|
|
* if the dependence distances are small.
|
|
*/
|
|
static isl_bool ok_to_merge_proximity(isl_ctx *ctx,
|
|
struct isl_sched_graph *graph, struct isl_clustering *c,
|
|
struct isl_sched_graph *merge_graph)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < graph->n_edge; ++i) {
|
|
struct isl_sched_edge *edge = &graph->edge[i];
|
|
isl_bool bounded;
|
|
|
|
if (!isl_sched_edge_is_proximity(edge))
|
|
continue;
|
|
if (!c->scc_in_merge[edge->src->scc])
|
|
continue;
|
|
if (!c->scc_in_merge[edge->dst->scc])
|
|
continue;
|
|
if (c->scc_cluster[edge->dst->scc] ==
|
|
c->scc_cluster[edge->src->scc])
|
|
continue;
|
|
bounded = has_bounded_distances(ctx, edge, graph, c,
|
|
merge_graph);
|
|
if (bounded < 0 || bounded)
|
|
return bounded;
|
|
}
|
|
|
|
return isl_bool_false;
|
|
}
|
|
|
|
/* Should the clusters be merged based on the cluster schedule
|
|
* in the current (and only) band of "merge_graph"?
|
|
* "graph" is the original dependence graph, while "c" records
|
|
* which SCCs are involved in the latest merge.
|
|
*
|
|
* If the current band is empty, then the clusters should not be merged.
|
|
*
|
|
* If the band depth should be maximized and the merge schedule
|
|
* is incomplete (meaning that the dimension of some of the schedule
|
|
* bands in the original schedule will be reduced), then the clusters
|
|
* should not be merged.
|
|
*
|
|
* If the schedule_maximize_coincidence option is set, then check that
|
|
* the number of coincident schedule dimensions is not reduced.
|
|
*
|
|
* Finally, only allow the merge if at least one proximity
|
|
* constraint is optimized.
|
|
*/
|
|
static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
|
|
struct isl_clustering *c, struct isl_sched_graph *merge_graph)
|
|
{
|
|
if (merge_graph->n_total_row == merge_graph->band_start)
|
|
return isl_bool_false;
|
|
|
|
if (isl_options_get_schedule_maximize_band_depth(ctx) &&
|
|
merge_graph->n_total_row < merge_graph->maxvar)
|
|
return isl_bool_false;
|
|
|
|
if (isl_options_get_schedule_maximize_coincidence(ctx)) {
|
|
isl_bool ok;
|
|
|
|
ok = ok_to_merge_coincident(c, merge_graph);
|
|
if (ok < 0 || !ok)
|
|
return ok;
|
|
}
|
|
|
|
return ok_to_merge_proximity(ctx, graph, c, merge_graph);
|
|
}
|
|
|
|
/* Apply the schedule in "t_node" to the "n" rows starting at "first"
|
|
* of the schedule in "node" and return the result.
|
|
*
|
|
* That is, essentially compute
|
|
*
|
|
* T * N(first:first+n-1)
|
|
*
|
|
* taking into account the constant term and the parameter coefficients
|
|
* in "t_node".
|
|
*/
|
|
static __isl_give isl_mat *node_transformation(isl_ctx *ctx,
|
|
struct isl_sched_node *t_node, struct isl_sched_node *node,
|
|
int first, int n)
|
|
{
|
|
int i, j;
|
|
isl_mat *t;
|
|
isl_size n_row, n_col;
|
|
int n_param, n_var;
|
|
|
|
n_param = node->nparam;
|
|
n_var = node->nvar;
|
|
n_row = isl_mat_rows(t_node->sched);
|
|
n_col = isl_mat_cols(node->sched);
|
|
if (n_row < 0 || n_col < 0)
|
|
return NULL;
|
|
t = isl_mat_alloc(ctx, n_row, n_col);
|
|
if (!t)
|
|
return NULL;
|
|
for (i = 0; i < n_row; ++i) {
|
|
isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param);
|
|
isl_seq_clr(t->row[i] + 1 + n_param, n_var);
|
|
for (j = 0; j < n; ++j)
|
|
isl_seq_addmul(t->row[i],
|
|
t_node->sched->row[i][1 + n_param + j],
|
|
node->sched->row[first + j],
|
|
1 + n_param + n_var);
|
|
}
|
|
return t;
|
|
}
|
|
|
|
/* Apply the cluster schedule in "t_node" to the current band
|
|
* schedule of the nodes in "graph".
|
|
*
|
|
* In particular, replace the rows starting at band_start
|
|
* by the result of applying the cluster schedule in "t_node"
|
|
* to the original rows.
|
|
*
|
|
* The coincidence of the schedule is determined by the coincidence
|
|
* of the cluster schedule.
|
|
*/
|
|
static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph,
|
|
struct isl_sched_node *t_node)
|
|
{
|
|
int i, j;
|
|
isl_size n_new;
|
|
int start, n;
|
|
|
|
start = graph->band_start;
|
|
n = graph->n_total_row - start;
|
|
|
|
n_new = isl_mat_rows(t_node->sched);
|
|
if (n_new < 0)
|
|
return isl_stat_error;
|
|
for (i = 0; i < graph->n; ++i) {
|
|
struct isl_sched_node *node = &graph->node[i];
|
|
isl_mat *t;
|
|
|
|
t = node_transformation(ctx, t_node, node, start, n);
|
|
node->sched = isl_mat_drop_rows(node->sched, start, n);
|
|
node->sched = isl_mat_concat(node->sched, t);
|
|
node->sched_map = isl_map_free(node->sched_map);
|
|
if (!node->sched)
|
|
return isl_stat_error;
|
|
for (j = 0; j < n_new; ++j)
|
|
node->coincident[start + j] = t_node->coincident[j];
|
|
}
|
|
graph->n_total_row -= n;
|
|
graph->n_row -= n;
|
|
graph->n_total_row += n_new;
|
|
graph->n_row += n_new;
|
|
|
|
return isl_stat_ok;
|
|
}
|
|
|
|
/* Merge the clusters marked for merging in "c" into a single
|
|
* cluster using the cluster schedule in the current band of "merge_graph".
|
|
* The representative SCC for the new cluster is the SCC with
|
|
* the smallest index.
|
|
*
|
|
* The current band schedule of each SCC in the new cluster is obtained
|
|
* by applying the schedule of the corresponding original cluster
|
|
* to the original band schedule.
|
|
* All SCCs in the new cluster have the same number of schedule rows.
|
|
*/
|
|
static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c,
|
|
struct isl_sched_graph *merge_graph)
|
|
{
|
|
int i;
|
|
int cluster = -1;
|
|
isl_space *space;
|
|
|
|
for (i = 0; i < c->n; ++i) {
|
|
struct isl_sched_node *node;
|
|
|
|
if (!c->scc_in_merge[i])
|
|
continue;
|
|
if (cluster < 0)
|
|
cluster = i;
|
|
space = cluster_space(&c->scc[i], c->scc_cluster[i]);
|
|
node = isl_sched_graph_find_node(ctx, merge_graph, space);
|
|
isl_space_free(space);
|
|
if (!node)
|
|
return isl_stat_error;
|
|
if (!isl_sched_graph_is_node(merge_graph, node))
|
|
isl_die(ctx, isl_error_internal,
|
|
"unable to find cluster",
|
|
return isl_stat_error);
|
|
if (transform(ctx, &c->scc[i], node) < 0)
|
|
return isl_stat_error;
|
|
c->scc_cluster[i] = cluster;
|
|
}
|
|
|
|
return isl_stat_ok;
|
|
}
|
|
|
|
/* Try and merge the clusters of SCCs marked in c->scc_in_merge
|
|
* by scheduling the current cluster bands with respect to each other.
|
|
*
|
|
* Construct a dependence graph with a space for each cluster and
|
|
* with the coordinates of each space corresponding to the schedule
|
|
* dimensions of the current band of that cluster.
|
|
* Construct a cluster schedule in this cluster dependence graph and
|
|
* apply it to the current cluster bands if it is applicable
|
|
* according to ok_to_merge.
|
|
*
|
|
* If the number of remaining schedule dimensions in a cluster
|
|
* with a non-maximal current schedule dimension is greater than
|
|
* the number of remaining schedule dimensions in clusters
|
|
* with a maximal current schedule dimension, then restrict
|
|
* the number of rows to be computed in the cluster schedule
|
|
* to the minimal such non-maximal current schedule dimension.
|
|
* Do this by adjusting merge_graph.maxvar.
|
|
*
|
|
* Return isl_bool_true if the clusters have effectively been merged
|
|
* into a single cluster.
|
|
*
|
|
* Note that since the standard scheduling algorithm minimizes the maximal
|
|
* distance over proximity constraints, the proximity constraints between
|
|
* the merged clusters may not be optimized any further than what is
|
|
* sufficient to bring the distances within the limits of the internal
|
|
* proximity constraints inside the individual clusters.
|
|
* It may therefore make sense to perform an additional translation step
|
|
* to bring the clusters closer to each other, while maintaining
|
|
* the linear part of the merging schedule found using the standard
|
|
* scheduling algorithm.
|
|
*/
|
|
static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
|
|
struct isl_clustering *c)
|
|
{
|
|
struct isl_sched_graph merge_graph = { 0 };
|
|
isl_bool merged;
|
|
|
|
if (init_merge_graph(ctx, graph, c, &merge_graph) < 0)
|
|
goto error;
|
|
|
|
if (isl_sched_graph_compute_maxvar(&merge_graph) < 0)
|
|
goto error;
|
|
if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0)
|
|
goto error;
|
|
if (isl_schedule_node_compute_wcc_band(ctx, &merge_graph) < 0)
|
|
goto error;
|
|
merged = ok_to_merge(ctx, graph, c, &merge_graph);
|
|
if (merged && merge(ctx, c, &merge_graph) < 0)
|
|
goto error;
|
|
|
|
isl_sched_graph_free(ctx, &merge_graph);
|
|
return merged;
|
|
error:
|
|
isl_sched_graph_free(ctx, &merge_graph);
|
|
return isl_bool_error;
|
|
}
|
|
|
|
/* Is there any edge marked "no_merge" between two SCCs that are
|
|
* about to be merged (i.e., that are set in "scc_in_merge")?
|
|
* "merge_edge" is the proximity edge along which the clusters of SCCs
|
|
* are going to be merged.
|
|
*
|
|
* If there is any edge between two SCCs with a negative weight,
|
|
* while the weight of "merge_edge" is non-negative, then this
|
|
* means that the edge was postponed. "merge_edge" should then
|
|
* also be postponed since merging along the edge with negative weight should
|
|
* be postponed until all edges with non-negative weight have been tried.
|
|
* Replace the weight of "merge_edge" by a negative weight as well and
|
|
* tell the caller not to attempt a merge.
|
|
*/
|
|
static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge,
|
|
struct isl_sched_edge *merge_edge)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < graph->n_edge; ++i) {
|
|
struct isl_sched_edge *edge = &graph->edge[i];
|
|
|
|
if (!scc_in_merge[edge->src->scc])
|
|
continue;
|
|
if (!scc_in_merge[edge->dst->scc])
|
|
continue;
|
|
if (edge->no_merge)
|
|
return 1;
|
|
if (merge_edge->weight >= 0 && edge->weight < 0) {
|
|
merge_edge->weight -= graph->max_weight + 1;
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Merge the two clusters in "c" connected by the edge in "graph"
|
|
* with index "edge" into a single cluster.
|
|
* If it turns out to be impossible to merge these two clusters,
|
|
* then mark the edge as "no_merge" such that it will not be
|
|
* considered again.
|
|
*
|
|
* First mark all SCCs that need to be merged. This includes the SCCs
|
|
* in the two clusters, but it may also include the SCCs
|
|
* of intermediate clusters.
|
|
* If there is already a no_merge edge between any pair of such SCCs,
|
|
* then simply mark the current edge as no_merge as well.
|
|
* Likewise, if any of those edges was postponed by has_bounded_distances,
|
|
* then postpone the current edge as well.
|
|
* Otherwise, try and merge the clusters and mark "edge" as "no_merge"
|
|
* if the clusters did not end up getting merged, unless the non-merge
|
|
* is due to the fact that the edge was postponed. This postponement
|
|
* can be recognized by a change in weight (from non-negative to negative).
|
|
*/
|
|
static isl_stat merge_clusters_along_edge(isl_ctx *ctx,
|
|
struct isl_sched_graph *graph, int edge, struct isl_clustering *c)
|
|
{
|
|
isl_bool merged;
|
|
int edge_weight = graph->edge[edge].weight;
|
|
|
|
if (mark_merge_sccs(ctx, graph, edge, c) < 0)
|
|
return isl_stat_error;
|
|
|
|
if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge]))
|
|
merged = isl_bool_false;
|
|
else
|
|
merged = try_merge(ctx, graph, c);
|
|
if (merged < 0)
|
|
return isl_stat_error;
|
|
if (!merged && edge_weight == graph->edge[edge].weight)
|
|
graph->edge[edge].no_merge = 1;
|
|
|
|
return isl_stat_ok;
|
|
}
|
|
|
|
/* Does "node" belong to the cluster identified by "cluster"?
|
|
*/
|
|
static int node_cluster_exactly(struct isl_sched_node *node, int cluster)
|
|
{
|
|
return node->cluster == cluster;
|
|
}
|
|
|
|
/* Does "edge" connect two nodes belonging to the cluster
|
|
* identified by "cluster"?
|
|
*/
|
|
static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster)
|
|
{
|
|
return edge->src->cluster == cluster && edge->dst->cluster == cluster;
|
|
}
|
|
|
|
/* Swap the schedule of "node1" and "node2".
|
|
* Both nodes have been derived from the same node in a common parent graph.
|
|
* Since the "coincident" field is shared with that node
|
|
* in the parent graph, there is no need to also swap this field.
|
|
*/
|
|
static void swap_sched(struct isl_sched_node *node1,
|
|
struct isl_sched_node *node2)
|
|
{
|
|
isl_mat *sched;
|
|
isl_map *sched_map;
|
|
|
|
sched = node1->sched;
|
|
node1->sched = node2->sched;
|
|
node2->sched = sched;
|
|
|
|
sched_map = node1->sched_map;
|
|
node1->sched_map = node2->sched_map;
|
|
node2->sched_map = sched_map;
|
|
}
|
|
|
|
/* Copy the current band schedule from the SCCs that form the cluster
|
|
* with index "pos" to the actual cluster at position "pos".
|
|
* By construction, the index of the first SCC that belongs to the cluster
|
|
* is also "pos".
|
|
*
|
|
* The order of the nodes inside both the SCCs and the cluster
|
|
* is assumed to be same as the order in the original "graph".
|
|
*
|
|
* Since the SCC graphs will no longer be used after this function,
|
|
* the schedules are actually swapped rather than copied.
|
|
*/
|
|
static isl_stat copy_partial(struct isl_sched_graph *graph,
|
|
struct isl_clustering *c, int pos)
|
|
{
|
|
int i, j;
|
|
|
|
c->cluster[pos].n_total_row = c->scc[pos].n_total_row;
|
|
c->cluster[pos].n_row = c->scc[pos].n_row;
|
|
c->cluster[pos].maxvar = c->scc[pos].maxvar;
|
|
j = 0;
|
|
for (i = 0; i < graph->n; ++i) {
|
|
int k;
|
|
int s;
|
|
|
|
if (graph->node[i].cluster != pos)
|
|
continue;
|
|
s = graph->node[i].scc;
|
|
k = c->scc_node[s]++;
|
|
swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]);
|
|
if (c->scc[s].maxvar > c->cluster[pos].maxvar)
|
|
c->cluster[pos].maxvar = c->scc[s].maxvar;
|
|
++j;
|
|
}
|
|
|
|
return isl_stat_ok;
|
|
}
|
|
|
|
/* Is there a (conditional) validity dependence from node[j] to node[i],
|
|
* forcing node[i] to follow node[j] or do the nodes belong to the same
|
|
* cluster?
|
|
*/
|
|
static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user)
|
|
{
|
|
struct isl_sched_graph *graph = user;
|
|
|
|
if (graph->node[i].cluster == graph->node[j].cluster)
|
|
return isl_bool_true;
|
|
return isl_sched_graph_has_validity_edge(graph, &graph->node[j],
|
|
&graph->node[i]);
|
|
}
|
|
|
|
/* Extract the merged clusters of SCCs in "graph", sort them, and
|
|
* store them in c->clusters. Update c->scc_cluster accordingly.
|
|
*
|
|
* First keep track of the cluster containing the SCC to which a node
|
|
* belongs in the node itself.
|
|
* Then extract the clusters into c->clusters, copying the current
|
|
* band schedule from the SCCs that belong to the cluster.
|
|
* Do this only once per cluster.
|
|
*
|
|
* Finally, topologically sort the clusters and update c->scc_cluster
|
|
* to match the new scc numbering. While the SCCs were originally
|
|
* sorted already, some SCCs that depend on some other SCCs may
|
|
* have been merged with SCCs that appear before these other SCCs.
|
|
* A reordering may therefore be required.
|
|
*/
|
|
static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph,
|
|
struct isl_clustering *c)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < graph->n; ++i)
|
|
graph->node[i].cluster = c->scc_cluster[graph->node[i].scc];
|
|
|
|
for (i = 0; i < graph->scc; ++i) {
|
|
if (c->scc_cluster[i] != i)
|
|
continue;
|
|
if (isl_sched_graph_extract_sub_graph(ctx, graph,
|
|
&node_cluster_exactly,
|
|
&edge_cluster_exactly, i, &c->cluster[i]) < 0)
|
|
return isl_stat_error;
|
|
c->cluster[i].src_scc = -1;
|
|
c->cluster[i].dst_scc = -1;
|
|
if (copy_partial(graph, c, i) < 0)
|
|
return isl_stat_error;
|
|
}
|
|
|
|
if (isl_sched_graph_detect_ccs(ctx, graph,
|
|
&node_follows_strong_or_same_cluster) < 0)
|
|
return isl_stat_error;
|
|
for (i = 0; i < graph->n; ++i)
|
|
c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster;
|
|
|
|
return isl_stat_ok;
|
|
}
|
|
|
|
/* Compute weights on the proximity edges of "graph" that can
|
|
* be used by find_proximity to find the most appropriate
|
|
* proximity edge to use to merge two clusters in "c".
|
|
* The weights are also used by has_bounded_distances to determine
|
|
* whether the merge should be allowed.
|
|
* Store the maximum of the computed weights in graph->max_weight.
|
|
*
|
|
* The computed weight is a measure for the number of remaining schedule
|
|
* dimensions that can still be completely aligned.
|
|
* In particular, compute the number of equalities between
|
|
* input dimensions and output dimensions in the proximity constraints.
|
|
* The directions that are already handled by outer schedule bands
|
|
* are projected out prior to determining this number.
|
|
*
|
|
* Edges that will never be considered by find_proximity are ignored.
|
|
*/
|
|
static isl_stat compute_weights(struct isl_sched_graph *graph,
|
|
struct isl_clustering *c)
|
|
{
|
|
int i;
|
|
|
|
graph->max_weight = 0;
|
|
|
|
for (i = 0; i < graph->n_edge; ++i) {
|
|
struct isl_sched_edge *edge = &graph->edge[i];
|
|
struct isl_sched_node *src = edge->src;
|
|
struct isl_sched_node *dst = edge->dst;
|
|
isl_basic_map *hull;
|
|
isl_bool prox;
|
|
isl_size n_in, n_out, n;
|
|
|
|
prox = is_non_empty_proximity(edge);
|
|
if (prox < 0)
|
|
return isl_stat_error;
|
|
if (!prox)
|
|
continue;
|
|
if (bad_cluster(&c->scc[edge->src->scc]) ||
|
|
bad_cluster(&c->scc[edge->dst->scc]))
|
|
continue;
|
|
if (c->scc_cluster[edge->dst->scc] ==
|
|
c->scc_cluster[edge->src->scc])
|
|
continue;
|
|
|
|
hull = isl_map_affine_hull(isl_map_copy(edge->map));
|
|
hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0,
|
|
isl_mat_copy(src->vmap));
|
|
hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0,
|
|
isl_mat_copy(dst->vmap));
|
|
hull = isl_basic_map_project_out(hull,
|
|
isl_dim_in, 0, src->rank);
|
|
hull = isl_basic_map_project_out(hull,
|
|
isl_dim_out, 0, dst->rank);
|
|
hull = isl_basic_map_remove_divs(hull);
|
|
n_in = isl_basic_map_dim(hull, isl_dim_in);
|
|
n_out = isl_basic_map_dim(hull, isl_dim_out);
|
|
if (n_in < 0 || n_out < 0)
|
|
hull = isl_basic_map_free(hull);
|
|
hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
|
|
isl_dim_in, 0, n_in);
|
|
hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
|
|
isl_dim_out, 0, n_out);
|
|
n = isl_basic_map_n_equality(hull);
|
|
isl_basic_map_free(hull);
|
|
if (n < 0)
|
|
return isl_stat_error;
|
|
edge->weight = n;
|
|
|
|
if (edge->weight > graph->max_weight)
|
|
graph->max_weight = edge->weight;
|
|
}
|
|
|
|
return isl_stat_ok;
|
|
}
|
|
|
|
/* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" and
|
|
* update "node" to arrange for them to be executed in an order
|
|
* possibly involving set nodes that generalizes the topological order
|
|
* determined by the scc fields of the nodes in "graph".
|
|
*
|
|
* Note that at this stage, there are graph->scc clusters and
|
|
* their positions in c->cluster are determined by the values
|
|
* of c->scc_cluster.
|
|
*
|
|
* Construct an isl_scc_graph and perform the decomposition
|
|
* using this graph.
|
|
*/
|
|
static __isl_give isl_schedule_node *finish_bands_decompose(
|
|
__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
|
|
struct isl_clustering *c)
|
|
{
|
|
isl_ctx *ctx;
|
|
struct isl_scc_graph *scc_graph;
|
|
|
|
ctx = isl_schedule_node_get_ctx(node);
|
|
|
|
scc_graph = isl_scc_graph_from_sched_graph(ctx, graph, c);
|
|
node = isl_scc_graph_decompose(scc_graph, node);
|
|
isl_scc_graph_free(scc_graph);
|
|
|
|
return node;
|
|
}
|
|
|
|
/* Call isl_schedule_node_compute_finish_band on each of the clusters in "c"
|
|
* in their topological order. This order is determined by the scc
|
|
* fields of the nodes in "graph".
|
|
* Combine the results in a sequence expressing the topological order.
|
|
*
|
|
* If there is only one cluster left, then there is no need to introduce
|
|
* a sequence node. Also, in this case, the cluster necessarily contains
|
|
* the SCC at position 0 in the original graph and is therefore also
|
|
* stored in the first cluster of "c".
|
|
*
|
|
* If there are more than two clusters left, then some subsets of the clusters
|
|
* may still be independent of each other. These could then still
|
|
* be reordered with respect to each other. Call finish_bands_decompose
|
|
* to try and construct an ordering involving set and sequence nodes
|
|
* that generalizes the topological order.
|
|
* Note that at the outermost level there can be no independent components
|
|
* because isl_schedule_node_compute_wcc_clustering is called
|
|
* on a (weakly) connected component.
|
|
*/
|
|
static __isl_give isl_schedule_node *finish_bands_clustering(
|
|
__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
|
|
struct isl_clustering *c)
|
|
{
|
|
int i;
|
|
isl_ctx *ctx;
|
|
isl_union_set_list *filters;
|
|
|
|
if (graph->scc == 1)
|
|
return isl_schedule_node_compute_finish_band(node,
|
|
&c->cluster[0], 0);
|
|
if (graph->scc > 2)
|
|
return finish_bands_decompose(node, graph, c);
|
|
|
|
ctx = isl_schedule_node_get_ctx(node);
|
|
|
|
filters = isl_sched_graph_extract_sccs(ctx, graph);
|
|
node = isl_schedule_node_insert_sequence(node, filters);
|
|
|
|
for (i = 0; i < graph->scc; ++i) {
|
|
int j = c->scc_cluster[i];
|
|
node = isl_schedule_node_grandchild(node, i, 0);
|
|
node = isl_schedule_node_compute_finish_band(node,
|
|
&c->cluster[j], 0);
|
|
node = isl_schedule_node_grandparent(node);
|
|
}
|
|
|
|
return node;
|
|
}
|
|
|
|
/* Compute a schedule for a connected dependence graph by first considering
|
|
* each strongly connected component (SCC) in the graph separately and then
|
|
* incrementally combining them into clusters.
|
|
* Return the updated schedule node.
|
|
*
|
|
* Initially, each cluster consists of a single SCC, each with its
|
|
* own band schedule. The algorithm then tries to merge pairs
|
|
* of clusters along a proximity edge until no more suitable
|
|
* proximity edges can be found. During this merging, the schedule
|
|
* is maintained in the individual SCCs.
|
|
* After the merging is completed, the full resulting clusters
|
|
* are extracted and in finish_bands_clustering,
|
|
* isl_schedule_node_compute_finish_band is called on each of them to integrate
|
|
* the band into "node" and to continue the computation.
|
|
*
|
|
* compute_weights initializes the weights that are used by find_proximity.
|
|
*/
|
|
__isl_give isl_schedule_node *isl_schedule_node_compute_wcc_clustering(
|
|
__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
|
|
{
|
|
isl_ctx *ctx;
|
|
struct isl_clustering c;
|
|
int i;
|
|
|
|
ctx = isl_schedule_node_get_ctx(node);
|
|
|
|
if (clustering_init(ctx, &c, graph) < 0)
|
|
goto error;
|
|
|
|
if (compute_weights(graph, &c) < 0)
|
|
goto error;
|
|
|
|
for (;;) {
|
|
i = find_proximity(graph, &c);
|
|
if (i < 0)
|
|
goto error;
|
|
if (i >= graph->n_edge)
|
|
break;
|
|
if (merge_clusters_along_edge(ctx, graph, i, &c) < 0)
|
|
goto error;
|
|
}
|
|
|
|
if (extract_clusters(ctx, graph, &c) < 0)
|
|
goto error;
|
|
|
|
node = finish_bands_clustering(node, graph, &c);
|
|
|
|
clustering_free(ctx, &c);
|
|
return node;
|
|
error:
|
|
clustering_free(ctx, &c);
|
|
return isl_schedule_node_free(node);
|
|
}
|