123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459 |
- #ifndef _GPU_H
- #define _GPU_H
- #include <isl/ast.h>
- #include <isl/id.h>
- #include <isl/id_to_ast_expr.h>
- #include <pet.h>
- #include "ppcg.h"
- #include "ppcg_options.h"
- /* An access to an outer array element or an iterator.
- * Accesses to iterators have an access relation that maps to an unnamed space.
- * An access may be both read and write.
- * If the access relation is empty, then the output dimension may
- * not be equal to the dimension of the corresponding array.
- */
- struct gpu_stmt_access {
- /* Access reads elements */
- int read;
- /* Access writes elements */
- int write;
- /* All writes are definite writes. */
- int exact_write;
- /* Is a single, fixed element being accessed? */
- isl_bool fixed_element;
- /* The number of index expressions specified in the access. */
- int n_index;
- /* May access relation */
- isl_map *access;
- /* May access relation with as domain a mapping from iteration domain
- * to a reference identifier.
- */
- isl_map *tagged_access;
- /* The reference id of the corresponding pet_expr. */
- isl_id *ref_id;
- struct gpu_stmt_access *next;
- };
- /* A representation of a user statement.
- * "stmt" points to the corresponding pet statement.
- * "id" is the identifier of the instance set of the statement.
- * "accesses" is a linked list of accesses performed by the statement.
- * If the statement has been killed, i.e., if it will not be scheduled,
- * then this linked list may be empty even if the actual statement does
- * perform accesses.
- */
- struct gpu_stmt {
- isl_id *id;
- struct pet_stmt *stmt;
- struct gpu_stmt_access *accesses;
- };
- /* Represents an outer array possibly accessed by a gpu_prog.
- */
- struct gpu_array_info {
- /* The array data space. */
- isl_space *space;
- /* Element type. */
- char *type;
- /* Element size. */
- int size;
- /* Name of the array. */
- char *name;
- /* Declared extent of original array. */
- isl_set *declared_extent;
- /* AST expression for declared size of original array. */
- isl_ast_expr *declared_size;
- /* Extent of the array that needs to be copied. */
- isl_set *extent;
- /* Number of indices. */
- unsigned n_index;
- /* For each index, a bound on "extent" in that direction. */
- isl_multi_pw_aff *bound;
- /* The corresponding access AST expression, if the array needs
- * to be allocated on the device.
- */
- isl_ast_expr *bound_expr;
- /* All references to this array; point to elements of a linked list. */
- int n_ref;
- struct gpu_stmt_access **refs;
- /* Is this array accessed at all by the program? */
- int accessed;
- /* Is this a scalar that is read-only within the entire program? */
- int read_only_scalar;
- /* Are the elements of the array structures? */
- int has_compound_element;
- /* Are the elements only accessed through constant index expressions? */
- int only_fixed_element;
- /* Is the array local to the scop? */
- int local;
- /* Is the array local and should it be declared on the host? */
- int declare_local;
- /* Is the corresponding global device memory accessed in any way? */
- int global;
- /* Should the array be linearized? */
- int linearize;
- /* Order dependences on this array.
- * Only used if live_range_reordering option is set.
- * It is set to NULL otherwise.
- */
- isl_union_map *dep_order;
- void *user;
- };
- /* Represents an outer array accessed by a ppcg_kernel, localized
- * to the context of this kernel.
- *
- * "array" points to the corresponding array in the gpu_prog.
- * The "n_group" "groups" are the reference groups associated to the array.
- * If "force_private" is set, then the array (in practice a scalar)
- * must be mapped to a register.
- * "global" is set if the global device memory corresponding
- * to this array is accessed by the kernel.
- * "bound" is equal to array->bound specialized to the current kernel.
- * "bound_expr" is the corresponding access AST expression.
- */
- struct gpu_local_array_info {
- struct gpu_array_info *array;
- int n_group;
- struct gpu_array_ref_group **groups;
- int force_private;
- int global;
- unsigned n_index;
- isl_multi_pw_aff *bound;
- isl_ast_expr *bound_expr;
- };
- __isl_give isl_ast_expr *gpu_local_array_info_linearize_index(
- struct gpu_local_array_info *array, __isl_take isl_ast_expr *expr);
- /* A sequence of "n" names of types.
- */
- struct gpu_types {
- int n;
- char **name;
- };
- /* "read" and "write" contain the original access relations, possibly
- * involving member accesses.
- *
- * The elements of "array", as well as the ranges of "copy_in" and "copy_out"
- * only refer to the outer arrays of any possible member accesses.
- */
- struct gpu_prog {
- isl_ctx *ctx;
- struct ppcg_scop *scop;
- /* Set of parameter values */
- isl_set *context;
- /* All potential read accesses in the entire program */
- isl_union_map *read;
- /* All potential write accesses in the entire program */
- isl_union_map *may_write;
- /* All definite write accesses in the entire program */
- isl_union_map *must_write;
- /* All tagged definite kills in the entire program */
- isl_union_map *tagged_must_kill;
- /* The set of inner array elements that may be preserved. */
- isl_union_set *may_persist;
- /* A mapping from all innermost arrays to their outer arrays. */
- isl_union_map *to_outer;
- /* A mapping from the outer arrays to all corresponding inner arrays. */
- isl_union_map *to_inner;
- /* A mapping from all intermediate arrays to their outer arrays,
- * including an identity mapping from the anonymous 1D space to itself.
- */
- isl_union_map *any_to_outer;
- /* Order dependences on non-scalars. */
- isl_union_map *array_order;
- /* Array of statements */
- int n_stmts;
- struct gpu_stmt *stmts;
- int n_array;
- struct gpu_array_info *array;
- };
- struct gpu_gen {
- isl_ctx *ctx;
- struct ppcg_options *options;
- /* Callback for printing of AST in appropriate format. */
- __isl_give isl_printer *(*print)(__isl_take isl_printer *p,
- struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
- struct gpu_types *types, void *user);
- void *print_user;
- isl_id_to_ast_expr *(*build_ast_expr)(void *stmt,
- isl_ast_build *build,
- isl_multi_pw_aff *(*fn_index)(
- __isl_take isl_multi_pw_aff *mpa, isl_id *id,
- void *user),
- void *user_index,
- isl_ast_expr *(*fn_expr)(isl_ast_expr *expr,
- isl_id *id, void *user),
- void *user_expr);
- struct gpu_prog *prog;
- /* The generated AST. */
- isl_ast_node *tree;
- /* The sequence of types for which a definition has been printed. */
- struct gpu_types types;
- /* User specified tile, grid and block sizes for each kernel */
- isl_union_map *sizes;
- /* Effectively used tile, grid and block sizes for each kernel */
- isl_union_map *used_sizes;
- /* Identifier of the next kernel. */
- int kernel_id;
- };
- enum ppcg_group_access_type {
- ppcg_access_global,
- ppcg_access_shared,
- ppcg_access_private
- };
- enum ppcg_kernel_stmt_type {
- ppcg_kernel_copy,
- ppcg_kernel_domain,
- ppcg_kernel_sync
- };
- /* Representation of special statements, in particular copy statements
- * and __syncthreads statements, inside a kernel.
- *
- * type represents the kind of statement
- *
- *
- * for ppcg_kernel_copy statements we have
- *
- * read is set if the statement should copy data from global memory
- * to shared memory or registers.
- *
- * index expresses an access to the array element that needs to be copied
- * local_index expresses the corresponding element in the tile
- *
- * array refers to the original array being copied
- * local_array is a pointer to the appropriate element in the "array"
- * array of the ppcg_kernel to which this copy access belongs
- *
- *
- * for ppcg_kernel_domain statements we have
- *
- * stmt is the corresponding input statement
- *
- * n_access is the number of accesses in stmt
- * access is an array of local information about the accesses
- */
- struct ppcg_kernel_stmt {
- enum ppcg_kernel_stmt_type type;
- union {
- struct {
- int read;
- isl_ast_expr *index;
- isl_ast_expr *local_index;
- struct gpu_array_info *array;
- struct gpu_local_array_info *local_array;
- } c;
- struct {
- struct gpu_stmt *stmt;
- isl_id_to_ast_expr *ref2expr;
- } d;
- } u;
- };
- /* Representation of a local variable in a kernel.
- */
- struct ppcg_kernel_var {
- struct gpu_array_info *array;
- enum ppcg_group_access_type type;
- char *name;
- isl_vec *size;
- };
- /* Representation of a kernel.
- *
- * prog describes the original code from which the kernel is extracted.
- *
- * id is the sequence number of the kernel.
- *
- * block_ids contains the list of block identifiers for this kernel.
- * thread_ids contains the list of thread identifiers for this kernel.
- *
- * the first n_grid elements of grid_dim represent the specified size
- * of the grid.
- * the first n_block elements of block_dim represent the specified or
- * effective size of the block.
- * Note that in the input file, the sizes of the grid and the blocks
- * are specified in the order x, y, z, but internally, the sizes
- * are stored in reverse order, so that the last element always
- * refers to the x dimension.
- *
- * grid_size reflects the effective grid size.
- * grid_size_expr contains a corresponding access AST expression, built within
- * the context where the launch appears.
- *
- * context contains the values of the parameters and outer schedule dimensions
- * for which any statement instance in this kernel needs to be executed.
- *
- * n_sync is the number of synchronization operations that have
- * been introduced in the schedule tree corresponding to this kernel (so far).
- *
- * core contains the spaces of the statement domains that form
- * the core computation of the kernel. It is used to navigate
- * the tree during the construction of the device part of the schedule
- * tree in gpu_create_kernel.
- *
- * expanded_domain contains the original statement instances,
- * i.e., those that appear in the domains of access relations,
- * that are involved in the kernel.
- * contraction maps those original statement instances to
- * the statement instances that are active at the point
- * in the schedule tree where the kernel is created.
- *
- * arrays is the set of possibly accessed outer array elements.
- *
- * space is the schedule space of the AST context. That is, it represents
- * the loops of the generated host code containing the kernel launch.
- *
- * n_array is the total number of arrays in the input program and also
- * the number of element in the array array.
- * array contains information about each array that is local
- * to the current kernel. If an array is not used in a kernel,
- * then the corresponding entry does not contain any information.
- *
- * any_force_private is set if any array in the kernel is marked force_private
- *
- * block_filter contains constraints on the domain elements in the kernel
- * that encode the mapping to block identifiers, where the block identifiers
- * are represented by "n_grid" parameters with as names the elements
- * of "block_ids".
- *
- * thread_filter contains constraints on the domain elements in the kernel
- * that encode the mapping to thread identifiers, where the thread identifiers
- * are represented by "n_block" parameters with as names the elements
- * of "thread_ids".
- *
- * copy_schedule corresponds to the schedule dimensions of
- * the (tiled) schedule for this kernel that have been taken into account
- * for computing private/shared memory tiles.
- * The domain corresponds to the original statement instances, i.e.,
- * those that appear in the leaves of the schedule tree.
- * copy_schedule_dim is the dimension of this schedule.
- *
- * sync_writes contains write references that require synchronization.
- * Each reference is represented by a universe set in a space [S[i,j] -> R[]]
- * with S[i,j] the statement instance space and R[] the array reference.
- */
- struct ppcg_kernel {
- isl_ctx *ctx;
- struct ppcg_options *options;
- struct gpu_prog *prog;
- int id;
- isl_id_list *block_ids;
- isl_id_list *thread_ids;
- int n_grid;
- int n_block;
- int grid_dim[2];
- int block_dim[3];
- isl_multi_pw_aff *grid_size;
- isl_ast_expr *grid_size_expr;
- isl_set *context;
- int n_sync;
- isl_union_set *core;
- isl_union_set *arrays;
- isl_union_pw_multi_aff *contraction;
- isl_union_set *expanded_domain;
- isl_space *space;
- int n_array;
- struct gpu_local_array_info *array;
- int n_var;
- struct ppcg_kernel_var *var;
- int any_force_private;
- isl_union_set *block_filter;
- isl_union_set *thread_filter;
- isl_union_pw_multi_aff *copy_schedule;
- int copy_schedule_dim;
- isl_union_set *sync_writes;
- isl_ast_node *tree;
- };
- int gpu_array_is_scalar(struct gpu_array_info *array);
- int gpu_array_is_read_only_scalar(struct gpu_array_info *array);
- int gpu_array_requires_device_allocation(struct gpu_array_info *array);
- __isl_give isl_set *gpu_array_positive_size_guard(struct gpu_array_info *array);
- isl_bool gpu_array_can_be_private(struct gpu_array_info *array);
- struct gpu_prog *gpu_prog_alloc(isl_ctx *ctx, struct ppcg_scop *scop);
- void *gpu_prog_free(struct gpu_prog *prog);
- int ppcg_kernel_requires_array_argument(struct ppcg_kernel *kernel, int i);
- int generate_gpu(isl_ctx *ctx, const char *input, FILE *out,
- struct ppcg_options *options,
- __isl_give isl_printer *(*print)(__isl_take isl_printer *p,
- struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
- struct gpu_types *types, void *user), void *user);
- __isl_give isl_schedule_node *gpu_create_kernel(struct gpu_gen *gen,
- __isl_take isl_schedule_node *node, int scale,
- __isl_keep isl_multi_val *sizes);
- __isl_give isl_schedule *get_schedule(struct gpu_gen *gen);
- int has_any_permutable_node(__isl_keep isl_schedule *schedule);
- __isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
- __isl_take isl_schedule *schedule,
- int to_from_device);
- __isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
- __isl_take isl_schedule *schedule);
- __isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog);
- void collect_references(struct gpu_prog *prog, struct gpu_array_info *array);
- void collect_order_dependences(struct gpu_prog *prog);
- isl_bool only_fixed_element_accessed(struct gpu_array_info *array);
- #endif
|