gpu_hybrid.c

/*
 * Copyright 2013 Ecole Normale Superieure
 * Copyright 2015 Sven Verdoolaege
 *
 * Use of this software is governed by the MIT license
 *
 * Written by Sven Verdoolaege,
 * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
 */

#include <string.h>

#include <isl/val.h>
#include <isl/space.h>
#include <isl/union_set.h>
#include <isl/schedule_node.h>

#include "hybrid.h"
#include "gpu_hybrid.h"
#include "gpu_tree.h"
#include "schedule.h"
#include "util.h"

/* Have all domain elements been filtered out before reaching
 * the "node" position in the schedule tree?
 */
static isl_bool has_empty_domain(__isl_keep isl_schedule_node *node)
{
	isl_union_set *domain;
	isl_bool empty;

	domain = isl_schedule_node_get_domain(node);
	empty = isl_union_set_is_empty(domain);
	isl_union_set_free(domain);

	return empty;
}

/* Given a pointer to a phase in the result of hybrid tiling,
 * map the phase to the device, provided the phase is non-empty.
 * Empty phases can occur if the input schedule domain can be
 * covered by a small number of hexagons that all belong to the same phase.
 *
 * The input has the following form:
 *
 *	M - CT - P - C - ...
 *
 * with M the phase marker, CT the space tiling, P the original
 * parent band and C the original child band.
 * The (outer dimensions of the) C band need to be mapped to threads.
 * The (outer dimension of the) CT band needs to be mapped to blocks.
 * The mapping to shared memory needs to be computed between the CT and
 * the P band.
 *
 * The C band is first shifted to start at zero.
 * Then the appropriate markers are introduced and a kernel is
 * created for the tree rooted at CT.
 * If the "unroll_gpu_tile" option is set, then the AST generator
 * is instructed to unroll the P and C bands.
 */
static __isl_give isl_schedule_node *update_phase(
	__isl_take isl_schedule_node *node, void *user)
{
	struct gpu_gen *gen = user;
	int depth0, depth;
	isl_ctx *ctx;
	isl_id *id;
	isl_bool empty_domain;
	ppcg_ht_phase *phase;

	empty_domain = has_empty_domain(node);
	if (empty_domain < 0)
		return isl_schedule_node_free(node);
	if (empty_domain)
		return node;

	if (!node)
		return NULL;
	ctx = isl_schedule_node_get_ctx(node);
	phase = ppcg_ht_phase_extract_from_mark(node);
	depth0 = isl_schedule_node_get_tree_depth(node);

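	/* Descend from the phase mark M past the CT and P bands
	 * to the original child band C.
	 */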
	node = isl_schedule_node_child(node, 0);
	node = isl_schedule_node_child(node, 0);
	node = isl_schedule_node_child(node, 0);
	node = ppcg_ht_phase_shift_space_point(phase, node);
	if (gen->options->unroll_gpu_tile)
		node = ppcg_set_schedule_node_type(node, isl_ast_loop_unroll);
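	/* Mark the (shifted) C band for mapping to threads. */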
	id = isl_id_alloc(ctx, "thread", NULL);
	node = isl_schedule_node_insert_mark(node, id);
	node = isl_schedule_node_parent(node);
	if (gen->options->unroll_gpu_tile)
		node = ppcg_set_schedule_node_type(node, isl_ast_loop_unroll);
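	/* Insert the "shared" mark between the CT and the P band,
	 * where the mapping to shared memory needs to be computed.
	 */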
	id = isl_id_alloc(ctx, "shared", NULL);
	node = isl_schedule_node_insert_mark(node, id);
	node = isl_schedule_node_parent(node);

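	/* "node" now points at the CT band and the subtree below
	 * the phase mark has the form
	 *
	 *	CT - shared - P - thread - C - ...
	 *
	 * Create a kernel for the tree rooted at CT.
	 */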
	node = gpu_create_kernel(gen, node, 0, NULL);

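	/* Move back up to the depth of the original phase mark. */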
	depth = isl_schedule_node_get_tree_depth(node);
	node = isl_schedule_node_ancestor(node, depth - depth0);

	return node;
}

/* Apply hybrid tiling on "node" and its parent based on the (valid)
 * bounds on the relative dependence distances "bounds" and
 * the tile sizes in "tile_sizes".
 * The number of elements in "tile_sizes" is at least as large
 * as the sum of the dimensions of the parent and the child node.
 *
 * Convert the tile_sizes to an isl_multi_val in the right space,
 * insert the hybrid tiling and then create a kernel inside each phase.
 * Finally, remove the phase marks.
 */
__isl_give isl_schedule_node *gpu_hybrid_tile(struct gpu_gen *gen,
	__isl_take isl_schedule_node *node, __isl_take ppcg_ht_bounds *bounds,
	int *tile_sizes)
{
	isl_multi_val *mv;
	isl_space *space, *space2;

	if (!node || !bounds)
		goto error;

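	/* Combine the spaces of the parent band and the child band ("node")
	 * into the product space in which the tile sizes are expressed.
	 */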
	space2 = isl_schedule_node_band_get_space(node);
	node = isl_schedule_node_parent(node);
	space = isl_schedule_node_band_get_space(node);
	space = isl_space_product(space, space2);
	mv = ppcg_multi_val_from_int_list(space, tile_sizes);

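	/* Insert the hybrid tiling, create a kernel inside each (non-empty)
	 * phase and finally remove the phase marks again.
	 */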
	node = ppcg_ht_bounds_insert_tiling(bounds, mv, node, gen->options);

	node = hybrid_tile_foreach_phase(node, &update_phase, gen);

	node = hybrid_tile_drop_phase_marks(node);

	return node;
error:
	isl_schedule_node_free(node);
	ppcg_ht_bounds_free(bounds);
	return NULL;
}