hybrid.c 65 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242
  1. /*
  2. * Copyright 2013 Ecole Normale Superieure
  3. * Copyright 2015 Sven Verdoolaege
  4. *
  5. * Use of this software is governed by the MIT license
  6. *
  7. * Written by Sven Verdoolaege,
  8. * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
  9. */
  10. #include <string.h>
  11. #include <isl/space.h>
  12. #include <isl/constraint.h>
  13. #include <isl/val.h>
  14. #include <isl/aff.h>
  15. #include <isl/set.h>
  16. #include <isl/map.h>
  17. #include <isl/union_set.h>
  18. #include <isl/union_map.h>
  19. #include "hybrid.h"
  20. #include "schedule.h"
  21. /* The hybrid tiling implemented in this file is based on
  22. * Grosser et al., "Hybrid Hexagonal/Classical Tiling for GPUs".
  23. */
  24. /* Bounds on relative dependence distances in input to hybrid tiling.
  25. * upper is an upper bound on the relative dependence distances
  26. * in the first space dimension
  27. * -lower is a lower bound on the relative dependence distances
  28. * in all space dimensions.
  29. *
  30. * In particular,
  31. *
  32. * d_i >= -lower_i d_0
  33. * and
  34. * d_1 <= upper d_0
  35. *
  36. * for each dependence distance vector d, where d_1 is the component
  37. * corresponding to the first space dimension.
  38. *
  39. * upper and lower are always non-negative.
  40. * Some of the values may be NaN if no bound could be found.
  41. */
  42. struct ppcg_ht_bounds {
  43. isl_val *upper;
  44. isl_multi_val *lower;
  45. };
  46. /* Free "bounds" along with all its fields.
  47. */
  48. __isl_null ppcg_ht_bounds *ppcg_ht_bounds_free(
  49. __isl_take ppcg_ht_bounds *bounds)
  50. {
  51. if (!bounds)
  52. return NULL;
  53. isl_val_free(bounds->upper);
  54. isl_multi_val_free(bounds->lower);
  55. free(bounds);
  56. return NULL;
  57. }
  58. /* Create a ppcg_ht_bounds object for a band living in "space".
  59. * The bounds are initialized to NaN.
  60. */
  61. __isl_give ppcg_ht_bounds *ppcg_ht_bounds_alloc(__isl_take isl_space *space)
  62. {
  63. int i, n;
  64. isl_ctx *ctx;
  65. ppcg_ht_bounds *bounds;
  66. if (!space)
  67. return NULL;
  68. ctx = isl_space_get_ctx(space);
  69. bounds = isl_alloc_type(ctx, struct ppcg_ht_bounds);
  70. if (!bounds)
  71. goto error;
  72. bounds->upper = isl_val_nan(ctx);
  73. bounds->lower = isl_multi_val_zero(space);
  74. n = isl_multi_val_dim(bounds->lower, isl_dim_set);
  75. for (i = 0; i < n; ++i) {
  76. isl_val *v = isl_val_copy(bounds->upper);
  77. bounds->lower = isl_multi_val_set_val(bounds->lower, i, v);
  78. }
  79. if (!bounds->lower || !bounds->upper)
  80. return ppcg_ht_bounds_free(bounds);
  81. return bounds;
  82. error:
  83. isl_space_free(space);
  84. return NULL;
  85. }
  86. void ppcg_ht_bounds_dump(__isl_keep ppcg_ht_bounds *bounds)
  87. {
  88. if (!bounds)
  89. return;
  90. fprintf(stderr, "lower: ");
  91. isl_multi_val_dump(bounds->lower);
  92. fprintf(stderr, "upper: ");
  93. isl_val_dump(bounds->upper);
  94. }
  95. /* Return the upper bound on the relative dependence distances
  96. * in the first space dimension.
  97. */
  98. __isl_give isl_val *ppcg_ht_bounds_get_upper(__isl_keep ppcg_ht_bounds *bounds)
  99. {
  100. if (!bounds)
  101. return NULL;
  102. return isl_val_copy(bounds->upper);
  103. }
  104. /* Replace the upper bound on the relative dependence distances
  105. * in the first space dimension by "upper".
  106. */
  107. __isl_give ppcg_ht_bounds *ppcg_ht_bounds_set_upper(
  108. __isl_take ppcg_ht_bounds *bounds, __isl_take isl_val *upper)
  109. {
  110. if (!bounds || !upper)
  111. goto error;
  112. isl_val_free(bounds->upper);
  113. bounds->upper = upper;
  114. return bounds;
  115. error:
  116. ppcg_ht_bounds_free(bounds);
  117. isl_val_free(upper);
  118. return NULL;
  119. }
  120. /* Return the lower bound on the relative dependence distances
  121. * in space dimension "pos".
  122. */
  123. __isl_give isl_val *ppcg_ht_bounds_get_lower(__isl_keep ppcg_ht_bounds *bounds,
  124. int pos)
  125. {
  126. if (!bounds)
  127. return NULL;
  128. return isl_multi_val_get_val(bounds->lower, pos);
  129. }
  130. /* Replace the lower bound on the relative dependence distances
  131. * in space dimension "pos" by "lower".
  132. */
  133. __isl_give ppcg_ht_bounds *ppcg_ht_bounds_set_lower(
  134. __isl_take ppcg_ht_bounds *bounds, int pos, __isl_take isl_val *lower)
  135. {
  136. if (!bounds || !lower)
  137. goto error;
  138. bounds->lower = isl_multi_val_set_val(bounds->lower, pos, lower);
  139. if (!bounds->lower)
  140. return ppcg_ht_bounds_free(bounds);
  141. return bounds;
  142. error:
  143. ppcg_ht_bounds_free(bounds);
  144. isl_val_free(lower);
  145. return NULL;
  146. }
  147. /* Can the bounds on relative dependence distances recorded in "bounds"
  148. * be used to perform hybrid tiling?
  149. * In particular, have appropriate lower and upper bounds been found?
  150. * Any NaN indicates that no corresponding bound was found.
  151. */
  152. isl_bool ppcg_ht_bounds_is_valid(__isl_keep ppcg_ht_bounds *bounds)
  153. {
  154. isl_bool is_nan;
  155. int i, n;
  156. if (!bounds)
  157. return isl_bool_error;
  158. is_nan = isl_val_is_nan(bounds->upper);
  159. if (is_nan < 0)
  160. return isl_bool_error;
  161. if (is_nan)
  162. return isl_bool_false;
  163. n = isl_multi_val_dim(bounds->lower, isl_dim_set);
  164. for (i = 0; i < n; ++i) {
  165. isl_val *v;
  166. v = isl_multi_val_get_val(bounds->lower, i);
  167. is_nan = isl_val_is_nan(v);
  168. if (is_nan < 0)
  169. return isl_bool_error;
  170. if (is_nan)
  171. return isl_bool_false;
  172. isl_val_free(v);
  173. }
  174. return isl_bool_true;
  175. }
  176. /* Structure that represents the basic hexagonal tiling,
  177. * along with information that is needed to perform the hybrid tiling.
  178. *
  179. * "bounds" are the bounds on the dependence distances that
  180. * define the hexagonal shape and the required skewing in the remaining
  181. * space dimensions.
  182. *
  183. * "input_node" points to the input pair of band nodes.
  184. * "input_schedule" is the partial schedule of this input pair of band nodes.
  185. * The space of this schedule is [P -> C], where P is the space
  186. * of the parent node and C is the space of the child node.
  187. *
  188. * "space_sizes" represent the total size of a tile for the space
  189. * dimensions, i.e., those corresponding to the child node.
  190. * The space of "space_sizes" is C.
  191. * If S_0 is the original tile size in the first space dimension,
  192. * then the first entry of "space_sizes" is equal to
  193. * W = 2*S_0 + floor(d_l h) + floor(d_u h).
  194. * The remaining entries are the same as in the original tile sizes.
  195. *
  196. * The basic hexagonal tiling "hex" is defined
  197. * in a "ts" (time-space) space and corresponds to the phase-1 tiles.
  198. * "time_tile" maps the "ts" space to outer time tile.
  199. * Is is equal to ts[t, s] -> floor(t/(2 * S_t)), with S_t the original tile
  200. * size corresponding to the parent node.
  201. * "local_time" maps the "ts" space to the time dimension inside each tile.
  202. * It is equal to ts[t, s] -> t mod (2 S_t), with S_t the original tile
  203. * size corresponding to the parent node.
  204. * "shift_space" shifts the tiles at time tile T = floor(t/(2 S_t))
  205. * in the space dimension such that they align to a multiple of W.
  206. * It is equal to ts[t, s] -> s + (-(2 * shift_s)*T) % W,
  207. * with shift_s = S_0 + floor(d_u h).
  208. * "shift_phase" is the shift taken to go from phase 0 to phase 1
  209. * It is equal to ts[t, s] -> ts[t + S_t, s + shift_s],
  210. * with shift_s = S_0 + floor(d_u h).
  211. *
  212. * "project_ts" projects the space of the input schedule to the ts-space.
  213. * It is equal to [P[t] -> C[s_0, ...]] -> ts[t, s_0].
  214. */
  215. struct ppcg_ht_tiling {
  216. int ref;
  217. ppcg_ht_bounds *bounds;
  218. isl_schedule_node *input_node;
  219. isl_multi_union_pw_aff *input_schedule;
  220. isl_multi_val *space_sizes;
  221. isl_aff *time_tile;
  222. isl_aff *local_time;
  223. isl_aff *shift_space;
  224. isl_multi_aff *shift_phase;
  225. isl_set *hex;
  226. isl_multi_aff *project_ts;
  227. };
  228. typedef struct ppcg_ht_tiling ppcg_ht_tiling;
  229. /* Return the space of the pair of band nodes that form the input
  230. * to the hybrid tiling.
  231. * In particular, return the space [P -> C], where P is the space
  232. * of the parent node and C is the space of the child node.
  233. */
  234. __isl_give isl_space *ppcg_ht_tiling_get_input_space(
  235. __isl_keep ppcg_ht_tiling *tile)
  236. {
  237. if (!tile)
  238. return NULL;
  239. return isl_multi_union_pw_aff_get_space(tile->input_schedule);
  240. }
  241. /* Remove a reference to "tile" and free "tile" along with all its fields
  242. * as soon as the reference count drops to zero.
  243. */
  244. static __isl_null ppcg_ht_tiling *ppcg_ht_tiling_free(
  245. __isl_take ppcg_ht_tiling *tiling)
  246. {
  247. if (!tiling)
  248. return NULL;
  249. if (--tiling->ref > 0)
  250. return NULL;
  251. ppcg_ht_bounds_free(tiling->bounds);
  252. isl_schedule_node_free(tiling->input_node);
  253. isl_multi_union_pw_aff_free(tiling->input_schedule);
  254. isl_multi_val_free(tiling->space_sizes);
  255. isl_aff_free(tiling->time_tile);
  256. isl_aff_free(tiling->local_time);
  257. isl_aff_free(tiling->shift_space);
  258. isl_multi_aff_free(tiling->shift_phase);
  259. isl_set_free(tiling->hex);
  260. isl_multi_aff_free(tiling->project_ts);
  261. free(tiling);
  262. return NULL;
  263. }
  264. /* Return a new reference to "tiling".
  265. */
  266. __isl_give ppcg_ht_tiling *ppcg_ht_tiling_copy(
  267. __isl_keep ppcg_ht_tiling *tiling)
  268. {
  269. if (!tiling)
  270. return NULL;
  271. tiling->ref++;
  272. return tiling;
  273. }
  274. /* Return the isl_ctx to which "tiling" belongs.
  275. */
  276. isl_ctx *ppcg_ht_tiling_get_ctx(__isl_keep ppcg_ht_tiling *tiling)
  277. {
  278. if (!tiling)
  279. return NULL;
  280. return isl_multi_union_pw_aff_get_ctx(tiling->input_schedule);
  281. }
  282. /* Representation of one of the two phases of hybrid tiling.
  283. *
  284. * "tiling" points to the shared tiling data.
  285. *
  286. * "time_tile", "local_time" and "shift_space" are equal to the corresponding
  287. * fields of "tiling", pulled back to the input space.
  288. * In case of phase 0, these expressions have also been moved
  289. * from phase 1 to phase 0.
  290. *
  291. * "domain" contains the hexagonal tiling of this phase.
  292. *
  293. * "space_shift" is the shift that should be added to the space band
  294. * in order to be able to apply rectangular tiling to the space.
  295. * For phase 1, it is equal to
  296. *
  297. * [P[t] -> C[s_0, s_i]] -> C[(-(2 * shift_s)*T) % W, dl_i * u]
  298. *
  299. * with shift_s = S_0 + floor(d_u h),
  300. * T equal to "time_tile" and u equal to "local_time".
  301. * For phase 0, it is equal to
  302. *
  303. * [P[t] -> C[s_0, s_i]] -> C[shift_s + (-(2 * shift_s)*T) % W, dl_i * u]
  304. *
  305. * "space_tile" is the space tiling. It is equal to
  306. *
  307. * [P[t] -> C[s]] -> C[floor((s + space_shift)/space_size]
  308. */
  309. struct ppcg_ht_phase {
  310. ppcg_ht_tiling *tiling;
  311. isl_aff *time_tile;
  312. isl_aff *local_time;
  313. isl_aff *shift_space;
  314. isl_set *domain;
  315. isl_multi_aff *space_shift;
  316. isl_multi_aff *space_tile;
  317. };
  318. /* Free "phase" along with all its fields.
  319. */
  320. static __isl_null ppcg_ht_phase *ppcg_ht_phase_free(
  321. __isl_take ppcg_ht_phase *phase)
  322. {
  323. if (!phase)
  324. return NULL;
  325. ppcg_ht_tiling_free(phase->tiling);
  326. isl_aff_free(phase->time_tile);
  327. isl_aff_free(phase->local_time);
  328. isl_aff_free(phase->shift_space);
  329. isl_set_free(phase->domain);
  330. isl_multi_aff_free(phase->space_shift);
  331. isl_multi_aff_free(phase->space_tile);
  332. free(phase);
  333. return NULL;
  334. }
  335. /* Wrapper around ppcg_ht_phase_free for use as an argument
  336. * to isl_id_set_free_user.
  337. */
  338. static void ppcg_ht_phase_free_wrap(void *user)
  339. {
  340. ppcg_ht_phase *phase = user;
  341. ppcg_ht_phase_free(phase);
  342. }
  343. /* Return the domain of hybrid tiling phase "phase".
  344. */
  345. static __isl_give isl_set *ppcg_ht_phase_get_domain(ppcg_ht_phase *phase)
  346. {
  347. if (!phase)
  348. return NULL;
  349. return isl_set_copy(phase->domain);
  350. }
  351. /* Return the space of the pair of band nodes that form the input
  352. * to the hybrid tiling of which "phase" is a phase.
  353. * In particular, return the space [P -> C], where P is the space
  354. * of the parent node and C is the space of the child node.
  355. */
  356. static __isl_give isl_space *ppcg_ht_phase_get_input_space(
  357. __isl_keep ppcg_ht_phase *phase)
  358. {
  359. if (!phase)
  360. return NULL;
  361. return ppcg_ht_tiling_get_input_space(phase->tiling);
  362. }
  363. /* Construct the lower left constraint of the hexagonal tile, i.e.,
  364. *
  365. * du a - b <= (2h+1) du - duh
  366. * -du a + b + (2h+1) du - duh >= 0
  367. *
  368. * where duh = floor(du * h).
  369. *
  370. * This constraint corresponds to (6) in
  371. * "Hybrid Hexagonal/Classical Tiling for GPUs".
  372. */
  373. static __isl_give isl_constraint *hex_lower_left(__isl_take isl_local_space *ls,
  374. __isl_keep isl_val *h, __isl_keep isl_val *du, __isl_keep isl_val *duh)
  375. {
  376. isl_val *v;
  377. isl_aff *aff;
  378. v = isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h), 2), 1);
  379. v = isl_val_mul(v, isl_val_copy(du));
  380. v = isl_val_sub(v, isl_val_copy(duh));
  381. aff = isl_aff_val_on_domain(ls, v);
  382. v = isl_val_neg(isl_val_copy(du));
  383. aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, v);
  384. aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, 1);
  385. return isl_inequality_from_aff(aff);
  386. }
  387. /* Construct the lower constraint of the hexagonal tile, i.e.,
  388. *
  389. * a <= 2h+1
  390. * -a + 2h+1 >= 0
  391. *
  392. * This constraint corresponds to (7) in
  393. * "Hybrid Hexagonal/Classical Tiling for GPUs".
  394. */
  395. static __isl_give isl_constraint *hex_lower(__isl_take isl_local_space *ls,
  396. __isl_keep isl_val *h)
  397. {
  398. isl_val *v;
  399. isl_aff *aff;
  400. v = isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h), 2), 1);
  401. aff = isl_aff_val_on_domain(ls, v);
  402. aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 0, -1);
  403. return isl_inequality_from_aff(aff);
  404. }
  405. /* Construct the lower right constraint of the hexagonal tile, i.e.,
  406. *
  407. * dl a + b <= (2h+1) dl + duh + (s0-1)
  408. * -dl a - b + (2h+1) dl + duh + (s0-1) >= 0
  409. *
  410. * where duh = floor(du * h).
  411. *
  412. * This constraint corresponds to (8) in
  413. * "Hybrid Hexagonal/Classical Tiling for GPUs".
  414. */
  415. static __isl_give isl_constraint *hex_lower_right(
  416. __isl_take isl_local_space *ls, __isl_keep isl_val *h,
  417. __isl_keep isl_val *s0, __isl_keep isl_val *dl, __isl_keep isl_val *duh)
  418. {
  419. isl_val *v;
  420. isl_aff *aff;
  421. v = isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h), 2), 1);
  422. v = isl_val_mul(v, isl_val_copy(dl));
  423. v = isl_val_add(v, isl_val_copy(duh));
  424. v = isl_val_add(v, isl_val_copy(s0));
  425. v = isl_val_sub_ui(v, 1);
  426. aff = isl_aff_val_on_domain(ls, v);
  427. v = isl_val_neg(isl_val_copy(dl));
  428. aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, v);
  429. aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, -1);
  430. return isl_inequality_from_aff(aff);
  431. }
  432. /* Construct the upper left constraint of the hexagonal tile, i.e.,
  433. *
  434. * dl a + b >= h dl - (d - 1)/d with d = den(dl)
  435. * dl a + b - h dl + (d - 1)/d >= 0
  436. *
  437. * This constraint corresponds to (10) in
  438. * "Hybrid Hexagonal/Classical Tiling for GPUs".
  439. */
  440. static __isl_give isl_constraint *hex_upper_left(__isl_take isl_local_space *ls,
  441. __isl_keep isl_val *h, __isl_keep isl_val *dl)
  442. {
  443. isl_val *v, *d;
  444. isl_aff *aff;
  445. d = isl_val_get_den_val(dl);
  446. v = isl_val_sub_ui(isl_val_copy(d), 1);
  447. v = isl_val_div(v, d);
  448. v = isl_val_sub(v, isl_val_mul(isl_val_copy(h), isl_val_copy(dl)));
  449. aff = isl_aff_val_on_domain(ls, v);
  450. aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, isl_val_copy(dl));
  451. aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, 1);
  452. return isl_inequality_from_aff(aff);
  453. }
  454. /* Construct the upper right constraint of the hexagonal tile, i.e.,
  455. *
  456. * du a - b >= du h - duh - (s0-1) - dlh - (d - 1)/d with d = den(du)
  457. * du a - b - du h + duh + (s0-1) + dlh + (d - 1)/d >= 0
  458. *
  459. * where dlh = floor(dl * h) and duh = floor(du * h).
  460. *
  461. * This constraint corresponds to (12) in
  462. * "Hybrid Hexagonal/Classical Tiling for GPUs".
  463. */
  464. static __isl_give isl_constraint *hex_upper_right(
  465. __isl_take isl_local_space *ls, __isl_keep isl_val *h,
  466. __isl_keep isl_val *s0, __isl_keep isl_val *du,
  467. __isl_keep isl_val *dlh, __isl_keep isl_val *duh)
  468. {
  469. isl_val *v, *d;
  470. isl_aff *aff;
  471. d = isl_val_get_den_val(du);
  472. v = isl_val_sub_ui(isl_val_copy(d), 1);
  473. v = isl_val_div(v, d);
  474. v = isl_val_sub(v, isl_val_mul(isl_val_copy(h), isl_val_copy(du)));
  475. v = isl_val_add(v, isl_val_copy(duh));
  476. v = isl_val_add(v, isl_val_copy(dlh));
  477. v = isl_val_add(v, isl_val_copy(s0));
  478. v = isl_val_sub_ui(v, 1);
  479. aff = isl_aff_val_on_domain(ls, v);
  480. aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, isl_val_copy(du));
  481. aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, -1);
  482. return isl_inequality_from_aff(aff);
  483. }
  484. /* Construct the uppper constraint of the hexagonal tile, i.e.,
  485. *
  486. * a >= 0
  487. *
  488. * This constraint corresponds to (13) in
  489. * "Hybrid Hexagonal/Classical Tiling for GPUs".
  490. */
  491. static __isl_give isl_constraint *hex_upper(__isl_take isl_local_space *ls)
  492. {
  493. isl_aff *aff;
  494. aff = isl_aff_var_on_domain(ls, isl_dim_set, 0);
  495. return isl_inequality_from_aff(aff);
  496. }
  497. /* Construct the basic hexagonal tile shape.
  498. * "space" is the 2D space in which the hexagon should be constructed.
  499. * h is st-1, with st the tile size in the time dimension
  500. * s0 is the tile size in the space dimension
  501. * dl is a bound on the negative relative dependence distances, i.e.,
  502. *
  503. * d_s >= -dl d_t
  504. *
  505. * du is a bound on the positive relative dependence distances, i.e.,
  506. *
  507. * d_s <= du d_t
  508. *
  509. * with (d_t,d_s) any dependence distance vector.
  510. * dlh = floor(dl * h)
  511. * duh = floor(du * h)
  512. *
  513. * The shape of the hexagon is as follows:
  514. *
  515. * 0 dlh dlh+s0-1
  516. * ______ __
  517. * 0 / \_ /
  518. * / \_ /
  519. * h / \ ______ /
  520. * h+1 \_ // \\_
  521. * \_ // \\_
  522. * 2h+1 \______// \\
  523. * 0 duh duh+s0-1
  524. * duh+s0-1+dlh
  525. * duh+s0-1+dlh+1+s0+1
  526. *
  527. * The next hexagon is shifted by duh + dlh + 2 * s0.
  528. *
  529. * The slope of the "/" constraints is dl.
  530. * The slope of the "\_" constraints is du.
  531. */
  532. static __isl_give isl_set *compute_hexagon(__isl_take isl_space *space,
  533. __isl_keep isl_val *h, __isl_keep isl_val *s0,
  534. __isl_keep isl_val *dl, __isl_keep isl_val *du,
  535. __isl_keep isl_val *dlh, __isl_keep isl_val *duh)
  536. {
  537. isl_local_space *ls;
  538. isl_constraint *c;
  539. isl_basic_set *bset;
  540. ls = isl_local_space_from_space(space);
  541. c = hex_lower_left(isl_local_space_copy(ls), h, du, duh);
  542. bset = isl_basic_set_from_constraint(c);
  543. c = hex_lower(isl_local_space_copy(ls), h);
  544. bset = isl_basic_set_add_constraint(bset, c);
  545. c = hex_lower_right(isl_local_space_copy(ls), h, s0, dl, duh);
  546. bset = isl_basic_set_add_constraint(bset, c);
  547. c = hex_upper_left(isl_local_space_copy(ls), h, dl);
  548. bset = isl_basic_set_add_constraint(bset, c);
  549. c = hex_upper_right(isl_local_space_copy(ls), h, s0, du, dlh, duh);
  550. bset = isl_basic_set_add_constraint(bset, c);
  551. c = hex_upper(ls);
  552. bset = isl_basic_set_add_constraint(bset, c);
  553. return isl_set_from_basic_set(bset);
  554. }
  555. /* Name of the ts-space.
  556. */
  557. static const char *ts_space_name = "ts";
  558. /* Construct and return the space ts[t, s].
  559. */
  560. static __isl_give isl_space *construct_ts_space(isl_ctx *ctx)
  561. {
  562. isl_space *s;
  563. s = isl_space_set_alloc(ctx, 0, 2);
  564. s = isl_space_set_tuple_name(s, isl_dim_set, ts_space_name);
  565. return s;
  566. }
  567. /* Name of the local ts-space.
  568. */
  569. static const char *local_ts_space_name = "local_ts";
  570. /* Construct and return the space local_ts[t, s].
  571. */
  572. static __isl_give isl_space *construct_local_ts_space(isl_ctx *ctx)
  573. {
  574. isl_space *s;
  575. s = isl_space_set_alloc(ctx, 0, 2);
  576. s = isl_space_set_tuple_name(s, isl_dim_set, local_ts_space_name);
  577. return s;
  578. }
  579. /* Compute the total size of a tile for the space dimensions,
  580. * i.e., those corresponding to the child node
  581. * of the input pattern.
  582. * If S_0 is the original tile size in the first space dimension,
  583. * then the first entry of "space_sizes" is equal to
  584. * W = 2*S_0 + floor(d_l h) + floor(d_u h).
  585. * The remaining entries are the same as in the original tile sizes.
  586. * "tile_sizes" contains the original tile sizes, including
  587. * the tile size corresponding to the parent node.
  588. * "dlh" is equal to floor(d_l h).
  589. * "duh" is equal to floor(d_u h).
  590. */
  591. static __isl_give isl_multi_val *compute_space_sizes(
  592. __isl_keep isl_multi_val *tile_sizes,
  593. __isl_keep isl_val *dlh, __isl_keep isl_val *duh)
  594. {
  595. isl_val *size;
  596. isl_multi_val *space_sizes;
  597. space_sizes = isl_multi_val_copy(tile_sizes);
  598. space_sizes = isl_multi_val_factor_range(space_sizes);
  599. size = isl_multi_val_get_val(space_sizes, 0);
  600. size = isl_val_mul_ui(size, 2);
  601. size = isl_val_add(size, isl_val_copy(duh));
  602. size = isl_val_add(size, isl_val_copy(dlh));
  603. space_sizes = isl_multi_val_set_val(space_sizes, 0, size);
  604. return space_sizes;
  605. }
  606. /* Compute the offset of phase 1 with respect to phase 0
  607. * in the ts-space ("space").
  608. * In particular, return
  609. *
  610. * ts[st, s0 + duh]
  611. */
  612. static __isl_give isl_multi_val *compute_phase_shift(
  613. __isl_keep isl_space *space, __isl_keep isl_val *st,
  614. __isl_keep isl_val *s0, __isl_keep isl_val *duh)
  615. {
  616. isl_val *v;
  617. isl_multi_val *phase_shift;
  618. phase_shift = isl_multi_val_zero(isl_space_copy(space));
  619. phase_shift = isl_multi_val_set_val(phase_shift, 0, isl_val_copy(st));
  620. v = isl_val_add(isl_val_copy(duh), isl_val_copy(s0));
  621. phase_shift = isl_multi_val_set_val(phase_shift, 1, v);
  622. return phase_shift;
  623. }
  624. /* Return the function
  625. *
  626. * ts[t, s] -> floor(t/(2 * st))
  627. *
  628. * representing the time tile.
  629. * "space" is the space ts[t, s].
  630. */
  631. static __isl_give isl_aff *compute_time_tile(__isl_keep isl_space *space,
  632. __isl_keep isl_val *st)
  633. {
  634. isl_val *v;
  635. isl_aff *t;
  636. isl_local_space *ls;
  637. ls = isl_local_space_from_space(isl_space_copy(space));
  638. t = isl_aff_var_on_domain(ls, isl_dim_set, 0);
  639. v = isl_val_mul_ui(isl_val_copy(st), 2);
  640. t = isl_aff_floor(isl_aff_scale_down_val(t, v));
  641. return t;
  642. }
  643. /* Compute a shift in the space dimension for tiles
  644. * at time tile T = floor(t/(2 * S_t))
  645. * such that they align to a multiple of the total space tile dimension W.
  646. * In particular, compute
  647. *
  648. * ts[t, s] -> s + (-(2 * shift_s)*T) % W
  649. *
  650. * where shift_s is the shift of phase 1 with respect to phase 0
  651. * in the space dimension (the first element of "phase_shift").
  652. * W is stored in the first element of "space_sizes".
  653. * "time_tile" is the function
  654. *
  655. * ts[t, s] -> floor(t/(2 * S_T))
  656. *
  657. * Since phase 1 is shifted by shift_s with respect to phase 0,
  658. * the next line of phase 0 (at T+1) is shifted by 2*shift_s
  659. * with respect to the previous line (at T).
  660. * A shift of -(2 * shift_s)*T therefore allows the basic pattern
  661. * (which starts at 0) to be applied.
  662. * However, this shift will be used to obtain the tile coordinate
  663. * in the first space dimension and if the original values
  664. * in the space dimension are non-negative, then the shift should
  665. * not make them negative. Moreover, the shift should be as minimal
  666. * as possible.
  667. * Since the pattern repeats itself with a period of W in the space
  668. * dimension, the shift can be replaced by (-(2 * shift_s)*T) % W.
  669. */
  670. static __isl_give isl_aff *compute_shift_space(__isl_keep isl_aff *time_tile,
  671. __isl_keep isl_multi_val *space_sizes,
  672. __isl_keep isl_multi_val *phase_shift)
  673. {
  674. isl_val *v;
  675. isl_aff *s, *t;
  676. isl_local_space *ls;
  677. ls = isl_local_space_from_space(isl_aff_get_domain_space(time_tile));
  678. t = isl_aff_copy(time_tile);
  679. v = isl_val_mul_ui(isl_multi_val_get_val(phase_shift, 1), 2);
  680. v = isl_val_neg(v);
  681. t = isl_aff_scale_val(t, v);
  682. v = isl_multi_val_get_val(space_sizes, 0);
  683. t = isl_aff_mod_val(t, v);
  684. s = isl_aff_var_on_domain(ls, isl_dim_set, 1);
  685. s = isl_aff_add(s, t);
  686. return s;
  687. }
  688. /* Give the phase_shift ts[S_t, S_0 + floor(d_u h)],
  689. * compute a function that applies the shift, i.e.,
  690. *
  691. * ts[t, s] -> ts[t + S_t, s + S_0 + floor(d_u h)],
  692. */
  693. static __isl_give isl_multi_aff *compute_shift_phase(
  694. __isl_keep isl_multi_val *phase_shift)
  695. {
  696. isl_space *space;
  697. isl_multi_aff *shift;
  698. space = isl_multi_val_get_space(phase_shift);
  699. shift = isl_multi_aff_multi_val_on_space(space,
  700. isl_multi_val_copy(phase_shift));
  701. space = isl_multi_aff_get_space(shift);
  702. shift = isl_multi_aff_add(shift, isl_multi_aff_identity(space));
  703. return shift;
  704. }
  705. /* Compute a mapping from the ts-space to the local coordinates
  706. * within each tile. In particular, compute
  707. *
  708. * ts[t, s] -> local_ts[t % (2 S_t), (s + (-(2 * shift_s)*T) % W) % W]
  709. *
  710. * "ts" is the space ts[t, s]
  711. * "local_ts" is the space local_ts[t, s]
  712. * "shift_space" is equal to ts[t, s] -> s + (-(2 * shift_s)*T) % W
  713. * "st" is the tile size in the time dimension S_t.
  714. * The first element of "space_sizes" is equal to W.
  715. */
  716. static __isl_give isl_multi_aff *compute_localize(
  717. __isl_keep isl_space *local_ts, __isl_keep isl_aff *shift_space,
  718. __isl_keep isl_val *st, __isl_keep isl_multi_val *space_sizes)
  719. {
  720. isl_val *v;
  721. isl_space *space;
  722. isl_aff *s, *t;
  723. isl_multi_aff *localize;
  724. space = isl_aff_get_domain_space(shift_space);
  725. local_ts = isl_space_copy(local_ts);
  726. space = isl_space_map_from_domain_and_range(space, local_ts);
  727. localize = isl_multi_aff_identity(space);
  728. t = isl_multi_aff_get_aff(localize, 0);
  729. v = isl_val_mul_ui(isl_val_copy(st), 2);
  730. t = isl_aff_mod_val(t, v);
  731. localize = isl_multi_aff_set_aff(localize, 0, t);
  732. s = isl_aff_copy(shift_space);
  733. v = isl_multi_val_get_val(space_sizes, 0);
  734. s = isl_aff_mod_val(s, v);
  735. localize = isl_multi_aff_set_aff(localize, 1, s);
  736. return localize;
  737. }
  738. /* Set the project_ts field of "tiling".
  739. *
  740. * This field projects the space of the input schedule to the ts-space.
  741. * It is equal to [P[t] -> C[s_0, ...]] -> ts[t, s_0].
  742. */
  743. static __isl_give ppcg_ht_tiling *ppcg_ht_tiling_set_project_ts(
  744. __isl_take ppcg_ht_tiling *tiling)
  745. {
  746. int n;
  747. isl_space *space;
  748. isl_multi_aff *project;
  749. if (!tiling)
  750. return NULL;
  751. space = ppcg_ht_tiling_get_input_space(tiling);
  752. n = isl_space_dim(space, isl_dim_set);
  753. project = isl_multi_aff_project_out_map(space, isl_dim_set, 2, n - 2);
  754. project = isl_multi_aff_set_tuple_name(project,
  755. isl_dim_out, ts_space_name);
  756. if (!project)
  757. return ppcg_ht_tiling_free(tiling);
  758. tiling->project_ts = project;
  759. return tiling;
  760. }
  761. /* Construct a hybrid tiling description from bounds on the dependence
  762. * distances "bounds".
  763. * "input_node" points to the original parent node.
  764. * "input_schedule" is the combined schedule of the parent and child
  765. * node in the input.
  766. * "tile_sizes" are the original, user specified tile sizes.
  767. */
  768. static __isl_give ppcg_ht_tiling *ppcg_ht_bounds_construct_tiling(
  769. __isl_take ppcg_ht_bounds *bounds,
  770. __isl_keep isl_schedule_node *input_node,
  771. __isl_keep isl_multi_union_pw_aff *input_schedule,
  772. __isl_keep isl_multi_val *tile_sizes)
  773. {
  774. isl_ctx *ctx;
  775. ppcg_ht_tiling *tiling;
  776. isl_multi_val *space_sizes, *phase_shift;
  777. isl_aff *time_tile, *shift_space;
  778. isl_multi_aff *localize;
  779. isl_val *h, *duh, *dlh;
  780. isl_val *st, *s0, *du, *dl;
  781. isl_space *ts, *local_ts;
  782. if (!bounds || !input_node || !input_schedule || !tile_sizes)
  783. goto error;
  784. ctx = isl_multi_union_pw_aff_get_ctx(input_schedule);
  785. tiling = isl_calloc_type(ctx, struct ppcg_ht_tiling);
  786. if (!tiling)
  787. goto error;
  788. tiling->ref = 1;
  789. st = isl_multi_val_get_val(tile_sizes, 0);
  790. h = isl_val_sub_ui(isl_val_copy(st), 1);
  791. s0 = isl_multi_val_get_val(tile_sizes, 1);
  792. du = ppcg_ht_bounds_get_upper(bounds);
  793. dl = ppcg_ht_bounds_get_lower(bounds, 0);
  794. duh = isl_val_floor(isl_val_mul(isl_val_copy(du), isl_val_copy(h)));
  795. dlh = isl_val_floor(isl_val_mul(isl_val_copy(dl), isl_val_copy(h)));
  796. ts = construct_ts_space(ctx);
  797. local_ts = construct_local_ts_space(ctx);
  798. space_sizes = compute_space_sizes(tile_sizes, dlh, duh);
  799. phase_shift = compute_phase_shift(ts, st, s0, duh);
  800. time_tile = compute_time_tile(ts, st);
  801. shift_space = compute_shift_space(time_tile, space_sizes, phase_shift);
  802. localize = compute_localize(local_ts, shift_space, st, space_sizes);
  803. isl_space_free(ts);
  804. tiling->input_node = isl_schedule_node_copy(input_node);
  805. tiling->input_schedule = isl_multi_union_pw_aff_copy(input_schedule);
  806. tiling->space_sizes = space_sizes;
  807. tiling->bounds = bounds;
  808. tiling->local_time = isl_multi_aff_get_aff(localize, 0);
  809. tiling->hex = compute_hexagon(local_ts, h, s0, dl, du, dlh, duh);
  810. tiling->hex = isl_set_preimage_multi_aff(tiling->hex, localize);
  811. tiling->time_tile = time_tile;
  812. tiling->shift_space = shift_space;
  813. tiling->shift_phase = compute_shift_phase(phase_shift);
  814. isl_multi_val_free(phase_shift);
  815. isl_val_free(duh);
  816. isl_val_free(dlh);
  817. isl_val_free(du);
  818. isl_val_free(dl);
  819. isl_val_free(s0);
  820. isl_val_free(st);
  821. isl_val_free(h);
  822. if (!tiling->input_schedule || !tiling->local_time || !tiling->hex ||
  823. !tiling->shift_space || !tiling->shift_phase)
  824. return ppcg_ht_tiling_free(tiling);
  825. tiling = ppcg_ht_tiling_set_project_ts(tiling);
  826. return tiling;
  827. error:
  828. ppcg_ht_bounds_free(bounds);
  829. return NULL;
  830. }
  831. /* Are all members of the band node "node" coincident?
  832. */
  833. static isl_bool all_coincident(__isl_keep isl_schedule_node *node)
  834. {
  835. int i, n;
  836. n = isl_schedule_node_band_n_member(node);
  837. for (i = 0; i < n; ++i) {
  838. isl_bool c;
  839. c = isl_schedule_node_band_member_get_coincident(node, i);
  840. if (c < 0 || !c)
  841. return c;
  842. }
  843. return isl_bool_true;
  844. }
  845. /* Does "node" satisfy the properties of the inner node in the input
  846. * pattern for hybrid tiling?
  847. * That is, is it a band node with only coincident members, of which
  848. * there is at least one?
  849. */
  850. static isl_bool has_child_properties(__isl_keep isl_schedule_node *node)
  851. {
  852. if (!node)
  853. return isl_bool_error;
  854. if (isl_schedule_node_get_type(node) != isl_schedule_node_band)
  855. return isl_bool_false;
  856. if (isl_schedule_node_band_n_member(node) < 1)
  857. return isl_bool_false;
  858. return all_coincident(node);
  859. }
  860. /* Does "node" satisfy the properties of the outer node in the input
  861. * pattern for hybrid tiling?
  862. * That is, is it a band node with a single member?
  863. */
  864. static isl_bool has_parent_properties(__isl_keep isl_schedule_node *node)
  865. {
  866. if (!node)
  867. return isl_bool_error;
  868. if (isl_schedule_node_get_type(node) != isl_schedule_node_band)
  869. return isl_bool_false;
  870. if (isl_schedule_node_band_n_member(node) != 1)
  871. return isl_bool_false;
  872. return isl_bool_true;
  873. }
  874. /* Does the parent of "node" satisfy the input patttern for hybrid tiling?
  875. * That is, does "node" satisfy the properties of the inner node and
  876. * does the parent of "node" satisfy the properties of the outer node?
  877. */
  878. isl_bool ppcg_ht_parent_has_input_pattern(__isl_keep isl_schedule_node *node)
  879. {
  880. isl_bool has_pattern;
  881. has_pattern = has_child_properties(node);
  882. if (has_pattern < 0 || !has_pattern)
  883. return has_pattern;
  884. node = isl_schedule_node_copy(node);
  885. node = isl_schedule_node_parent(node);
  886. has_pattern = has_parent_properties(node);
  887. isl_schedule_node_free(node);
  888. return has_pattern;
  889. }
  890. /* Does "node" satisfy the input patttern for hybrid tiling?
  891. * That is, does "node" satisfy the properties of the outer node and
  892. * does the child of "node" satisfy the properties of the inner node?
  893. */
  894. isl_bool ppcg_ht_has_input_pattern(__isl_keep isl_schedule_node *node)
  895. {
  896. isl_bool has_pattern;
  897. has_pattern = has_parent_properties(node);
  898. if (has_pattern < 0 || !has_pattern)
  899. return has_pattern;
  900. node = isl_schedule_node_get_child(node, 0);
  901. has_pattern = has_child_properties(node);
  902. isl_schedule_node_free(node);
  903. return has_pattern;
  904. }
  905. /* Check that "node" satisfies the input pattern for hybrid tiling.
  906. * Error out if it does not.
  907. */
  908. static isl_stat check_input_pattern(__isl_keep isl_schedule_node *node)
  909. {
  910. isl_bool has_pattern;
  911. has_pattern = ppcg_ht_has_input_pattern(node);
  912. if (has_pattern < 0)
  913. return isl_stat_error;
  914. if (!has_pattern)
  915. isl_die(isl_schedule_node_get_ctx(node), isl_error_invalid,
  916. "invalid input pattern for hybrid tiling",
  917. return isl_stat_error);
  918. return isl_stat_ok;
  919. }
  920. /* Extract the input schedule from "node", i.e., the product
  921. * of the partial schedules of the parent and child nodes
  922. * in the input pattern.
  923. */
  924. static __isl_give isl_multi_union_pw_aff *extract_input_schedule(
  925. __isl_keep isl_schedule_node *node)
  926. {
  927. isl_multi_union_pw_aff *partial, *partial2;
  928. partial = isl_schedule_node_band_get_partial_schedule(node);
  929. node = isl_schedule_node_get_child(node, 0);
  930. partial2 = isl_schedule_node_band_get_partial_schedule(node);
  931. isl_schedule_node_free(node);
  932. return isl_multi_union_pw_aff_range_product(partial, partial2);
  933. }
  934. /* Collect all dependences from "scop" that are relevant for performing
  935. * hybrid tiling on "node" and its child and map them to the schedule
  936. * space of this pair of nodes.
  937. *
  938. * In case live range reordering is not used,
  939. * the flow and the false dependences are collected.
  940. * In case live range reordering is used,
  941. * the flow and the forced dependences are collected, as well
  942. * as the order dependences that are adjacent to non-local
  943. * flow dependences.
  944. *
  945. * In all cases, only dependences that map to the same instance
  946. * of the outer part of the schedule are considered.
  947. */
  948. static __isl_give isl_map *collect_deps(struct ppcg_scop *scop,
  949. __isl_keep isl_schedule_node *node)
  950. {
  951. isl_space *space;
  952. isl_multi_union_pw_aff *prefix, *partial;
  953. isl_union_map *flow, *other, *dep, *umap;
  954. isl_map *map;
  955. prefix = isl_schedule_node_get_prefix_schedule_multi_union_pw_aff(node);
  956. partial = extract_input_schedule(node);
  957. space = isl_multi_union_pw_aff_get_space(partial);
  958. flow = isl_union_map_copy(scop->dep_flow);
  959. flow = isl_union_map_eq_at_multi_union_pw_aff(flow,
  960. isl_multi_union_pw_aff_copy(prefix));
  961. if (!scop->options->live_range_reordering) {
  962. other = isl_union_map_copy(scop->dep_false);
  963. other = isl_union_map_eq_at_multi_union_pw_aff(other, prefix);
  964. } else {
  965. isl_union_map *local, *non_local, *order, *adj;
  966. isl_union_set *domain, *range;
  967. other = isl_union_map_copy(scop->dep_forced);
  968. other = isl_union_map_eq_at_multi_union_pw_aff(other,
  969. isl_multi_union_pw_aff_copy(prefix));
  970. local = isl_union_map_copy(flow);
  971. local = isl_union_map_eq_at_multi_union_pw_aff(local,
  972. isl_multi_union_pw_aff_copy(partial));
  973. non_local = isl_union_map_copy(flow);
  974. non_local = isl_union_map_subtract(non_local, local);
  975. order = isl_union_map_copy(scop->dep_order);
  976. order = isl_union_map_eq_at_multi_union_pw_aff(order, prefix);
  977. adj = isl_union_map_copy(order);
  978. domain = isl_union_map_domain(isl_union_map_copy(non_local));
  979. domain = isl_union_set_coalesce(domain);
  980. adj = isl_union_map_intersect_range(adj, domain);
  981. other = isl_union_map_union(other, adj);
  982. adj = order;
  983. range = isl_union_map_range(non_local);
  984. range = isl_union_set_coalesce(range);
  985. adj = isl_union_map_intersect_domain(adj, range);
  986. other = isl_union_map_union(other, adj);
  987. }
  988. dep = isl_union_map_union(flow, other);
  989. umap = isl_union_map_from_multi_union_pw_aff(partial);
  990. dep = isl_union_map_apply_domain(dep, isl_union_map_copy(umap));
  991. dep = isl_union_map_apply_range(dep, umap);
  992. space = isl_space_map_from_set(space);
  993. map = isl_union_map_extract_map(dep, space);
  994. isl_union_map_free(dep);
  995. map = isl_map_coalesce(map);
  996. return map;
  997. }
  998. /* Given a constraint of the form
  999. *
  1000. * a i_0 + b i_1 >= 0
  1001. * or
  1002. * a i_0 + b i_1 = 0
  1003. *
  1004. * use it to update one or both of the non-negative bounds
  1005. * in "list" = (min, max) such that
  1006. *
  1007. * i_1 >= -min i_0
  1008. * and
  1009. * i_1 <= max i_0
  1010. *
  1011. * If b = 0, then the constraint cannot be used.
  1012. * Otherwise, the constraint is equivalent to
  1013. *
  1014. * sgn(b) i_1 >= - a/abs(b) i_0
  1015. * i.e.,
  1016. * i_1 >= - a/abs(b) i_0
  1017. * or
  1018. * i_1 <= a/abs(b) i_0
  1019. *
  1020. * Set the first or second element of "list" to max(0, a/abs(b)),
  1021. * according to the sign of "b". Or set both in case the constraint
  1022. * is an equality, taking into account the sign change.
  1023. */
  1024. static __isl_give isl_val_list *list_set_min_max(__isl_take isl_val_list *list,
  1025. __isl_keep isl_constraint *c)
  1026. {
  1027. isl_val *a, *b;
  1028. int sign;
  1029. int pos;
  1030. isl_bool eq, is_zero, is_neg;
  1031. eq = isl_constraint_is_equality(c);
  1032. if (eq < 0)
  1033. return isl_val_list_free(list);
  1034. b = isl_constraint_get_coefficient_val(c, isl_dim_set, 1);
  1035. is_zero = isl_val_is_zero(b);
  1036. if (is_zero == isl_bool_true) {
  1037. isl_val_free(b);
  1038. return list;
  1039. }
  1040. a = isl_constraint_get_coefficient_val(c, isl_dim_set, 0);
  1041. sign = isl_val_sgn(b);
  1042. b = isl_val_abs(b);
  1043. a = isl_val_div(a, b);
  1044. if (eq)
  1045. b = isl_val_copy(a);
  1046. pos = sign > 0 ? 0 : 1;
  1047. is_neg = isl_val_is_neg(a);
  1048. if (is_neg == isl_bool_true)
  1049. a = isl_val_set_si(a, 0);
  1050. list = isl_val_list_set_val(list, pos, a);
  1051. if (!eq)
  1052. return is_neg < 0 ? isl_val_list_free(list) : list;
  1053. pos = 1 - pos;
  1054. a = isl_val_neg(b);
  1055. is_neg = isl_val_is_neg(a);
  1056. if (is_neg == isl_bool_true)
  1057. a = isl_val_set_si(a, 0);
  1058. list = isl_val_list_set_val(list, pos, a);
  1059. return is_neg < 0 ? isl_val_list_free(list) : list;
  1060. }
  1061. /* If constraint "c" passes through the origin, then try and use it
  1062. * to update the non-negative bounds in "list" = (min, max) such that
  1063. *
  1064. * i_1 >= -min i_0
  1065. * and
  1066. * i_1 <= max i_0
  1067. */
  1068. static isl_stat set_min_max(__isl_take isl_constraint *c, void *user)
  1069. {
  1070. isl_val *v;
  1071. isl_val_list **list = user;
  1072. isl_bool is_zero;
  1073. v = isl_constraint_get_constant_val(c);
  1074. is_zero = isl_val_is_zero(v);
  1075. isl_val_free(v);
  1076. if (is_zero == isl_bool_true)
  1077. *list = list_set_min_max(*list, c);
  1078. isl_constraint_free(c);
  1079. return is_zero < 0 ? isl_stat_error : isl_stat_ok;
  1080. }
  1081. /* Given a set of dependence distance vectors "dist", compute
  1082. * pair of non-negative bounds min and max such that
  1083. *
  1084. * d_pos >= -min d_0
  1085. * and
  1086. * d_pos <= max d_0
  1087. *
  1088. * and return the pair (min, max).
  1089. * If no bound can be found in either direction, then the bound
  1090. * is replaced by NaN.
  1091. *
  1092. * The dependence distances are first projected onto the (d_0, d_pos).
  1093. * Then the zero dependence distance is added and the convex hull is computed.
  1094. * Finally, the bounds are extracted from the constraints of the convex hull
  1095. * that pass through the origin.
  1096. */
  1097. static __isl_give isl_val_list *min_max_dist(__isl_keep isl_set *dist, int pos)
  1098. {
  1099. isl_space *space;
  1100. isl_basic_set *hull;
  1101. int dim;
  1102. isl_ctx *ctx;
  1103. isl_val *nan;
  1104. isl_val_list *list;
  1105. ctx = isl_set_get_ctx(dist);
  1106. nan = isl_val_nan(ctx);
  1107. list = isl_val_list_alloc(ctx, 2);
  1108. list = isl_val_list_add(list, isl_val_copy(nan));
  1109. list = isl_val_list_add(list, nan);
  1110. dist = isl_set_copy(dist);
  1111. dim = isl_set_dim(dist, isl_dim_set);
  1112. if (dist && pos >= dim)
  1113. isl_die(ctx, isl_error_internal, "position out of bounds",
  1114. dist = isl_set_free(dist));
  1115. dist = isl_set_project_out(dist, isl_dim_set, pos + 1, dim - (pos + 1));
  1116. dist = isl_set_project_out(dist, isl_dim_set, 1, pos - 1);
  1117. space = isl_set_get_space(dist);
  1118. dist = isl_set_union(dist, isl_set_from_point(isl_point_zero(space)));
  1119. dist = isl_set_remove_divs(dist);
  1120. hull = isl_set_convex_hull(dist);
  1121. if (isl_basic_set_foreach_constraint(hull, &set_min_max, &list) < 0)
  1122. list = isl_val_list_free(list);
  1123. isl_basic_set_free(hull);
  1124. return list;
  1125. }
  1126. /* Given a schedule node "node" that, together with its child,
  1127. * satisfies the input pattern for hybrid tiling, compute bounds
  1128. * on the relative dependence distances of the child node with
  1129. * respect to the parent node. These bounds are needed to
  1130. * construct a hybrid tiling.
  1131. *
  1132. * First all relevant dependences are collected and mapped
  1133. * to the schedule space of the pair of nodes. Then, the
  1134. * dependence distances are computed in this space.
  1135. *
  1136. * These dependence distances are then projected onto a two-dimensional
  1137. * space consisting of the single schedule dimension of the outer node
  1138. * and one of the schedule dimensions of the inner node.
  1139. * The maximal and minimal relative dependence distances are extracted
  1140. * from these projections.
  1141. * This process is repeated for each of the schedule dimensions
  1142. * of the inner node. For the first dimension, both minimal and
  1143. * maximal relative dependence distances are stored in the result.
  1144. * For the other dimensions, only the minimal relative dependence
  1145. * distance is stored.
  1146. */
  1147. __isl_give ppcg_ht_bounds *ppcg_ht_compute_bounds(struct ppcg_scop *scop,
  1148. __isl_keep isl_schedule_node *node)
  1149. {
  1150. ppcg_ht_bounds *bnd;
  1151. isl_space *space;
  1152. isl_map *map;
  1153. isl_set *dist;
  1154. isl_val_list *pair;
  1155. isl_schedule_node *child;
  1156. int n;
  1157. int i, dim;
  1158. if (!scop || !node || check_input_pattern(node) < 0)
  1159. return NULL;
  1160. child = isl_schedule_node_get_child(node, 0);
  1161. space = isl_schedule_node_band_get_space(child);
  1162. dim = isl_schedule_node_band_n_member(child);
  1163. isl_schedule_node_free(child);
  1164. bnd = ppcg_ht_bounds_alloc(space);
  1165. if (!bnd)
  1166. return NULL;
  1167. map = collect_deps(scop, node);
  1168. dist = isl_map_deltas(map);
  1169. n = isl_set_dim(dist, isl_dim_param);
  1170. dist = isl_set_project_out(dist, isl_dim_param, 0, n);
  1171. pair = min_max_dist(dist, 1);
  1172. bnd = ppcg_ht_bounds_set_lower(bnd, 0, isl_val_list_get_val(pair, 0));
  1173. bnd = ppcg_ht_bounds_set_upper(bnd, isl_val_list_get_val(pair, 1));
  1174. isl_val_list_free(pair);
  1175. for (i = 1; i < dim; ++i) {
  1176. pair = min_max_dist(dist, 1 + i);
  1177. bnd = ppcg_ht_bounds_set_lower(bnd, i,
  1178. isl_val_list_get_val(pair, 0));
  1179. isl_val_list_free(pair);
  1180. }
  1181. isl_set_free(dist);
  1182. return bnd;
  1183. }
  1184. /* Check if all the fields of "phase" are valid, freeing "phase"
  1185. * if they are not.
  1186. */
  1187. static __isl_give ppcg_ht_phase *check_phase(__isl_take ppcg_ht_phase *phase)
  1188. {
  1189. if (!phase)
  1190. return NULL;
  1191. if (!phase->tiling || !phase->local_time ||
  1192. !phase->shift_space || !phase->domain)
  1193. return ppcg_ht_phase_free(phase);
  1194. return phase;
  1195. }
  1196. /* Construct a ppcg_ht_phase object, that simply copies
  1197. * information from "tiling".
  1198. * That is, the result is defined over the "ts" space and
  1199. * corresponds to phase 1.
  1200. */
  1201. static __isl_give ppcg_ht_phase *construct_phase(
  1202. __isl_keep ppcg_ht_tiling *tiling)
  1203. {
  1204. isl_ctx *ctx;
  1205. ppcg_ht_phase *phase;
  1206. if (!tiling)
  1207. return NULL;
  1208. ctx = ppcg_ht_tiling_get_ctx(tiling);
  1209. phase = isl_calloc_type(ctx, struct ppcg_ht_phase);
  1210. if (!phase)
  1211. return NULL;
  1212. phase->tiling = ppcg_ht_tiling_copy(tiling);
  1213. phase->time_tile = isl_aff_copy(tiling->time_tile);
  1214. phase->local_time = isl_aff_copy(tiling->local_time);
  1215. phase->shift_space = isl_aff_copy(tiling->shift_space);
  1216. phase->domain = isl_set_copy(tiling->hex);
  1217. return check_phase(phase);
  1218. }
  1219. /* Align the parameters of the elements of "phase" to those of "space".
  1220. */
  1221. static __isl_give ppcg_ht_phase *phase_align_params(
  1222. __isl_take ppcg_ht_phase *phase, __isl_take isl_space *space)
  1223. {
  1224. if (!phase)
  1225. goto error;
  1226. phase->time_tile = isl_aff_align_params(phase->time_tile,
  1227. isl_space_copy(space));
  1228. phase->local_time = isl_aff_align_params(phase->local_time,
  1229. isl_space_copy(space));
  1230. phase->shift_space = isl_aff_align_params(phase->shift_space,
  1231. isl_space_copy(space));
  1232. phase->domain = isl_set_align_params(phase->domain, space);
  1233. return check_phase(phase);
  1234. error:
  1235. isl_space_free(space);
  1236. return NULL;
  1237. }
  1238. /* Pull back "phase" over "ma".
  1239. * That is, take a phase defined over the range of "ma" and
  1240. * turn it into a phase defined over the domain of "ma".
  1241. */
  1242. static __isl_give ppcg_ht_phase *pullback_phase(__isl_take ppcg_ht_phase *phase,
  1243. __isl_take isl_multi_aff *ma)
  1244. {
  1245. phase = phase_align_params(phase, isl_multi_aff_get_space(ma));
  1246. if (!phase)
  1247. goto error;
  1248. phase->time_tile = isl_aff_pullback_multi_aff(phase->time_tile,
  1249. isl_multi_aff_copy(ma));
  1250. phase->local_time = isl_aff_pullback_multi_aff(phase->local_time,
  1251. isl_multi_aff_copy(ma));
  1252. phase->shift_space = isl_aff_pullback_multi_aff(phase->shift_space,
  1253. isl_multi_aff_copy(ma));
  1254. phase->domain = isl_set_preimage_multi_aff(phase->domain, ma);
  1255. return check_phase(phase);
  1256. error:
  1257. isl_multi_aff_free(ma);
  1258. return NULL;
  1259. }
  1260. /* Pullback "phase" over phase->tiling->shift_phase, which shifts
  1261. * phase 0 to phase 1. The pullback therefore takes a phase 1
  1262. * description and turns it into a phase 0 description.
  1263. */
  1264. static __isl_give ppcg_ht_phase *shift_phase(__isl_take ppcg_ht_phase *phase)
  1265. {
  1266. ppcg_ht_tiling *tiling;
  1267. if (!phase)
  1268. return NULL;
  1269. tiling = phase->tiling;
  1270. return pullback_phase(phase, isl_multi_aff_copy(tiling->shift_phase));
  1271. }
  1272. /* Take a "phase" defined over the ts-space and plug in the projection
  1273. * from the input schedule space to the ts-space.
  1274. * The result is then defined over this input schedule space.
  1275. */
  1276. static __isl_give ppcg_ht_phase *lift_phase(__isl_take ppcg_ht_phase *phase)
  1277. {
  1278. ppcg_ht_tiling *tiling;
  1279. if (!phase)
  1280. return NULL;
  1281. tiling = phase->tiling;
  1282. return pullback_phase(phase, isl_multi_aff_copy(tiling->project_ts));
  1283. }
  1284. /* Compute the shift that should be added to the space band
  1285. * in order to be able to apply rectangular tiling to the space.
  1286. * Store the shift in phase->space_shift.
  1287. *
  1288. * In the first dimension, it is equal to shift_space - s.
  1289. * For phase 1, this results in
  1290. *
  1291. * (-(2 * shift_s)*T) % W
  1292. *
  1293. * In phase 0, the "s" in shift_space has been replaced by "s + shift_s",
  1294. * so the result is
  1295. *
  1296. * shift_s + (-(2 * shift_s)*T) % W
  1297. *
  1298. * In the other dimensions, the shift is equal to
  1299. *
  1300. * dl_i * local_time.
  1301. */
  1302. static __isl_give ppcg_ht_phase *compute_space_shift(
  1303. __isl_take ppcg_ht_phase *phase)
  1304. {
  1305. int i, n;
  1306. isl_space *space;
  1307. isl_local_space *ls;
  1308. isl_aff *aff, *s;
  1309. isl_multi_aff *space_shift;
  1310. if (!phase)
  1311. return NULL;
  1312. space = ppcg_ht_phase_get_input_space(phase);
  1313. space = isl_space_unwrap(space);
  1314. space = isl_space_range_map(space);
  1315. space_shift = isl_multi_aff_zero(space);
  1316. aff = isl_aff_copy(phase->shift_space);
  1317. ls = isl_local_space_from_space(isl_aff_get_domain_space(aff));
  1318. s = isl_aff_var_on_domain(ls, isl_dim_set, 1);
  1319. aff = isl_aff_sub(aff, s);
  1320. space_shift = isl_multi_aff_set_aff(space_shift, 0, aff);
  1321. n = isl_multi_aff_dim(space_shift, isl_dim_out);
  1322. for (i = 1; i < n; ++i) {
  1323. isl_val *v;
  1324. isl_aff *time;
  1325. v = ppcg_ht_bounds_get_lower(phase->tiling->bounds, i);
  1326. time = isl_aff_copy(phase->local_time);
  1327. time = isl_aff_scale_val(time, v);
  1328. space_shift = isl_multi_aff_set_aff(space_shift, i, time);
  1329. }
  1330. if (!space_shift)
  1331. return ppcg_ht_phase_free(phase);
  1332. phase->space_shift = space_shift;
  1333. return phase;
  1334. }
  1335. /* Compute the space tiling and store the result in phase->space_tile.
  1336. * The space tiling is of the form
  1337. *
  1338. * [P[t] -> C[s]] -> C[floor((s + space_shift)/space_size]
  1339. */
  1340. static __isl_give ppcg_ht_phase *compute_space_tile(
  1341. __isl_take ppcg_ht_phase *phase)
  1342. {
  1343. isl_space *space;
  1344. isl_multi_val *space_sizes;
  1345. isl_multi_aff *space_shift;
  1346. isl_multi_aff *tile;
  1347. if (!phase)
  1348. return NULL;
  1349. space = ppcg_ht_phase_get_input_space(phase);
  1350. space = isl_space_unwrap(space);
  1351. tile = isl_multi_aff_range_map(space);
  1352. space_shift = isl_multi_aff_copy(phase->space_shift);
  1353. tile = isl_multi_aff_add(space_shift, tile);
  1354. space_sizes = isl_multi_val_copy(phase->tiling->space_sizes);
  1355. tile = isl_multi_aff_scale_down_multi_val(tile, space_sizes);
  1356. tile = isl_multi_aff_floor(tile);
  1357. if (!tile)
  1358. return ppcg_ht_phase_free(phase);
  1359. phase->space_tile = tile;
  1360. return phase;
  1361. }
  1362. /* Construct a representation for one of the two phase for hybrid tiling
  1363. * "tiling". If "shift" is not set, then the phase is constructed
  1364. * directly from the hexagonal tile shape in "tiling", which represents
  1365. * the phase-1 tiles. If "shift" is set, then this tile shape is shifted
  1366. * back over tiling->shift_phase to obtain the phase-0 tiles.
  1367. *
  1368. * First copy data from "tiling", then optionally shift the phase and
  1369. * finally move the tiling from the "ts" space of "tiling" to
  1370. * the space of the input pattern.
  1371. *
  1372. * After the basic phase has been computed, also compute
  1373. * the corresponding space shift.
  1374. */
  1375. static __isl_give ppcg_ht_phase *ppcg_ht_tiling_compute_phase(
  1376. __isl_keep ppcg_ht_tiling *tiling, int shift)
  1377. {
  1378. ppcg_ht_phase *phase;
  1379. phase = construct_phase(tiling);
  1380. if (shift)
  1381. phase = shift_phase(phase);
  1382. phase = lift_phase(phase);
  1383. phase = compute_space_shift(phase);
  1384. phase = compute_space_tile(phase);
  1385. return phase;
  1386. }
  1387. /* Consruct a function that is equal to the time tile of "phase0"
  1388. * on the domain of "phase0" and equal to the time tile of "phase1"
  1389. * on the domain of "phase1".
  1390. * The two domains are assumed to form a partition of the input
  1391. * schedule space.
  1392. */
  1393. static __isl_give isl_pw_multi_aff *combine_time_tile(
  1394. __isl_keep ppcg_ht_phase *phase0, __isl_keep ppcg_ht_phase *phase1)
  1395. {
  1396. isl_aff *T;
  1397. isl_pw_aff *time, *time1;
  1398. if (!phase0 || !phase1)
  1399. return NULL;
  1400. T = isl_aff_copy(phase0->time_tile);
  1401. time = isl_pw_aff_alloc(ppcg_ht_phase_get_domain(phase0), T);
  1402. T = isl_aff_copy(phase1->time_tile);
  1403. time1 = isl_pw_aff_alloc(ppcg_ht_phase_get_domain(phase1), T);
  1404. time = isl_pw_aff_union_add(time, time1);
  1405. return isl_pw_multi_aff_from_pw_aff(time);
  1406. }
  1407. /* Name used in mark nodes that contain a pointer to a ppcg_ht_phase.
  1408. */
  1409. static char *ppcg_phase_name = "phase";
  1410. /* Does "id" contain a pointer to a ppcg_ht_phase?
  1411. * That is, is it called "phase"?
  1412. */
  1413. static isl_bool is_phase_id(__isl_keep isl_id *id)
  1414. {
  1415. const char *name;
  1416. name = isl_id_get_name(id);
  1417. if (!name)
  1418. return isl_bool_error;
  1419. return !strcmp(name, ppcg_phase_name);
  1420. }
  1421. /* Given a mark node with an identifier that points to a ppcg_ht_phase,
  1422. * extract this ppcg_ht_phase pointer.
  1423. */
  1424. __isl_keep ppcg_ht_phase *ppcg_ht_phase_extract_from_mark(
  1425. __isl_keep isl_schedule_node *node)
  1426. {
  1427. isl_bool is_phase;
  1428. isl_id *id;
  1429. void *p;
  1430. if (!node)
  1431. return NULL;
  1432. if (isl_schedule_node_get_type(node) != isl_schedule_node_mark)
  1433. isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
  1434. "not a phase mark", return NULL);
  1435. id = isl_schedule_node_mark_get_id(node);
  1436. is_phase = is_phase_id(id);
  1437. p = isl_id_get_user(id);
  1438. isl_id_free(id);
  1439. if (is_phase < 0)
  1440. return NULL;
  1441. if (!is_phase)
  1442. isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
  1443. "not a phase mark", return NULL);
  1444. return p;
  1445. }
  1446. /* Insert a mark node at "node" holding a pointer to "phase".
  1447. */
  1448. static __isl_give isl_schedule_node *insert_phase(
  1449. __isl_take isl_schedule_node *node, __isl_take ppcg_ht_phase *phase)
  1450. {
  1451. isl_ctx *ctx;
  1452. isl_id *id;
  1453. if (!node)
  1454. goto error;
  1455. ctx = isl_schedule_node_get_ctx(node);
  1456. id = isl_id_alloc(ctx, ppcg_phase_name, phase);
  1457. if (!id)
  1458. goto error;
  1459. id = isl_id_set_free_user(id, &ppcg_ht_phase_free_wrap);
  1460. node = isl_schedule_node_insert_mark(node, id);
  1461. return node;
  1462. error:
  1463. ppcg_ht_phase_free(phase);
  1464. isl_schedule_node_free(node);
  1465. return NULL;
  1466. }
  1467. /* Construct a mapping from the elements of the original pair of bands
  1468. * to which tiling was applied that belong to a tile of "phase"
  1469. * to that tile, preserving the values for the outer bands.
  1470. *
  1471. * The mapping is of the form
  1472. *
  1473. * [[outer] -> [P -> C]] -> [[outer] -> [tile]]
  1474. *
  1475. * where tile is defined by a concatenation of the time_tile and
  1476. * the space_tile.
  1477. */
  1478. static __isl_give isl_map *construct_tile_map(__isl_keep ppcg_ht_phase *phase)
  1479. {
  1480. int depth;
  1481. isl_space *space;
  1482. isl_multi_aff *ma;
  1483. isl_multi_aff *tiling;
  1484. isl_map *el2tile;
  1485. depth = isl_schedule_node_get_schedule_depth(
  1486. phase->tiling->input_node);
  1487. space = isl_aff_get_space(phase->time_tile);
  1488. space = isl_space_params(space);
  1489. space = isl_space_set_from_params(space);
  1490. space = isl_space_add_dims(space, isl_dim_set, depth);
  1491. space = isl_space_map_from_set(space);
  1492. ma = isl_multi_aff_identity(space);
  1493. tiling = isl_multi_aff_flat_range_product(
  1494. isl_multi_aff_from_aff(isl_aff_copy(phase->time_tile)),
  1495. isl_multi_aff_copy(phase->space_tile));
  1496. el2tile = isl_map_from_multi_aff(tiling);
  1497. el2tile = isl_map_intersect_domain(el2tile,
  1498. isl_set_copy(phase->domain));
  1499. el2tile = isl_map_product(isl_map_from_multi_aff(ma), el2tile);
  1500. return el2tile;
  1501. }
  1502. /* Return a description of the full tiles of "phase" at the point
  1503. * in the original schedule tree where the tiling was applied.
  1504. *
  1505. * First construct a mapping from the input schedule dimensions
  1506. * up to an including the original pair of bands to which hybrid tiling
  1507. * was applied to schedule dimensions in which this original pair
  1508. * has been replaced by the tiles.
  1509. * This mapping is of the form
  1510. *
  1511. * [[outer] -> [P -> C]] -> [[outer] -> [tile]]
  1512. *
  1513. * Apply this mapping to the set of all values for the input
  1514. * schedule dimensions and then apply its inverse.
  1515. * The result is the set of values for the input schedule dimensions
  1516. * that would map to any of the tiles. Subtracting from this set
  1517. * the set of values that are actually executed produces the set
  1518. * of values that belong to a tile but that are not executed.
  1519. * Mapping these back to the tiles produces a description of
  1520. * the partial tiles. Subtracting these from the set of all tiles
  1521. * produces a description of the full tiles in the form
  1522. *
  1523. * [[outer] -> [tile]]
  1524. */
  1525. static __isl_give isl_set *compute_full_tile(__isl_keep ppcg_ht_phase *phase)
  1526. {
  1527. isl_schedule_node *node;
  1528. isl_union_set *domain;
  1529. isl_union_map *prefix, *schedule;
  1530. isl_set *all, *partial, *all_el;
  1531. isl_map *tile2el, *el2tile;
  1532. isl_multi_union_pw_aff *mupa;
  1533. el2tile = construct_tile_map(phase);
  1534. tile2el = isl_map_reverse(isl_map_copy(el2tile));
  1535. node = phase->tiling->input_node;
  1536. prefix = isl_schedule_node_get_prefix_schedule_union_map(node);
  1537. domain = isl_schedule_node_get_domain(node);
  1538. mupa = isl_multi_union_pw_aff_copy(phase->tiling->input_schedule);
  1539. schedule = isl_union_map_from_multi_union_pw_aff(mupa);
  1540. schedule = isl_union_map_range_product(prefix, schedule);
  1541. all_el = isl_set_from_union_set(isl_union_set_apply(domain, schedule));
  1542. all_el = isl_set_coalesce(all_el);
  1543. all = isl_set_apply(isl_set_copy(all_el), isl_map_copy(el2tile));
  1544. partial = isl_set_copy(all);
  1545. partial = isl_set_apply(partial, tile2el);
  1546. partial = isl_set_subtract(partial, all_el);
  1547. partial = isl_set_apply(partial, el2tile);
  1548. return isl_set_subtract(all, partial);
  1549. }
  1550. /* Copy the AST loop types of the non-isolated part to those
  1551. * of the isolated part.
  1552. */
  1553. static __isl_give isl_schedule_node *set_isolate_loop_type(
  1554. __isl_take isl_schedule_node *node)
  1555. {
  1556. int i, n;
  1557. n = isl_schedule_node_band_n_member(node);
  1558. for (i = 0; i < n; ++i) {
  1559. enum isl_ast_loop_type type;
  1560. type = isl_schedule_node_band_member_get_ast_loop_type(node, i);
  1561. node = isl_schedule_node_band_member_set_isolate_ast_loop_type(
  1562. node, i, type);
  1563. }
  1564. return node;
  1565. }
  1566. /* If options->isolate_full_tiles is set, then mark the full tiles
  1567. * in "node" for isolation. The full tiles are derived from "phase".
  1568. * "node" may point to a part of the tiling, e.g., the space tiling.
  1569. *
  1570. * The full tiles are originally computed in the form
  1571. *
  1572. * [[outer] -> [tile]]
  1573. *
  1574. * However, the band that "node" points to may only contain
  1575. * subset of the tile dimensions.
  1576. * The description above is therefore treated as
  1577. *
  1578. * [[outer] -> [before; this; after]]
  1579. *
  1580. * before is of size "pos"; this is of size "dim"; and
  1581. * after is of size "out - pos - dim".
  1582. * The after part is first project out. Then the range is split
  1583. * into a before and this part and finally the before part is moved
  1584. * to the domain, resulting in
  1585. *
  1586. * [[outer; before] -> [this]]
  1587. *
  1588. * This description is then used as the isolate option.
  1589. *
  1590. * The AST loop type for the isolated part is set to be the same
  1591. * as that of the non-isolated part.
  1592. */
  1593. static __isl_give isl_schedule_node *ppcg_ht_phase_isolate_full_tile_node(
  1594. __isl_keep ppcg_ht_phase *phase, __isl_take isl_schedule_node *node,
  1595. struct ppcg_options *options)
  1596. {
  1597. int in, out, pos, depth, dim;
  1598. isl_space *space;
  1599. isl_multi_aff *ma1, *ma2;
  1600. isl_set *tile;
  1601. isl_map *map;
  1602. isl_set *set;
  1603. isl_union_set *opt;
  1604. if (!options->isolate_full_tiles)
  1605. return node;
  1606. depth = isl_schedule_node_get_schedule_depth(node);
  1607. dim = isl_schedule_node_band_n_member(node);
  1608. tile = compute_full_tile(phase);
  1609. map = isl_set_unwrap(tile);
  1610. in = isl_map_dim(map, isl_dim_in);
  1611. out = isl_map_dim(map, isl_dim_out);
  1612. pos = depth - in;
  1613. map = isl_map_project_out(map, isl_dim_out, pos + dim,
  1614. out - (pos + dim));
  1615. space = isl_space_range(isl_map_get_space(map));
  1616. ma1 = isl_multi_aff_project_out_map(isl_space_copy(space),
  1617. isl_dim_set, pos, dim);
  1618. ma2 = isl_multi_aff_project_out_map(space, isl_dim_set, 0, pos);
  1619. ma1 = isl_multi_aff_range_product(ma1, ma2);
  1620. map = isl_map_apply_range(map, isl_map_from_multi_aff(ma1));
  1621. map = isl_map_uncurry(map);
  1622. map = isl_map_flatten_domain(map);
  1623. set = isl_map_wrap(map);
  1624. set = isl_set_set_tuple_name(set, "isolate");
  1625. opt = isl_schedule_node_band_get_ast_build_options(node);
  1626. opt = isl_union_set_add_set(opt, set);
  1627. node = isl_schedule_node_band_set_ast_build_options(node, opt);
  1628. node = set_isolate_loop_type(node);
  1629. return node;
  1630. }
  1631. /* Insert a band node for performing the space tiling for "phase" at "node".
  1632. * In particular, insert a band node with partial schedule
  1633. *
  1634. * [P[t] -> C[s]] -> C[floor((s + space_shift)/space_size)]
  1635. *
  1636. * pulled back over the input schedule.
  1637. * "options" determines whether full tiles should be separated
  1638. * from partial tiles.
  1639. *
  1640. * The first tile dimension iterates over the hexagons in the same
  1641. * phase, which are independent by construction. The first dimension
  1642. * is therefore marked coincident.
  1643. * All dimensions are also marked for being generated as atomic loops
  1644. * because separation is usually not desirable on tile loops.
  1645. */
  1646. static __isl_give isl_schedule_node *insert_space_tiling(
  1647. __isl_keep ppcg_ht_phase *phase, __isl_take isl_schedule_node *node,
  1648. struct ppcg_options *options)
  1649. {
  1650. isl_multi_aff *space_tile;
  1651. isl_multi_union_pw_aff *mupa;
  1652. if (!phase)
  1653. return isl_schedule_node_free(node);
  1654. space_tile = isl_multi_aff_copy(phase->space_tile);
  1655. mupa = isl_multi_union_pw_aff_copy(phase->tiling->input_schedule);
  1656. mupa = isl_multi_union_pw_aff_apply_multi_aff(mupa, space_tile);
  1657. node = isl_schedule_node_insert_partial_schedule(node, mupa);
  1658. node = ppcg_set_schedule_node_type(node, isl_ast_loop_atomic);
  1659. node = ppcg_ht_phase_isolate_full_tile_node(phase, node, options);
  1660. node = isl_schedule_node_band_member_set_coincident(node, 0, 1);
  1661. return node;
  1662. }
  1663. /* Given a pointer "node" to (a copy of) the original child node
  1664. * in the input pattern, adjust its partial schedule such that
  1665. * it starts at zero within each tile.
  1666. *
  1667. * That is, replace "s" by (s + space_shift) % space_sizes.
  1668. */
  1669. __isl_give isl_schedule_node *ppcg_ht_phase_shift_space_point(
  1670. __isl_keep ppcg_ht_phase *phase, __isl_take isl_schedule_node *node)
  1671. {
  1672. isl_multi_val *space_sizes;
  1673. isl_multi_aff *space_shift;
  1674. isl_multi_union_pw_aff *mupa;
  1675. space_shift = isl_multi_aff_copy(phase->space_shift);
  1676. mupa = isl_multi_union_pw_aff_copy(phase->tiling->input_schedule);
  1677. mupa = isl_multi_union_pw_aff_apply_multi_aff(mupa, space_shift);
  1678. node = isl_schedule_node_band_shift(node, mupa);
  1679. space_sizes = isl_multi_val_copy(phase->tiling->space_sizes);
  1680. node = isl_schedule_node_band_mod(node, space_sizes);
  1681. return node;
  1682. }
  1683. /* Does
  1684. *
  1685. * s0 > delta + 2 * {delta * h} - 1
  1686. *
  1687. * hold?
  1688. */
  1689. static isl_bool wide_enough(__isl_keep isl_val *s0, __isl_keep isl_val *delta,
  1690. __isl_keep isl_val *h)
  1691. {
  1692. isl_val *v, *v2;
  1693. isl_bool ok;
  1694. v = isl_val_mul(isl_val_copy(delta), isl_val_copy(h));
  1695. v2 = isl_val_floor(isl_val_copy(v));
  1696. v = isl_val_sub(v, v2);
  1697. v = isl_val_mul_ui(v, 2);
  1698. v = isl_val_add(v, isl_val_copy(delta));
  1699. v = isl_val_sub_ui(v, 1);
  1700. ok = isl_val_gt(s0, v);
  1701. isl_val_free(v);
  1702. return ok;
  1703. }
  1704. /* Is the tile size specified by "sizes" wide enough in the first space
  1705. * dimension, i.e., the base of the hexagon? This ensures that,
  1706. * after hybrid tiling using "bounds" and these sizes,
  1707. * neighboring hexagons in the same phase are far enough apart
  1708. * that they do not depend on each other.
  1709. * The test is only meaningful if the bounds are valid.
  1710. *
  1711. * Let st be (half) the size in the time dimension and s0 the base
  1712. * size in the first space dimension. Let delta be the dependence
  1713. * distance in either positive or negative direction. In principle,
  1714. * it should be enough to have s0 + 1 > delta, i.e., s0 >= delta.
  1715. * However, in case of fractional delta, the tile is not extended
  1716. * with delta * (st - 1), but instead with floor(delta * (st - 1)).
  1717. * The condition therefore needs to be adjusted to
  1718. *
  1719. * s0 + 1 > delta + 2 {delta * (st - 1)}
  1720. *
  1721. * (with {} the fractional part) to account for the two slanted sides.
  1722. * The condition in the paper "Hybrid Hexagonal/Classical Tiling for GPUs"
  1723. * translates to
  1724. *
  1725. * s0 >= delta + {delta * (st - 1)}
  1726. *
  1727. * Since 1 > frac(delta * (st - 1)), this condition implies
  1728. * the condition above.
  1729. *
  1730. * The condition is checked for both directions.
  1731. */
  1732. isl_bool ppcg_ht_bounds_supports_sizes(__isl_keep ppcg_ht_bounds *bounds,
  1733. __isl_keep isl_multi_val *sizes)
  1734. {
  1735. isl_val *s0, *h;
  1736. isl_val *delta;
  1737. isl_bool ok;
  1738. ok = ppcg_ht_bounds_is_valid(bounds);
  1739. if (ok < 0 || !ok)
  1740. return ok;
  1741. h = isl_val_sub_ui(isl_multi_val_get_val(sizes, 0), 1);
  1742. s0 = isl_multi_val_get_val(sizes, 1);
  1743. delta = ppcg_ht_bounds_get_lower(bounds, 0);
  1744. ok = wide_enough(s0, delta, h);
  1745. isl_val_free(delta);
  1746. delta = ppcg_ht_bounds_get_upper(bounds);
  1747. if (ok == isl_bool_true)
  1748. ok = wide_enough(s0, delta, h);
  1749. isl_val_free(delta);
  1750. isl_val_free(s0);
  1751. isl_val_free(h);
  1752. return ok;
  1753. }
  1754. /* Check that the tile will be wide enough in the first space
  1755. * dimension, i.e., the base of the hexagon. This ensures that
  1756. * neighboring hexagons in the same phase are far enough apart
  1757. * that they do not depend on each other.
  1758. *
  1759. * Error out if the condition fails to hold.
  1760. */
  1761. static isl_stat check_width(__isl_keep ppcg_ht_bounds *bounds,
  1762. __isl_keep isl_multi_val *sizes)
  1763. {
  1764. isl_bool ok;
  1765. ok = ppcg_ht_bounds_supports_sizes(bounds, sizes);
  1766. if (ok < 0)
  1767. return isl_stat_error;
  1768. if (!ok)
  1769. isl_die(isl_multi_val_get_ctx(sizes), isl_error_invalid,
  1770. "base of hybrid tiling hexagon not sufficiently wide",
  1771. return isl_stat_error);
  1772. return isl_stat_ok;
  1773. }
  1774. /* Given valid bounds on the relative dependence distances for
  1775. * the pair of nested nodes that "node" point to, as well as sufficiently
  1776. * wide tile sizes "sizes", insert the corresponding time and space tiling
  1777. * at "node", along with a pair of phase nodes that can be used
  1778. * to make further changes.
  1779. * The space of "sizes" should be the product of the spaces
  1780. * of the schedules of the pair of parent and child nodes.
  1781. * "options" determines whether full tiles should be separated
  1782. * from partial tiles.
  1783. *
  1784. * In particular, given an input of the form
  1785. *
  1786. * P - C - ...
  1787. *
  1788. * the output has the form
  1789. *
  1790. * /- F0 - M0 - CT0 - P - C - ...
  1791. * PT - seq
  1792. * \- F1 - M1 - CT1 - P - C - ...
  1793. *
  1794. * PT is the global time tiling. Within each of these tiles,
  1795. * two phases are executed in order. Within each phase, the schedule
  1796. * space is further subdivided into tiles through CT0 and CT1.
  1797. * The first dimension of each of these iterates over the hexagons
  1798. * within a phase and these are independent by construction.
  1799. * The F0 and F1 filters filter the statement instances that belong
  1800. * to the corresponding phase. The M0 and M1 marks contain a pointer
  1801. * to a ppcg_ht_phase object that can be used to perform further changes.
  1802. *
  1803. * After checking that input satisfies the requirements,
  1804. * a data structure is constructed that represents the tiling and
  1805. * two additional data structures are constructed for the two phases
  1806. * of the tiling. These are then used to define the filters F0 and F1 and
  1807. * combined to construct the time tiling PT.
  1808. * Then the time tiling node PT is inserted, followed by
  1809. * the sequence with the two filters, the CT space tiling nodes and
  1810. * the phase markers M.
  1811. */
  1812. __isl_give isl_schedule_node *ppcg_ht_bounds_insert_tiling(
  1813. __isl_take ppcg_ht_bounds *bounds, __isl_take isl_multi_val *sizes,
  1814. __isl_take isl_schedule_node *node, struct ppcg_options *options)
  1815. {
  1816. isl_ctx *ctx;
  1817. isl_union_set *phase0;
  1818. isl_union_set *phase1;
  1819. isl_multi_union_pw_aff *input, *dom_time;
  1820. isl_union_pw_multi_aff *upma;
  1821. isl_pw_multi_aff *time;
  1822. isl_union_set_list *phases;
  1823. ppcg_ht_tiling *tiling;
  1824. ppcg_ht_phase *phase_0;
  1825. ppcg_ht_phase *phase_1;
  1826. if (!node || !sizes || !bounds)
  1827. goto error;
  1828. if (check_input_pattern(node) < 0 || check_width(bounds, sizes) < 0)
  1829. goto error;
  1830. ctx = isl_schedule_node_get_ctx(node);
  1831. input = extract_input_schedule(node);
  1832. tiling = ppcg_ht_bounds_construct_tiling(bounds, node, input, sizes);
  1833. phase_0 = ppcg_ht_tiling_compute_phase(tiling, 1);
  1834. phase_1 = ppcg_ht_tiling_compute_phase(tiling, 0);
  1835. time = combine_time_tile(phase_0, phase_1);
  1836. ppcg_ht_tiling_free(tiling);
  1837. upma = isl_union_pw_multi_aff_from_multi_union_pw_aff(
  1838. isl_multi_union_pw_aff_copy(input));
  1839. phase0 = isl_union_set_from_set(ppcg_ht_phase_get_domain(phase_0));
  1840. phase0 = isl_union_set_preimage_union_pw_multi_aff(phase0,
  1841. isl_union_pw_multi_aff_copy(upma));
  1842. phase1 = isl_union_set_from_set(ppcg_ht_phase_get_domain(phase_1));
  1843. phase1 = isl_union_set_preimage_union_pw_multi_aff(phase1, upma);
  1844. phases = isl_union_set_list_alloc(ctx, 2);
  1845. phases = isl_union_set_list_add(phases, phase0);
  1846. phases = isl_union_set_list_add(phases, phase1);
  1847. dom_time = isl_multi_union_pw_aff_apply_pw_multi_aff(input, time);
  1848. node = isl_schedule_node_insert_partial_schedule(node, dom_time);
  1849. node = isl_schedule_node_child(node, 0);
  1850. node = isl_schedule_node_insert_sequence(node, phases);
  1851. node = isl_schedule_node_child(node, 0);
  1852. node = isl_schedule_node_child(node, 0);
  1853. node = insert_space_tiling(phase_0, node, options);
  1854. node = insert_phase(node, phase_0);
  1855. node = isl_schedule_node_parent(node);
  1856. node = isl_schedule_node_next_sibling(node);
  1857. node = isl_schedule_node_child(node, 0);
  1858. node = insert_space_tiling(phase_1, node, options);
  1859. node = insert_phase(node, phase_1);
  1860. node = isl_schedule_node_parent(node);
  1861. node = isl_schedule_node_parent(node);
  1862. node = isl_schedule_node_parent(node);
  1863. isl_multi_val_free(sizes);
  1864. return node;
  1865. error:
  1866. isl_multi_val_free(sizes);
  1867. isl_schedule_node_free(node);
  1868. ppcg_ht_bounds_free(bounds);
  1869. return NULL;
  1870. }
  1871. /* Given a branch "node" that contains a sequence node with two phases
  1872. * of hybrid tiling as input, call "fn" on each of the two phase marker
  1873. * nodes.
  1874. *
  1875. * That is, the input is as follows
  1876. *
  1877. * /- F0 - M0 - ...
  1878. * ... - seq
  1879. * \- F1 - M1 - ...
  1880. *
  1881. * and "fn" is called on M0 and on M1.
  1882. */
  1883. __isl_give isl_schedule_node *hybrid_tile_foreach_phase(
  1884. __isl_take isl_schedule_node *node,
  1885. __isl_give isl_schedule_node *(*fn)(__isl_take isl_schedule_node *node,
  1886. void *user), void *user)
  1887. {
  1888. int depth0, depth;
  1889. depth0 = isl_schedule_node_get_tree_depth(node);
  1890. while (node &&
  1891. isl_schedule_node_get_type(node) != isl_schedule_node_sequence)
  1892. node = isl_schedule_node_child(node, 0);
  1893. node = isl_schedule_node_child(node, 0);
  1894. node = isl_schedule_node_child(node, 0);
  1895. if (!node)
  1896. return NULL;
  1897. node = fn(node, user);
  1898. node = isl_schedule_node_parent(node);
  1899. node = isl_schedule_node_next_sibling(node);
  1900. node = isl_schedule_node_child(node, 0);
  1901. if (!node)
  1902. return NULL;
  1903. node = fn(node, user);
  1904. node = isl_schedule_node_parent(node);
  1905. node = isl_schedule_node_parent(node);
  1906. depth = isl_schedule_node_get_tree_depth(node);
  1907. node = isl_schedule_node_ancestor(node, depth - depth0);
  1908. return node;
  1909. }
  1910. /* This function is called on each of the two phase marks
  1911. * in a hybrid tiling tree.
  1912. * Drop the phase mark at "node".
  1913. */
  1914. static __isl_give isl_schedule_node *drop_phase_mark(
  1915. __isl_take isl_schedule_node *node, void *user)
  1916. {
  1917. isl_id *id;
  1918. isl_bool is_phase;
  1919. if (isl_schedule_node_get_type(node) != isl_schedule_node_mark)
  1920. return node;
  1921. id = isl_schedule_node_mark_get_id(node);
  1922. is_phase = is_phase_id(id);
  1923. isl_id_free(id);
  1924. if (is_phase < 0)
  1925. return isl_schedule_node_free(node);
  1926. if (is_phase)
  1927. node = isl_schedule_node_delete(node);
  1928. return node;
  1929. }
  1930. /* Given a branch "node" that contains a sequence node with two phases
  1931. * of hybrid tiling as input, remove the two phase marker nodes.
  1932. *
  1933. * That is, the input is as follows
  1934. *
  1935. * /- F0 - M0 - ...
  1936. * ... - seq
  1937. * \- F1 - M1 - ...
  1938. *
  1939. * and the output is
  1940. *
  1941. * /- F0 - ...
  1942. * ... - seq
  1943. * \- F1 - ...
  1944. */
  1945. __isl_give isl_schedule_node *hybrid_tile_drop_phase_marks(
  1946. __isl_take isl_schedule_node *node)
  1947. {
  1948. return hybrid_tile_foreach_phase(node, &drop_phase_mark, NULL);
  1949. }