cuda.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730
  1. /*
  2. * Copyright 2012 Ecole Normale Superieure
  3. *
  4. * Use of this software is governed by the MIT license
  5. *
  6. * Written by Sven Verdoolaege,
  7. * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
  8. */
  9. #include <isl/aff.h>
  10. #include <isl/ast.h>
  11. #include "cuda_common.h"
  12. #include "cuda.h"
  13. #include "gpu.h"
  14. #include "gpu_print.h"
  15. #include "print.h"
  16. #include "util.h"
  17. static __isl_give isl_printer *print_cuda_macros(__isl_take isl_printer *p)
  18. {
  19. const char *macros =
  20. "#define cudaCheckReturn(ret) \\\n"
  21. " do { \\\n"
  22. " cudaError_t cudaCheckReturn_e = (ret); \\\n"
  23. " if (cudaCheckReturn_e != cudaSuccess) { \\\n"
  24. " fprintf(stderr, \"CUDA error: %s\\n\", "
  25. "cudaGetErrorString(cudaCheckReturn_e)); \\\n"
  26. " fflush(stderr); \\\n"
  27. " } \\\n"
  28. " assert(cudaCheckReturn_e == cudaSuccess); \\\n"
  29. " } while(0)\n"
  30. "#define cudaCheckKernel() \\\n"
  31. " do { \\\n"
  32. " cudaCheckReturn(cudaGetLastError()); \\\n"
  33. " } while(0)\n\n";
  34. p = isl_printer_print_str(p, macros);
  35. return p;
  36. }
  37. /* Print a declaration for the device array corresponding to "array" on "p".
  38. */
  39. static __isl_give isl_printer *declare_device_array(__isl_take isl_printer *p,
  40. struct gpu_array_info *array)
  41. {
  42. int i;
  43. p = isl_printer_start_line(p);
  44. p = isl_printer_print_str(p, array->type);
  45. p = isl_printer_print_str(p, " ");
  46. if (!array->linearize && array->n_index > 1)
  47. p = isl_printer_print_str(p, "(");
  48. p = isl_printer_print_str(p, "*dev_");
  49. p = isl_printer_print_str(p, array->name);
  50. if (!array->linearize && array->n_index > 1) {
  51. p = isl_printer_print_str(p, ")");
  52. for (i = 1; i < array->n_index; i++) {
  53. isl_ast_expr *bound;
  54. bound = isl_ast_expr_get_op_arg(array->bound_expr,
  55. 1 + i);
  56. p = isl_printer_print_str(p, "[");
  57. p = isl_printer_print_ast_expr(p, bound);
  58. p = isl_printer_print_str(p, "]");
  59. isl_ast_expr_free(bound);
  60. }
  61. }
  62. p = isl_printer_print_str(p, ";");
  63. p = isl_printer_end_line(p);
  64. return p;
  65. }
  66. static __isl_give isl_printer *declare_device_arrays(__isl_take isl_printer *p,
  67. struct gpu_prog *prog)
  68. {
  69. int i;
  70. for (i = 0; i < prog->n_array; ++i) {
  71. if (!gpu_array_requires_device_allocation(&prog->array[i]))
  72. continue;
  73. p = declare_device_array(p, &prog->array[i]);
  74. }
  75. p = isl_printer_start_line(p);
  76. p = isl_printer_end_line(p);
  77. return p;
  78. }
  79. static __isl_give isl_printer *allocate_device_arrays(
  80. __isl_take isl_printer *p, struct gpu_prog *prog)
  81. {
  82. int i;
  83. for (i = 0; i < prog->n_array; ++i) {
  84. struct gpu_array_info *array = &prog->array[i];
  85. if (!gpu_array_requires_device_allocation(&prog->array[i]))
  86. continue;
  87. p = ppcg_ast_expr_print_macros(array->bound_expr, p);
  88. p = isl_printer_start_line(p);
  89. p = isl_printer_print_str(p,
  90. "cudaCheckReturn(cudaMalloc((void **) &dev_");
  91. p = isl_printer_print_str(p, prog->array[i].name);
  92. p = isl_printer_print_str(p, ", ");
  93. p = gpu_array_info_print_size(p, &prog->array[i]);
  94. p = isl_printer_print_str(p, "));");
  95. p = isl_printer_end_line(p);
  96. }
  97. p = isl_printer_start_line(p);
  98. p = isl_printer_end_line(p);
  99. return p;
  100. }
  101. static __isl_give isl_printer *free_device_arrays(__isl_take isl_printer *p,
  102. struct gpu_prog *prog)
  103. {
  104. int i;
  105. for (i = 0; i < prog->n_array; ++i) {
  106. if (!gpu_array_requires_device_allocation(&prog->array[i]))
  107. continue;
  108. p = isl_printer_start_line(p);
  109. p = isl_printer_print_str(p, "cudaCheckReturn(cudaFree(dev_");
  110. p = isl_printer_print_str(p, prog->array[i].name);
  111. p = isl_printer_print_str(p, "));");
  112. p = isl_printer_end_line(p);
  113. }
  114. return p;
  115. }
  116. /* Print code to "p" for copying "array" from the host to the device
  117. * in its entirety. The bounds on the extent of "array" have
  118. * been precomputed in extract_array_info and are used in
  119. * gpu_array_info_print_size.
  120. */
  121. static __isl_give isl_printer *copy_array_to_device(__isl_take isl_printer *p,
  122. struct gpu_array_info *array)
  123. {
  124. p = isl_printer_start_line(p);
  125. p = isl_printer_print_str(p, "cudaCheckReturn(cudaMemcpy(dev_");
  126. p = isl_printer_print_str(p, array->name);
  127. p = isl_printer_print_str(p, ", ");
  128. if (gpu_array_is_scalar(array))
  129. p = isl_printer_print_str(p, "&");
  130. p = isl_printer_print_str(p, array->name);
  131. p = isl_printer_print_str(p, ", ");
  132. p = gpu_array_info_print_size(p, array);
  133. p = isl_printer_print_str(p, ", cudaMemcpyHostToDevice));");
  134. p = isl_printer_end_line(p);
  135. return p;
  136. }
  137. /* Print code to "p" for copying "array" back from the device to the host
  138. * in its entirety. The bounds on the extent of "array" have
  139. * been precomputed in extract_array_info and are used in
  140. * gpu_array_info_print_size.
  141. */
  142. static __isl_give isl_printer *copy_array_from_device(
  143. __isl_take isl_printer *p, struct gpu_array_info *array)
  144. {
  145. p = isl_printer_start_line(p);
  146. p = isl_printer_print_str(p, "cudaCheckReturn(cudaMemcpy(");
  147. if (gpu_array_is_scalar(array))
  148. p = isl_printer_print_str(p, "&");
  149. p = isl_printer_print_str(p, array->name);
  150. p = isl_printer_print_str(p, ", dev_");
  151. p = isl_printer_print_str(p, array->name);
  152. p = isl_printer_print_str(p, ", ");
  153. p = gpu_array_info_print_size(p, array);
  154. p = isl_printer_print_str(p, ", cudaMemcpyDeviceToHost));");
  155. p = isl_printer_end_line(p);
  156. return p;
  157. }
  158. static __isl_give isl_printer* print_reverse_list(__isl_take isl_printer *p, int len, int *list)
  159. {
  160. int i;
  161. if (len == 0)
  162. return p;
  163. p = isl_printer_print_str(p, "(");
  164. for (i = 0; i < len; ++i) {
  165. if (i)
  166. p = isl_printer_print_str(p, ", ");
  167. p = isl_printer_print_int(p, list[len - 1 - i]);
  168. }
  169. return isl_printer_print_str(p, ")");
  170. }
  171. /* Print the effective grid size as a list of the sizes in each
  172. * dimension, from innermost to outermost.
  173. */
  174. static __isl_give isl_printer *print_grid_size(__isl_take isl_printer *p,
  175. struct ppcg_kernel *kernel)
  176. {
  177. int i;
  178. int dim;
  179. dim = isl_multi_pw_aff_dim(kernel->grid_size, isl_dim_set);
  180. if (dim == 0)
  181. return p;
  182. p = isl_printer_print_str(p, "(");
  183. for (i = dim - 1; i >= 0; --i) {
  184. isl_ast_expr *bound;
  185. bound = isl_ast_expr_get_op_arg(kernel->grid_size_expr, 1 + i);
  186. p = isl_printer_print_ast_expr(p, bound);
  187. isl_ast_expr_free(bound);
  188. if (i > 0)
  189. p = isl_printer_print_str(p, ", ");
  190. }
  191. p = isl_printer_print_str(p, ")");
  192. return p;
  193. }
  194. /* Print the grid definition.
  195. */
  196. static __isl_give isl_printer *print_grid(__isl_take isl_printer *p,
  197. struct ppcg_kernel *kernel)
  198. {
  199. p = isl_printer_start_line(p);
  200. p = isl_printer_print_str(p, "dim3 k");
  201. p = isl_printer_print_int(p, kernel->id);
  202. p = isl_printer_print_str(p, "_dimGrid");
  203. p = print_grid_size(p, kernel);
  204. p = isl_printer_print_str(p, ";");
  205. p = isl_printer_end_line(p);
  206. return p;
  207. }
  208. /* Print the arguments to a kernel declaration or call. If "types" is set,
  209. * then print a declaration (including the types of the arguments).
  210. *
  211. * The arguments are printed in the following order
  212. * - the arrays accessed by the kernel
  213. * - the parameters
  214. * - the host loop iterators
  215. */
  216. static __isl_give isl_printer *print_kernel_arguments(__isl_take isl_printer *p,
  217. struct gpu_prog *prog, struct ppcg_kernel *kernel, int types)
  218. {
  219. int i, n;
  220. int first = 1;
  221. unsigned nparam;
  222. isl_space *space;
  223. const char *type;
  224. for (i = 0; i < prog->n_array; ++i) {
  225. int required;
  226. required = ppcg_kernel_requires_array_argument(kernel, i);
  227. if (required < 0)
  228. return isl_printer_free(p);
  229. if (!required)
  230. continue;
  231. if (!first)
  232. p = isl_printer_print_str(p, ", ");
  233. if (types)
  234. p = gpu_array_info_print_declaration_argument(p,
  235. &prog->array[i], NULL);
  236. else
  237. p = gpu_array_info_print_call_argument(p,
  238. &prog->array[i]);
  239. first = 0;
  240. }
  241. space = isl_union_set_get_space(kernel->arrays);
  242. nparam = isl_space_dim(space, isl_dim_param);
  243. for (i = 0; i < nparam; ++i) {
  244. const char *name;
  245. name = isl_space_get_dim_name(space, isl_dim_param, i);
  246. if (!first)
  247. p = isl_printer_print_str(p, ", ");
  248. if (types)
  249. p = isl_printer_print_str(p, "int ");
  250. p = isl_printer_print_str(p, name);
  251. first = 0;
  252. }
  253. isl_space_free(space);
  254. n = isl_space_dim(kernel->space, isl_dim_set);
  255. type = isl_options_get_ast_iterator_type(prog->ctx);
  256. for (i = 0; i < n; ++i) {
  257. const char *name;
  258. if (!first)
  259. p = isl_printer_print_str(p, ", ");
  260. name = isl_space_get_dim_name(kernel->space, isl_dim_set, i);
  261. if (types) {
  262. p = isl_printer_print_str(p, type);
  263. p = isl_printer_print_str(p, " ");
  264. }
  265. p = isl_printer_print_str(p, name);
  266. first = 0;
  267. }
  268. return p;
  269. }
  270. /* Print the header of the given kernel.
  271. */
  272. static __isl_give isl_printer *print_kernel_header(__isl_take isl_printer *p,
  273. struct gpu_prog *prog, struct ppcg_kernel *kernel)
  274. {
  275. p = isl_printer_start_line(p);
  276. p = isl_printer_print_str(p, "__global__ void kernel");
  277. p = isl_printer_print_int(p, kernel->id);
  278. p = isl_printer_print_str(p, "(");
  279. p = print_kernel_arguments(p, prog, kernel, 1);
  280. p = isl_printer_print_str(p, ")");
  281. return p;
  282. }
  283. /* Print the header of the given kernel to both gen->cuda.kernel_h
  284. * and gen->cuda.kernel_c.
  285. */
  286. static void print_kernel_headers(struct gpu_prog *prog,
  287. struct ppcg_kernel *kernel, struct cuda_info *cuda)
  288. {
  289. isl_printer *p;
  290. p = isl_printer_to_file(prog->ctx, cuda->kernel_h);
  291. p = isl_printer_set_output_format(p, ISL_FORMAT_C);
  292. p = print_kernel_header(p, prog, kernel);
  293. p = isl_printer_print_str(p, ";");
  294. p = isl_printer_end_line(p);
  295. isl_printer_free(p);
  296. p = isl_printer_to_file(prog->ctx, cuda->kernel_c);
  297. p = isl_printer_set_output_format(p, ISL_FORMAT_C);
  298. p = print_kernel_header(p, prog, kernel);
  299. p = isl_printer_end_line(p);
  300. isl_printer_free(p);
  301. }
  302. static void print_indent(FILE *dst, int indent)
  303. {
  304. fprintf(dst, "%*s", indent, "");
  305. }
  306. /* Print a list of iterators of type "type" with names "ids" to "out".
  307. * Each iterator is assigned one of the cuda identifiers in cuda_dims.
  308. * In particular, the last iterator is assigned the x identifier
  309. * (the first in the list of cuda identifiers).
  310. */
  311. static void print_iterators(FILE *out, const char *type,
  312. __isl_keep isl_id_list *ids, const char *cuda_dims[])
  313. {
  314. int i, n;
  315. n = isl_id_list_n_id(ids);
  316. if (n <= 0)
  317. return;
  318. print_indent(out, 4);
  319. fprintf(out, "%s ", type);
  320. for (i = 0; i < n; ++i) {
  321. isl_id *id;
  322. if (i)
  323. fprintf(out, ", ");
  324. id = isl_id_list_get_id(ids, i);
  325. fprintf(out, "%s = %s", isl_id_get_name(id),
  326. cuda_dims[n - 1 - i]);
  327. isl_id_free(id);
  328. }
  329. fprintf(out, ";\n");
  330. }
  331. static void print_kernel_iterators(FILE *out, struct ppcg_kernel *kernel)
  332. {
  333. isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree);
  334. const char *type;
  335. const char *block_dims[] = { "blockIdx.x", "blockIdx.y" };
  336. const char *thread_dims[] = { "threadIdx.x", "threadIdx.y",
  337. "threadIdx.z" };
  338. type = isl_options_get_ast_iterator_type(ctx);
  339. print_iterators(out, type, kernel->block_ids, block_dims);
  340. print_iterators(out, type, kernel->thread_ids, thread_dims);
  341. }
  342. static __isl_give isl_printer *print_kernel_var(__isl_take isl_printer *p,
  343. struct ppcg_kernel_var *var)
  344. {
  345. int j;
  346. p = isl_printer_start_line(p);
  347. if (var->type == ppcg_access_shared)
  348. p = isl_printer_print_str(p, "__shared__ ");
  349. p = isl_printer_print_str(p, var->array->type);
  350. p = isl_printer_print_str(p, " ");
  351. p = isl_printer_print_str(p, var->name);
  352. for (j = 0; j < var->array->n_index; ++j) {
  353. isl_val *v;
  354. p = isl_printer_print_str(p, "[");
  355. v = isl_vec_get_element_val(var->size, j);
  356. p = isl_printer_print_val(p, v);
  357. isl_val_free(v);
  358. p = isl_printer_print_str(p, "]");
  359. }
  360. p = isl_printer_print_str(p, ";");
  361. p = isl_printer_end_line(p);
  362. return p;
  363. }
  364. static __isl_give isl_printer *print_kernel_vars(__isl_take isl_printer *p,
  365. struct ppcg_kernel *kernel)
  366. {
  367. int i;
  368. for (i = 0; i < kernel->n_var; ++i)
  369. p = print_kernel_var(p, &kernel->var[i]);
  370. return p;
  371. }
  372. /* Print a sync statement.
  373. */
  374. static __isl_give isl_printer *print_sync(__isl_take isl_printer *p,
  375. struct ppcg_kernel_stmt *stmt)
  376. {
  377. p = isl_printer_start_line(p);
  378. p = isl_printer_print_str(p, "__syncthreads();");
  379. p = isl_printer_end_line(p);
  380. return p;
  381. }
  382. /* This function is called for each user statement in the AST,
  383. * i.e., for each kernel body statement, copy statement or sync statement.
  384. */
  385. static __isl_give isl_printer *print_kernel_stmt(__isl_take isl_printer *p,
  386. __isl_take isl_ast_print_options *print_options,
  387. __isl_keep isl_ast_node *node, void *user)
  388. {
  389. isl_id *id;
  390. struct ppcg_kernel_stmt *stmt;
  391. id = isl_ast_node_get_annotation(node);
  392. stmt = isl_id_get_user(id);
  393. isl_id_free(id);
  394. isl_ast_print_options_free(print_options);
  395. switch (stmt->type) {
  396. case ppcg_kernel_copy:
  397. return ppcg_kernel_print_copy(p, stmt);
  398. case ppcg_kernel_sync:
  399. return print_sync(p, stmt);
  400. case ppcg_kernel_domain:
  401. return ppcg_kernel_print_domain(p, stmt);
  402. }
  403. return p;
  404. }
  405. static void print_kernel(struct gpu_prog *prog, struct ppcg_kernel *kernel,
  406. struct cuda_info *cuda)
  407. {
  408. isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree);
  409. isl_ast_print_options *print_options;
  410. isl_printer *p;
  411. print_kernel_headers(prog, kernel, cuda);
  412. fprintf(cuda->kernel_c, "{\n");
  413. print_kernel_iterators(cuda->kernel_c, kernel);
  414. p = isl_printer_to_file(ctx, cuda->kernel_c);
  415. p = isl_printer_set_output_format(p, ISL_FORMAT_C);
  416. p = isl_printer_indent(p, 4);
  417. p = print_kernel_vars(p, kernel);
  418. p = isl_printer_end_line(p);
  419. p = ppcg_set_macro_names(p);
  420. p = gpu_print_macros(p, kernel->tree);
  421. print_options = isl_ast_print_options_alloc(ctx);
  422. print_options = isl_ast_print_options_set_print_user(print_options,
  423. &print_kernel_stmt, NULL);
  424. p = isl_ast_node_print(kernel->tree, p, print_options);
  425. isl_printer_free(p);
  426. fprintf(cuda->kernel_c, "}\n");
  427. }
  428. /* Print code for initializing the device for execution of the transformed
  429. * code. This includes declaring locally defined variables as well as
  430. * declaring and allocating the required copies of arrays on the device.
  431. */
  432. static __isl_give isl_printer *init_device(__isl_take isl_printer *p,
  433. struct gpu_prog *prog)
  434. {
  435. p = print_cuda_macros(p);
  436. p = gpu_print_local_declarations(p, prog);
  437. p = declare_device_arrays(p, prog);
  438. p = allocate_device_arrays(p, prog);
  439. return p;
  440. }
  441. /* Print code for clearing the device after execution of the transformed code.
  442. * In particular, free the memory that was allocated on the device.
  443. */
  444. static __isl_give isl_printer *clear_device(__isl_take isl_printer *p,
  445. struct gpu_prog *prog)
  446. {
  447. p = free_device_arrays(p, prog);
  448. return p;
  449. }
  450. /* Print a statement for copying an array to or from the device,
  451. * or for initializing or clearing the device.
  452. * The statement identifier of a copying node is called
  453. * "to_device_<array name>" or "from_device_<array name>" and
  454. * its user pointer points to the gpu_array_info of the array
  455. * that needs to be copied.
  456. * The node for initializing the device is called "init_device".
  457. * The node for clearing the device is called "clear_device".
  458. *
  459. * Extract the array (if any) from the identifier and call
  460. * init_device, clear_device, copy_array_to_device or copy_array_from_device.
  461. */
  462. static __isl_give isl_printer *print_device_node(__isl_take isl_printer *p,
  463. __isl_keep isl_ast_node *node, struct gpu_prog *prog)
  464. {
  465. isl_ast_expr *expr, *arg;
  466. isl_id *id;
  467. const char *name;
  468. struct gpu_array_info *array;
  469. expr = isl_ast_node_user_get_expr(node);
  470. arg = isl_ast_expr_get_op_arg(expr, 0);
  471. id = isl_ast_expr_get_id(arg);
  472. name = isl_id_get_name(id);
  473. array = isl_id_get_user(id);
  474. isl_id_free(id);
  475. isl_ast_expr_free(arg);
  476. isl_ast_expr_free(expr);
  477. if (!name)
  478. return isl_printer_free(p);
  479. if (!strcmp(name, "init_device"))
  480. return init_device(p, prog);
  481. if (!strcmp(name, "clear_device"))
  482. return clear_device(p, prog);
  483. if (!array)
  484. return isl_printer_free(p);
  485. if (!prefixcmp(name, "to_device"))
  486. return copy_array_to_device(p, array);
  487. else
  488. return copy_array_from_device(p, array);
  489. }
  490. struct print_host_user_data {
  491. struct cuda_info *cuda;
  492. struct gpu_prog *prog;
  493. };
  494. /* Print the user statement of the host code to "p".
  495. *
  496. * The host code may contain original user statements, kernel launches,
  497. * statements that copy data to/from the device and statements
  498. * the initialize or clear the device.
  499. * The original user statements and the kernel launches have
  500. * an associated annotation, while the other statements do not.
  501. * The latter are handled by print_device_node.
  502. * The annotation on the user statements is called "user".
  503. *
  504. * In case of a kernel launch, print a block of statements that
  505. * defines the grid and the block and then launches the kernel.
  506. */
  507. __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
  508. __isl_take isl_ast_print_options *print_options,
  509. __isl_keep isl_ast_node *node, void *user)
  510. {
  511. isl_id *id;
  512. int is_user;
  513. struct ppcg_kernel *kernel;
  514. struct ppcg_kernel_stmt *stmt;
  515. struct print_host_user_data *data;
  516. isl_ast_print_options_free(print_options);
  517. data = (struct print_host_user_data *) user;
  518. id = isl_ast_node_get_annotation(node);
  519. if (!id)
  520. return print_device_node(p, node, data->prog);
  521. is_user = !strcmp(isl_id_get_name(id), "user");
  522. kernel = is_user ? NULL : isl_id_get_user(id);
  523. stmt = is_user ? isl_id_get_user(id) : NULL;
  524. isl_id_free(id);
  525. if (is_user)
  526. return ppcg_kernel_print_domain(p, stmt);
  527. p = ppcg_start_block(p);
  528. p = isl_printer_start_line(p);
  529. p = isl_printer_print_str(p, "dim3 k");
  530. p = isl_printer_print_int(p, kernel->id);
  531. p = isl_printer_print_str(p, "_dimBlock");
  532. p = print_reverse_list(p, kernel->n_block, kernel->block_dim);
  533. p = isl_printer_print_str(p, ";");
  534. p = isl_printer_end_line(p);
  535. p = print_grid(p, kernel);
  536. p = isl_printer_start_line(p);
  537. p = isl_printer_print_str(p, "kernel");
  538. p = isl_printer_print_int(p, kernel->id);
  539. p = isl_printer_print_str(p, " <<<k");
  540. p = isl_printer_print_int(p, kernel->id);
  541. p = isl_printer_print_str(p, "_dimGrid, k");
  542. p = isl_printer_print_int(p, kernel->id);
  543. p = isl_printer_print_str(p, "_dimBlock>>> (");
  544. p = print_kernel_arguments(p, data->prog, kernel, 0);
  545. p = isl_printer_print_str(p, ");");
  546. p = isl_printer_end_line(p);
  547. p = isl_printer_start_line(p);
  548. p = isl_printer_print_str(p, "cudaCheckKernel();");
  549. p = isl_printer_end_line(p);
  550. p = ppcg_end_block(p);
  551. p = isl_printer_start_line(p);
  552. p = isl_printer_end_line(p);
  553. #if 0
  554. print_kernel(data->prog, kernel, data->cuda);
  555. #endif
  556. return p;
  557. }
  558. static __isl_give isl_printer *print_host_code(__isl_take isl_printer *p,
  559. struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
  560. struct cuda_info *cuda)
  561. {
  562. isl_ast_print_options *print_options;
  563. isl_ctx *ctx = isl_ast_node_get_ctx(tree);
  564. struct print_host_user_data data = { cuda, prog };
  565. print_options = isl_ast_print_options_alloc(ctx);
  566. print_options = isl_ast_print_options_set_print_user(print_options,
  567. &print_host_user, &data);
  568. p = gpu_print_macros(p, tree);
  569. p = isl_ast_node_print(tree, p, print_options);
  570. return p;
  571. }
  572. /* Given a gpu_prog "prog" and the corresponding transformed AST
  573. * "tree", print the entire CUDA code to "p".
  574. * "types" collects the types for which a definition has already
  575. * been printed.
  576. */
  577. static __isl_give isl_printer *print_cuda(__isl_take isl_printer *p,
  578. struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
  579. struct gpu_types *types, void *user)
  580. {
  581. struct cuda_info *cuda = user;
  582. isl_printer *kernel;
  583. kernel = isl_printer_to_file(isl_printer_get_ctx(p), cuda->kernel_c);
  584. kernel = isl_printer_set_output_format(kernel, ISL_FORMAT_C);
  585. kernel = gpu_print_types(kernel, types, prog);
  586. isl_printer_free(kernel);
  587. if (!kernel)
  588. return isl_printer_free(p);
  589. p = print_host_code(p, prog, tree, cuda);
  590. return p;
  591. }
  592. /* Transform the code in the file called "input" by replacing
  593. * all scops by corresponding CUDA code.
  594. * The names of the output files are derived from "input".
  595. *
  596. * We let generate_gpu do all the hard work and then let it call
  597. * us back for printing the AST in print_cuda.
  598. *
  599. * To prepare for this printing, we first open the output files
  600. * and we close them after generate_gpu has finished.
  601. */
  602. int generate_cuda(isl_ctx *ctx, struct ppcg_options *options,
  603. const char *input)
  604. {
  605. struct cuda_info cuda;
  606. int r;
  607. cuda_open_files(&cuda, input);
  608. r = generate_gpu(ctx, input, cuda.host_c, options, &print_cuda, &cuda);
  609. cuda_close_files(&cuda);
  610. return r;
  611. }