/*
 * Copyright (c) Lynne
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avassert.h"
#include "mem.h"
#include "vulkan.h"

#include "libavutil/vulkan_loader.h"

const VkComponentMapping ff_comp_identity_map = {
    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
};

/* Converts return values to strings */
const char *ff_vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_UNKNOWN);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_FRAGMENTATION);
    CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS);
    CASE(VK_PIPELINE_COMPILE_REQUIRED);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_NOT_PERMITTED_KHR);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    CASE(VK_THREAD_IDLE_KHR);
    CASE(VK_THREAD_DONE_KHR);
    CASE(VK_OPERATION_DEFERRED_KHR);
    CASE(VK_OPERATION_NOT_DEFERRED_KHR);
    default: return "Unknown error";
    }
#undef CASE
}

static void load_enabled_qfs(FFVulkanContext *s)
{
    s->nb_qfs = 0;
    for (int i = 0; i < s->hwctx->nb_qf; i++) {
        /* Skip duplicates */
        int skip = 0;
        for (int j = 0; j < s->nb_qfs; j++) {
            if (s->qfs[j] == s->hwctx->qf[i].idx) {
                skip = 1;
                break;
            }
        }
        if (skip)
            continue;

        s->qfs[s->nb_qfs++] = s->hwctx->qf[i].idx;
    }
}
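
/* Note on the initializers below: the Vulkan property/feature queries fill
 * extension structs through a pNext chain, so each struct links to the next
 * one and a single vkGetPhysicalDevice*2() call populates the whole chain. */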

int ff_vk_load_props(FFVulkanContext *s)
{
    FFVulkanFunctions *vk = &s->vkfn;

    s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT,
    };
    s->optical_flow_props = (VkPhysicalDeviceOpticalFlowPropertiesNV) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_PROPERTIES_NV,
        .pNext = &s->hprops,
    };
    s->coop_matrix_props = (VkPhysicalDeviceCooperativeMatrixPropertiesKHR) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
        .pNext = &s->optical_flow_props,
    };
    s->subgroup_props = (VkPhysicalDeviceSubgroupSizeControlProperties) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES,
        .pNext = &s->coop_matrix_props,
    };
    s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT,
        .pNext = &s->subgroup_props,
    };
    s->driver_props = (VkPhysicalDeviceDriverProperties) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
        .pNext = &s->desc_buf_props,
    };
    s->props = (VkPhysicalDeviceProperties2) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
        .pNext = &s->driver_props,
    };

    s->atomic_float_feats = (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT,
    };
    s->feats_12 = (VkPhysicalDeviceVulkan12Features) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
        .pNext = &s->atomic_float_feats,
    };
    s->feats = (VkPhysicalDeviceFeatures2) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = &s->feats_12,
    };

    vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
    vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
    vk->GetPhysicalDeviceFeatures2(s->hwctx->phys_dev, &s->feats);

    load_enabled_qfs(s);

    if (s->qf_props)
        return 0;

    vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, NULL);

    s->qf_props = av_calloc(s->tot_nb_qfs, sizeof(*s->qf_props));
    if (!s->qf_props)
        return AVERROR(ENOMEM);

    s->query_props = av_calloc(s->tot_nb_qfs, sizeof(*s->query_props));
    if (!s->query_props) {
        av_freep(&s->qf_props);
        return AVERROR(ENOMEM);
    }

    s->video_props = av_calloc(s->tot_nb_qfs, sizeof(*s->video_props));
    if (!s->video_props) {
        av_freep(&s->qf_props);
        av_freep(&s->query_props);
        return AVERROR(ENOMEM);
    }

    for (uint32_t i = 0; i < s->tot_nb_qfs; i++) {
        s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR,
        };
        s->video_props[i] = (VkQueueFamilyVideoPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
            .pNext = &s->query_props[i],
        };
        s->qf_props[i] = (VkQueueFamilyProperties2) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
            .pNext = &s->video_props[i],
        };
    }

    vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, s->qf_props);

    if (s->extensions & FF_VK_EXT_COOP_MATRIX) {
        vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev,
                                                            &s->coop_mat_props_nb, NULL);

        if (s->coop_mat_props_nb) {
            s->coop_mat_props = av_malloc_array(s->coop_mat_props_nb,
                                                sizeof(VkCooperativeMatrixPropertiesKHR));
            if (!s->coop_mat_props)
                return AVERROR(ENOMEM);

            for (int i = 0; i < s->coop_mat_props_nb; i++) {
                s->coop_mat_props[i] = (VkCooperativeMatrixPropertiesKHR) {
                    .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
                };
            }

            vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev,
                                                                &s->coop_mat_props_nb,
                                                                s->coop_mat_props);
        }
    }

    return 0;
}

static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
{
    for (int i = 0; i < s->hwctx->nb_qf; i++) {
        if (s->hwctx->qf[i].flags & dev_family) {
            *nb = s->hwctx->qf[i].num;
            return s->hwctx->qf[i].idx;
        }
    }

    av_assert0(0); /* Should never happen */
}

int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
                  VkQueueFlagBits dev_family)
{
    /* Fill in queue families from context if not done yet */
    if (!s->nb_qfs)
        load_enabled_qfs(s);

    return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues));
}

void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
{
    FFVulkanFunctions *vk = &s->vkfn;

    for (int i = 0; i < pool->pool_size; i++) {
        FFVkExecContext *e = &pool->contexts[i];

        if (e->fence) {
            vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
            vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);
        }
        pthread_mutex_destroy(&e->lock);

        ff_vk_exec_discard_deps(s, e);

        av_free(e->frame_deps);
        av_free(e->buf_deps);
        av_free(e->queue_family_dst);
        av_free(e->layout_dst);
        av_free(e->access_dst);
        av_free(e->frame_update);
        av_free(e->frame_locked);
        av_free(e->sem_sig);
        av_free(e->sem_sig_val_dst);
        av_free(e->sem_wait);
    }

    if (pool->cmd_bufs)
        vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool,
                               pool->pool_size, pool->cmd_bufs);
    if (pool->cmd_buf_pool)
        vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc);
    if (pool->query_pool)
        vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc);

    av_free(pool->query_data);
    av_free(pool->cmd_bufs);
    av_free(pool->contexts);
}

int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
                         FFVkExecPool *pool, int nb_contexts,
                         int nb_queries, VkQueryType query_type, int query_64bit,
                         const void *query_create_pnext)
{
    int err;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    VkCommandPoolCreateInfo cqueue_create;
    VkCommandBufferAllocateInfo cbuf_create;

    const VkQueryPoolVideoEncodeFeedbackCreateInfoKHR *ef = NULL;

    if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) {
        ef = ff_vk_find_struct(query_create_pnext,
                               VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR);
        if (!ef)
            return AVERROR(EINVAL);
    }

    /* Create command pool */
    cqueue_create = (VkCommandPoolCreateInfo) {
        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags            = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
                            VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = qf->queue_family,
    };
    ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
                                s->hwctx->alloc, &pool->cmd_buf_pool);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    /* Allocate space for command buffers */
    pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs));
    if (!pool->cmd_bufs) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    /* Allocate command buffers */
    cbuf_create = (VkCommandBufferAllocateInfo) {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandPool        = pool->cmd_buf_pool,
        .commandBufferCount = nb_contexts,
    };
    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create,
                                     pool->cmd_bufs);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    /* Query pool */
    if (nb_queries) {
        VkQueryPoolCreateInfo query_pool_info = {
            .sType      = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
            .pNext      = query_create_pnext,
            .queryType  = query_type,
            .queryCount = nb_queries*nb_contexts,
        };
        ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
                                  s->hwctx->alloc, &pool->query_pool);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        pool->nb_queries = nb_queries;
        pool->query_64bit = query_64bit;
        pool->query_status_stride = 1 + 1; /* One result, one status by default */
        pool->query_results = nb_queries;
        pool->query_statuses = nb_queries;

        /* Video encode queries produce two results per query */
        if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) {
            int nb_results = av_popcount(ef->encodeFeedbackFlags);
            pool->query_status_stride = nb_results + 1;
            pool->query_results *= nb_results;
        } else if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
            pool->query_status_stride = 1;
            pool->query_results = 0;
        }

        pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4);

        /* Allocate space for the query data */
        pool->query_data = av_calloc(nb_contexts, pool->qd_size);
        if (!pool->query_data) {
            err = AVERROR(ENOMEM);
            goto fail;
        }
    }

    /* Allocate space for the contexts */
    pool->contexts = av_calloc(nb_contexts, sizeof(*pool->contexts));
    if (!pool->contexts) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    pool->pool_size = nb_contexts;

    /* Init contexts */
    for (int i = 0; i < pool->pool_size; i++) {
        FFVkExecContext *e = &pool->contexts[i];

        VkFenceCreateInfo fence_create = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
            .flags = VK_FENCE_CREATE_SIGNALED_BIT,
        };

        /* Mutex */
        err = pthread_mutex_init(&e->lock, NULL);
        if (err != 0)
            return AVERROR(err);

        /* Fence */
        ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc,
                              &e->fence);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }

        e->idx = i;
        e->parent = pool;

        /* Query data */
        e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i;
        e->query_idx = nb_queries*i;

        /* Command buffer */
        e->buf = pool->cmd_bufs[i];

        /* Queue index distribution */
        e->qi = i % qf->nb_queues;
        e->qf = qf->queue_family;
        vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
                           e->qi, &e->queue);
    }

    return 0;

fail:
    ff_vk_exec_pool_free(s, pool);
    return err;
}
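
/* Typical usage (illustrative sketch; names are assumed): a compute filter
 * would size the pool by the number of queues and skip the query pool:
 *
 *     FFVkExecPool pool;
 *     err = ff_vk_exec_pool_init(s, &qf, &pool, qf.nb_queues * 4,
 *                                0, 0, 0, NULL);
 */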

VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e,
                              void **data, VkQueryResultFlagBits flags)
{
    FFVulkanFunctions *vk = &s->vkfn;
    const FFVkExecPool *pool = e->parent;
    VkQueryResultFlags qf = flags & ~(VK_QUERY_RESULT_64_BIT |
                                      VK_QUERY_RESULT_WITH_STATUS_BIT_KHR);

    if (!e->query_data) {
        av_log(s, AV_LOG_ERROR, "Requested a query with a NULL query_data pointer!\n");
        return VK_INCOMPLETE;
    }

    qf |= pool->query_64bit ?
          VK_QUERY_RESULT_64_BIT : 0x0;
    qf |= pool->query_statuses ?
          VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;

    if (data)
        *data = e->query_data;

    return vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool,
                                   e->query_idx,
                                   pool->nb_queries,
                                   pool->qd_size, e->query_data,
                                   pool->qd_size, qf);
}

FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool)
{
    uint32_t idx = pool->idx++;
    idx %= pool->pool_size;
    return &pool->contexts[idx];
}

void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e)
{
    FFVulkanFunctions *vk = &s->vkfn;
    pthread_mutex_lock(&e->lock);
    vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
    ff_vk_exec_discard_deps(s, e);
    pthread_mutex_unlock(&e->lock);
}

int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    const FFVkExecPool *pool = e->parent;

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    /* Wait for the fence to be signalled */
    vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);

    /* vkResetFences is defined as being host-synchronized */
    pthread_mutex_lock(&e->lock);
    vk->ResetFences(s->hwctx->act_dev, 1, &e->fence);
    pthread_mutex_unlock(&e->lock);

    /* Discard queue dependencies */
    ff_vk_exec_discard_deps(s, e);

    ret = vk->BeginCommandBuffer(e->buf, &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to start command recording: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    if (pool->nb_queries)
        vk->CmdResetQueryPool(e->buf, pool->query_pool,
                              e->query_idx, pool->nb_queries);

    return 0;
}
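
/* Typical usage (illustrative sketch; names are assumed): contexts are
 * handed out round-robin and reused once their fence signals:
 *
 *     FFVkExecContext *exec = ff_vk_exec_get(&pool);
 *     ff_vk_exec_start(s, exec);
 *     // ...record commands into exec->buf, register dependencies...
 *     ff_vk_exec_submit(s, exec);
 *     ff_vk_exec_wait(s, exec); // only if results are needed immediately
 */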

void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
{
    for (int j = 0; j < e->nb_buf_deps; j++)
        av_buffer_unref(&e->buf_deps[j]);
    e->nb_buf_deps = 0;

    for (int j = 0; j < e->nb_frame_deps; j++) {
        AVFrame *f = e->frame_deps[j];
        if (e->frame_locked[j]) {
            AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
            AVVulkanFramesContext *vkfc = hwfc->hwctx;
            AVVkFrame *vkf = (AVVkFrame *)f->data[0];
            vkfc->unlock_frame(hwfc, vkf);
            e->frame_locked[j] = 0;
        }
        e->frame_update[j] = 0;
        if (f->buf[0])
            av_frame_free(&e->frame_deps[j]);
    }
    e->nb_frame_deps = 0;

    e->sem_wait_cnt = 0;
    e->sem_sig_cnt = 0;
    e->sem_sig_val_dst_cnt = 0;
}

int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
                           AVBufferRef **deps, int nb_deps, int ref)
{
    AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size,
                                        (e->nb_buf_deps + nb_deps) * sizeof(*dst));
    if (!dst) {
        ff_vk_exec_discard_deps(s, e);
        return AVERROR(ENOMEM);
    }

    e->buf_deps = dst;

    for (int i = 0; i < nb_deps; i++) {
        e->buf_deps[e->nb_buf_deps] = ref ? av_buffer_ref(deps[i]) : deps[i];
        if (!e->buf_deps[e->nb_buf_deps]) {
            ff_vk_exec_discard_deps(s, e);
            return AVERROR(ENOMEM);
        }
        e->nb_buf_deps++;
    }

    return 0;
}

#define ARR_REALLOC(str, arr, alloc_s, cnt)                                \
    do {                                                                   \
        arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr));  \
        if (!arr) {                                                        \
            ff_vk_exec_discard_deps(s, e);                                 \
            return AVERROR(ENOMEM);                                        \
        }                                                                  \
        str->arr = arr;                                                    \
    } while (0)

typedef struct TempSyncCtx {
    int nb_sem;
    VkSemaphore sem[];
} TempSyncCtx;

static void destroy_tmp_semaphores(void *opaque, uint8_t *data)
{
    FFVulkanContext *s = opaque;
    FFVulkanFunctions *vk = &s->vkfn;
    TempSyncCtx *ts = (TempSyncCtx *)data;

    for (int i = 0; i < ts->nb_sem; i++)
        vk->DestroySemaphore(s->hwctx->act_dev, ts->sem[i], s->hwctx->alloc);

    av_free(ts);
}

int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e,
                                VkSemaphore *sem, int nb,
                                VkPipelineStageFlagBits2 stage,
                                int wait)
{
    int err;
    size_t buf_size;
    AVBufferRef *buf;
    TempSyncCtx *ts;
    FFVulkanFunctions *vk = &s->vkfn;

    /* Do not transfer ownership if we're signalling a binary semaphore,
     * since we're probably exporting it. */
    if (!wait) {
        for (int i = 0; i < nb; i++) {
            VkSemaphoreSubmitInfo *sem_sig;
            ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);

            e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) {
                .sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
                .semaphore = sem[i],
                .stageMask = stage,
            };
        }

        return 0;
    }

    buf_size = sizeof(int) + sizeof(VkSemaphore)*nb;
    ts = av_mallocz(buf_size);
    if (!ts) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    memcpy(ts->sem, sem, nb*sizeof(*sem));
    ts->nb_sem = nb;

    buf = av_buffer_create((uint8_t *)ts, buf_size, destroy_tmp_semaphores, s, 0);
    if (!buf) {
        av_free(ts);
        err = AVERROR(ENOMEM);
        goto fail;
    }

    err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
    if (err < 0) {
        av_buffer_unref(&buf);
        return err;
    }

    for (int i = 0; i < nb; i++) {
        VkSemaphoreSubmitInfo *sem_wait;
        ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);

        e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) {
            .sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = sem[i],
            .stageMask = stage,
        };
    }

    return 0;

fail:
    for (int i = 0; i < nb; i++)
        vk->DestroySemaphore(s->hwctx->act_dev, sem[i], s->hwctx->alloc);

    return err;
}

int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
                             VkPipelineStageFlagBits2 wait_stage,
                             VkPipelineStageFlagBits2 signal_stage)
{
    uint8_t *frame_locked;
    uint8_t *frame_update;
    AVFrame **frame_deps;
    VkImageLayout *layout_dst;
    uint32_t *queue_family_dst;
    VkAccessFlagBits *access_dst;

    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
    AVVulkanFramesContext *vkfc = hwfc->hwctx;
    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
    int nb_images = ff_vk_count_images(vkf);

    /* Don't add duplicates */
    for (int i = 0; i < e->nb_frame_deps; i++)
        if (e->frame_deps[i]->data[0] == f->data[0])
            return 1;

    ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps);
    ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps);
    ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps);

    ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps);
    ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps);
    ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps);

    e->frame_deps[e->nb_frame_deps] = f->buf[0] ? av_frame_clone(f) : f;
    if (!e->frame_deps[e->nb_frame_deps]) {
        ff_vk_exec_discard_deps(s, e);
        return AVERROR(ENOMEM);
    }

    vkfc->lock_frame(hwfc, vkf);
    e->frame_locked[e->nb_frame_deps] = 1;
    e->frame_update[e->nb_frame_deps] = 0;
    e->nb_frame_deps++;

    for (int i = 0; i < nb_images; i++) {
        VkSemaphoreSubmitInfo *sem_wait;
        VkSemaphoreSubmitInfo *sem_sig;
        uint64_t **sem_sig_val_dst;

        ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);
        ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);
        ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);

        e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) {
            .sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = vkf->sem[i],
            .value     = vkf->sem_value[i],
            .stageMask = wait_stage,
        };

        e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) {
            .sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = vkf->sem[i],
            .value     = vkf->sem_value[i] + 1,
            .stageMask = signal_stage,
        };

        e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i];
        e->sem_sig_val_dst_cnt++;
    }

    return 0;
}
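
/* Typical usage (illustrative sketch; names are assumed): filters register
 * every input/output frame, so its per-image timeline semaphores are waited
 * on at the current value and re-signalled at value + 1 by the submission:
 *
 *     err = ff_vk_exec_add_dep_frame(s, exec, frame,
 *                                    VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
 *                                    VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
 */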

void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
                             VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar)
{
    int i;
    for (i = 0; i < e->nb_frame_deps; i++)
        if (e->frame_deps[i]->data[0] == f->data[0])
            break;
    av_assert0(i < e->nb_frame_deps);

    /* Don't update duplicates */
    if (nb_img_bar && !e->frame_update[i])
        (*nb_img_bar)++;

    e->queue_family_dst[i] = bar->dstQueueFamilyIndex;
    e->access_dst[i] = bar->dstAccessMask;
    e->layout_dst[i] = bar->newLayout;
    e->frame_update[i] = 1;
}

int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
                                VkSemaphore *dst, uint64_t *dst_val,
                                AVFrame *f)
{
    uint64_t **sem_sig_val_dst;
    AVVkFrame *vkf = (AVVkFrame *)f->data[0];

    /* Reject unknown frames */
    int i;
    for (i = 0; i < e->nb_frame_deps; i++)
        if (e->frame_deps[i]->data[0] == f->data[0])
            break;
    if (i == e->nb_frame_deps)
        return AVERROR(EINVAL);

    ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);

    *dst = vkf->sem[0];
    *dst_val = vkf->sem_value[0];

    e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val;
    e->sem_sig_val_dst_cnt++;

    return 0;
}

int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkCommandBufferSubmitInfo cmd_buf_info = (VkCommandBufferSubmitInfo) {
        .sType         = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
        .commandBuffer = e->buf,
    };
    VkSubmitInfo2 submit_info = (VkSubmitInfo2) {
        .sType                    = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        .pCommandBufferInfos      = &cmd_buf_info,
        .commandBufferInfoCount   = 1,
        .pWaitSemaphoreInfos      = e->sem_wait,
        .waitSemaphoreInfoCount   = e->sem_wait_cnt,
        .pSignalSemaphoreInfos    = e->sem_sig,
        .signalSemaphoreInfoCount = e->sem_sig_cnt,
    };

    ret = vk->EndCommandBuffer(e->buf);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               ff_vk_ret2str(ret));
        ff_vk_exec_discard_deps(s, e);
        return AVERROR_EXTERNAL;
    }

    s->hwctx->lock_queue(s->device, e->qf, e->qi);
    ret = vk->QueueSubmit2(e->queue, 1, &submit_info, e->fence);
    s->hwctx->unlock_queue(s->device, e->qf, e->qi);

    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               ff_vk_ret2str(ret));
        ff_vk_exec_discard_deps(s, e);
        return AVERROR_EXTERNAL;
    }

    for (int i = 0; i < e->sem_sig_val_dst_cnt; i++)
        *e->sem_sig_val_dst[i] += 1;

    /* Unlock all frames */
    for (int j = 0; j < e->nb_frame_deps; j++) {
        if (e->frame_locked[j]) {
            AVFrame *f = e->frame_deps[j];
            AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
            AVVulkanFramesContext *vkfc = hwfc->hwctx;
            AVVkFrame *vkf = (AVVkFrame *)f->data[0];

            if (e->frame_update[j]) {
                int nb_images = ff_vk_count_images(vkf);
                for (int i = 0; i < nb_images; i++) {
                    vkf->layout[i] = e->layout_dst[j];
                    vkf->access[i] = e->access_dst[j];
                    vkf->queue_family[i] = e->queue_family_dst[j];
                }
            }
            vkfc->unlock_frame(hwfc, vkf);
            e->frame_locked[j] = 0;
        }
    }

    e->had_submission = 1;

    return 0;
}

int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    FFVulkanFunctions *vk = &s->vkfn;

    VkMemoryAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = alloc_extension,
    };

    /* Align if we need to */
    if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    /* The Vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((req_flags != UINT32_MAX) &&
            ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
                             s->hwctx->alloc, mem);
    if (ret != VK_SUCCESS)
        return AVERROR(ENOMEM);

    if (mem_flags)
        *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;

    return 0;
}
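
/* Typical usage (illustrative sketch; names are assumed): upload memory is
 * requested with host-visible flags; the flags of the memory type actually
 * chosen are ORed into the mem_flags argument:
 *
 *     err = ff_vk_alloc_mem(s, &req.memoryRequirements,
 *                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
 *                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
 *                           NULL, &buf->flags, &buf->mem);
 */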

int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
                     void *pNext, void *alloc_pNext,
                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
    int err;
    VkResult ret;
    int use_ded_mem;
    FFVulkanFunctions *vk = &s->vkfn;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = pNext,
        .usage       = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size        = size, /* Gets FFALIGNED during alloc if host visible
                                but should be ok */
    };

    VkMemoryAllocateFlagsInfo alloc_flags = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
        .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
    };
    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    };
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        .pNext = alloc_pNext,
    };
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    };
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
        .pNext = &ded_req,
    };

    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &buf->buf);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    req_desc.buffer = buf->buf;

    vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);

    /* In case the implementation prefers/requires dedicated allocation */
    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
    if (use_ded_mem) {
        ded_alloc.buffer = buf->buf;
        ded_alloc.pNext = alloc_pNext;
        alloc_pNext = &ded_alloc;
    }

    if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
        alloc_flags.pNext = alloc_pNext;
        alloc_pNext = &alloc_flags;
    }

    err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext,
                          &buf->flags, &buf->mem);
    if (err)
        return err;

    ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
        VkBufferDeviceAddressInfo address_info = {
            .sType  = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
            .buffer = buf->buf,
        };
        buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
    }

    buf->size = size;

    return 0;
}
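
/* Typical usage (illustrative sketch; names are assumed): a host-mappable
 * staging buffer for uploads:
 *
 *     FFVkBuffer buf;
 *     err = ff_vk_create_buf(s, &buf, size, NULL, NULL,
 *                            VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
 *                            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
 *                            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
 */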

static void destroy_avvkbuf(void *opaque, uint8_t *data)
{
    FFVulkanContext *s = opaque;
    FFVkBuffer *buf = (FFVkBuffer *)data;
    ff_vk_free_buf(s, buf);
    av_free(buf);
}

int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size,
                       void *pNext, void *alloc_pNext,
                       VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
    int err;
    AVBufferRef *buf;
    FFVkBuffer *vkb = av_mallocz(sizeof(*vkb));
    if (!vkb)
        return AVERROR(ENOMEM);

    err = ff_vk_create_buf(s, vkb, size, pNext, alloc_pNext, usage, flags);
    if (err < 0) {
        av_free(vkb);
        return err;
    }

    buf = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), destroy_avvkbuf, s, 0);
    if (!buf) {
        destroy_avvkbuf(s, (uint8_t *)vkb);
        return AVERROR(ENOMEM);
    }

    *ref = buf;

    return 0;
}

int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[],
                      int nb_buffers, int invalidate)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkMappedMemoryRange inval_list[64];
    int inval_count = 0;

    for (int i = 0; i < nb_buffers; i++) {
        void *dst;
        ret = vk->MapMemory(s->hwctx->act_dev, buf[i]->mem, 0,
                            VK_WHOLE_SIZE, 0, &dst);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
        mem[i] = dst;
    }

    if (!invalidate)
        return 0;

    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange ival_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i]->mem,
            .size   = VK_WHOLE_SIZE,
        };
        if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;
        inval_list[inval_count++] = ival_buf;
    }

    if (inval_count) {
        ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
                                               inval_list);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}

int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers,
                        int flush)
{
    int err = 0;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkMappedMemoryRange flush_list[64];
    int flush_count = 0;

    if (flush) {
        for (int i = 0; i < nb_buffers; i++) {
            const VkMappedMemoryRange flush_buf = {
                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
                .memory = buf[i]->mem,
                .size   = VK_WHOLE_SIZE,
            };
            if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                continue;
            flush_list[flush_count++] = flush_buf;
        }
    }

    if (flush_count) {
        ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
                                          flush_list);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
        }
    }

    for (int i = 0; i < nb_buffers; i++)
        vk->UnmapMemory(s->hwctx->act_dev, buf[i]->mem);

    return err;
}
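
/* Typical usage (illustrative sketch; names are assumed): map, write, then
 * unmap with a flush; both paths skip ranges of memory that already has
 * VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, so flushing coherent memory is free:
 *
 *     FFVkBuffer *bufp = &buf;
 *     uint8_t *mem;
 *     err = ff_vk_map_buffers(s, &bufp, &mem, 1, 0);
 *     memcpy(mem, src, src_size);
 *     err = ff_vk_unmap_buffers(s, &bufp, 1, 1);
 */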

void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
{
    FFVulkanFunctions *vk = &s->vkfn;
    if (!buf || !s->hwctx)
        return;

    if (buf->mapped_mem)
        ff_vk_unmap_buffer(s, buf, 0);
    if (buf->buf != VK_NULL_HANDLE)
        vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
    if (buf->mem != VK_NULL_HANDLE)
        vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}

static void free_data_buf(void *opaque, uint8_t *data)
{
    FFVulkanContext *ctx = opaque;
    FFVkBuffer *buf = (FFVkBuffer *)data;
    ff_vk_free_buf(ctx, buf);
    av_free(data);
}

static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
{
    AVBufferRef *ref;
    uint8_t *buf = av_mallocz(size);
    if (!buf)
        return NULL;

    ref = av_buffer_create(buf, size, free_data_buf, opaque, 0);
    if (!ref)
        av_free(buf);
    return ref;
}

int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool,
                            AVBufferRef **buf, VkBufferUsageFlags usage,
                            void *create_pNext, size_t size,
                            VkMemoryPropertyFlagBits mem_props)
{
    int err;
    AVBufferRef *ref;
    FFVkBuffer *data;

    if (!(*buf_pool)) {
        *buf_pool = av_buffer_pool_init2(sizeof(FFVkBuffer), ctx,
                                         alloc_data_buf, NULL);
        if (!(*buf_pool))
            return AVERROR(ENOMEM);
    }

    *buf = ref = av_buffer_pool_get(*buf_pool);
    if (!ref)
        return AVERROR(ENOMEM);

    data = (FFVkBuffer *)ref->data;
    data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
    data->access = VK_ACCESS_2_NONE;

    if (data->size >= size)
        return 0;

    ff_vk_free_buf(ctx, data);
    memset(data, 0, sizeof(*data));

    av_log(ctx, AV_LOG_DEBUG, "Allocating buffer of %"SIZE_SPECIFIER" bytes for pool %p\n",
           size, *buf_pool);

    err = ff_vk_create_buf(ctx, data, size,
                           create_pNext, NULL, usage,
                           mem_props);
    if (err < 0) {
        av_buffer_unref(&ref);
        return err;
    }

    if (mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
        err = ff_vk_map_buffer(ctx, data, &data->mapped_mem, 0);
        if (err < 0) {
            av_buffer_unref(&ref);
            return err;
        }
    }

    return 0;
}
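
/* Typical usage (illustrative sketch; names are assumed): per-submission
 * uniform data from a lazily-created pool; registering the ref as an exec
 * dependency recycles the buffer once execution finishes:
 *
 *     AVBufferRef *uniform_ref;
 *     err = ff_vk_get_pooled_buffer(s, &uniform_pool, &uniform_ref,
 *                                   VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
 *                                   NULL, uniform_size,
 *                                   VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
 */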

int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
                            VkShaderStageFlagBits stage)
{
    VkPushConstantRange *pc;

    pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
                                       pl->push_consts_num + 1);
    if (!pl->push_consts)
        return AVERROR(ENOMEM);

    pc = &pl->push_consts[pl->push_consts_num++];
    memset(pc, 0, sizeof(*pc));

    pc->stageFlags = stage;
    pc->offset = offset;
    pc->size = size;

    return 0;
}

int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
                       int unnorm_coords, VkFilter filt)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    VkSamplerCreateInfo sampler_info = {
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .magFilter = filt,
        .minFilter = sampler_info.magFilter,
        .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
                                      VK_SAMPLER_MIPMAP_MODE_LINEAR,
        .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .addressModeV = sampler_info.addressModeU,
        .addressModeW = sampler_info.addressModeU,
        .anisotropyEnable = VK_FALSE,
        .compareOp = VK_COMPARE_OP_NEVER,
        .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
        .unnormalizedCoordinates = unnorm_coords,
    };

    ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
                            s->hwctx->alloc, sampler);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
{
    if (pix_fmt == AV_PIX_FMT_ABGR    || pix_fmt == AV_PIX_FMT_BGRA   ||
        pix_fmt == AV_PIX_FMT_RGBA    || pix_fmt == AV_PIX_FMT_RGB24  ||
        pix_fmt == AV_PIX_FMT_BGR24   || pix_fmt == AV_PIX_FMT_RGB48  ||
        pix_fmt == AV_PIX_FMT_RGBA64  || pix_fmt == AV_PIX_FMT_RGB565 ||
        pix_fmt == AV_PIX_FMT_BGR565  || pix_fmt == AV_PIX_FMT_BGR0   ||
        pix_fmt == AV_PIX_FMT_0BGR    || pix_fmt == AV_PIX_FMT_RGB0   ||
        pix_fmt == AV_PIX_FMT_X2RGB10 || pix_fmt == AV_PIX_FMT_X2BGR10)
        return 1;
    return 0;
}

const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
    const int high = desc->comp[0].depth > 8;
    return high ? "rgba16f" : "rgba8";
}

typedef struct ImageViewCtx {
    int nb_views;
    VkImageView views[];
} ImageViewCtx;

static void destroy_imageviews(void *opaque, uint8_t *data)
{
    FFVulkanContext *s = opaque;
    FFVulkanFunctions *vk = &s->vkfn;
    ImageViewCtx *iv = (ImageViewCtx *)data;

    for (int i = 0; i < iv->nb_views; i++)
        vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);

    av_free(iv);
}

int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
                            VkImageView views[AV_NUM_DATA_POINTERS],
                            AVFrame *f)
{
    int err;
    VkResult ret;
    AVBufferRef *buf;
    FFVulkanFunctions *vk = &s->vkfn;
    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
    const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format);
    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
    const int nb_images = ff_vk_count_images(vkf);
    const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);

    const size_t buf_size = sizeof(int) + nb_planes*sizeof(VkImageView);
    ImageViewCtx *iv = av_mallocz(buf_size);
    if (!iv)
        return AVERROR(ENOMEM);

    for (int i = 0; i < nb_planes; i++) {
        VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
                                              VK_IMAGE_ASPECT_PLANE_0_BIT,
                                              VK_IMAGE_ASPECT_PLANE_1_BIT,
                                              VK_IMAGE_ASPECT_PLANE_2_BIT, };

        VkImageViewCreateInfo view_create_info = {
            .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .pNext      = NULL,
            .image      = vkf->img[FFMIN(i, nb_images - 1)],
            .viewType   = VK_IMAGE_VIEW_TYPE_2D,
            .format     = rep_fmts[i],
            .components = ff_comp_identity_map,
            .subresourceRange = {
                .aspectMask = plane_aspect[(nb_planes != nb_images) +
                                           i*(nb_planes != nb_images)],
                .levelCount = 1,
                .layerCount = 1,
            },
        };

        ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info,
                                  s->hwctx->alloc, &iv->views[i]);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        iv->nb_views++;
    }

    buf = av_buffer_create((uint8_t *)iv, buf_size, destroy_imageviews, s, 0);
    if (!buf) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    /* Add to queue dependencies */
    err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
    if (err < 0)
        av_buffer_unref(&buf);

    memcpy(views, iv->views, nb_planes*sizeof(*views));

    return err;

fail:
    for (int i = 0; i < iv->nb_views; i++)
        vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
    av_free(iv);
    return err;
}
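
/* Typical usage (illustrative sketch; names are assumed): per-plane views
 * are created against the current execution context, which keeps them alive
 * until the submission completes:
 *
 *     VkImageView views[AV_NUM_DATA_POINTERS];
 *     err = ff_vk_create_imageviews(s, exec, views, frame);
 */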

void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
                         AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
                         VkPipelineStageFlags src_stage,
                         VkPipelineStageFlags dst_stage,
                         VkAccessFlagBits new_access,
                         VkImageLayout new_layout,
                         uint32_t new_qf)
{
    int found = -1;
    AVVkFrame *vkf = (AVVkFrame *)pic->data[0];
    const int nb_images = ff_vk_count_images(vkf);
    for (int i = 0; i < e->nb_frame_deps; i++)
        if (e->frame_deps[i]->data[0] == pic->data[0]) {
            if (e->frame_update[i])
                found = i;
            break;
        }

    for (int i = 0; i < nb_images; i++) {
        bar[*nb_bar] = (VkImageMemoryBarrier2) {
            .sType               = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
            .pNext               = NULL,
            .srcStageMask        = src_stage,
            .dstStageMask        = dst_stage,
            .srcAccessMask       = found >= 0 ? e->access_dst[found] : vkf->access[i],
            .dstAccessMask       = new_access,
            .oldLayout           = found >= 0 ? e->layout_dst[found] : vkf->layout[0],
            .newLayout           = new_layout,
            .srcQueueFamilyIndex = found >= 0 ? e->queue_family_dst[found] : vkf->queue_family[0],
            .dstQueueFamilyIndex = new_qf,
            .image               = vkf->img[i],
            .subresourceRange = (VkImageSubresourceRange) {
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .layerCount = 1,
                .levelCount = 1,
            },
        };
        *nb_bar += 1;
    }

    ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL);
}
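
/* Typical usage (illustrative sketch; names are assumed): collect image
 * barriers for a frame, then record them in one go:
 *
 *     VkImageMemoryBarrier2 img_bar[8];
 *     int nb_img_bar = 0;
 *     ff_vk_frame_barrier(s, exec, frame, img_bar, &nb_img_bar,
 *                         VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
 *                         VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
 *                         VK_ACCESS_SHADER_WRITE_BIT,
 *                         VK_IMAGE_LAYOUT_GENERAL,
 *                         VK_QUEUE_FAMILY_IGNORED);
 *     vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
 *         .sType                   = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
 *         .pImageMemoryBarriers    = img_bar,
 *         .imageMemoryBarrierCount = nb_img_bar,
 *     });
 */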

int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
                      VkShaderStageFlags stage, uint32_t required_subgroup_size)
{
    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);

    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    shd->shader.stage = stage;

    if (required_subgroup_size) {
        shd->shader.flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT;
        shd->shader.pNext = &shd->subgroup_info;
        shd->subgroup_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO;
        shd->subgroup_info.requiredSubgroupSize = required_subgroup_size;
    }

    shd->name = name;

    GLSLF(0, #version %i                                                  ,460);
    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y))       );
    GLSLC(0,                                                                  );
    GLSLC(0, #extension GL_EXT_buffer_reference : require                     );
    GLSLC(0, #extension GL_EXT_buffer_reference2 : require                    );

    return 0;
}

void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z)
{
    shd->local_size[0] = x;
    shd->local_size[1] = y;
    shd->local_size[2] = z;

    av_bprintf(&shd->src, "layout (local_size_x = %i, "
               "local_size_y = %i, local_size_z = %i) in;\n\n",
               shd->local_size[0], shd->local_size[1], shd->local_size[2]);
}
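
/* Typical usage (illustrative sketch; names are assumed): shader sources are
 * built up with the GLSL* macros after initialization:
 *
 *     ff_vk_shader_init(&pl, &shd, "example", VK_SHADER_STAGE_COMPUTE_BIT, 0);
 *     ff_vk_shader_set_compute_sizes(&shd, 32, 32, 1);
 *     GLSLC(0, void main()                                       );
 *     GLSLC(0, {                                                 );
 *     GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);  );
 *     GLSLC(0, }                                                 );
 */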

void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio)
{
    int line = 0;
    const char *p = shd->src.str;
    const char *start = p;
    const size_t len = strlen(p);

    AVBPrint buf;
    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);

    for (int i = 0; i < len; i++) {
        if (p[i] == '\n') {
            av_bprintf(&buf, "%i\t", ++line);
            av_bprint_append_data(&buf, start, &p[i] - start + 1);
            start = &p[i + 1];
        }
    }

    av_log(ctx, prio, "Shader %s: \n%s", shd->name, buf.str);
    av_bprint_finalize(&buf, NULL);
}

void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd)
{
    FFVulkanFunctions *vk = &s->vkfn;
    av_bprint_finalize(&shd->src, NULL);

    if (shd->shader.module)
        vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc);
}

int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd,
                        uint8_t *spirv, size_t spirv_size, const char *entrypoint)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkShaderModuleCreateInfo shader_create;

    shd->shader.pName = entrypoint;

    av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n",
           shd->name, spirv_size);

    shader_create.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    shader_create.pNext    = NULL;
    shader_create.codeSize = spirv_size;
    shader_create.flags    = 0;
    shader_create.pCode    = (void *)spirv;

    ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
                                 &shd->shader.module);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_VERBOSE, "Error creating shader module: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
static const struct descriptor_props {
    size_t struct_size; /* Size of the opaque which updates the descriptor */
    const char *type;
    int is_uniform;
    int mem_quali;      /* Can use a memory qualifier */
    int dim_needed;     /* Must indicate dimension */
    int buf_content;    /* Must indicate buffer contents */
} descriptor_props[] = {
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
};

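/*
 * Adds a descriptor set to the pipeline: creates the set layout (flagged
 * for VK_EXT_descriptor_buffer), queries its size and per-binding offsets,
 * and appends the matching GLSL declarations to the shader source. With
 * singular set, one copy of the set is shared by all execution contexts;
 * otherwise each context gets its own slice.
 *
 * A minimal usage sketch (binding values are illustrative):
 *
 *     FFVulkanDescriptorSetBinding desc[] = {
 *         {
 *             .name       = "input_img",
 *             .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
 *             .mem_layout = "rgba8",
 *             .dimensions = 2,
 *             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
 *         },
 *     };
 *     err = ff_vk_pipeline_descriptor_set_add(s, pl, shd, desc, 1, 0, 0);
 */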
int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
                                      FFVkSPIRVShader *shd,
                                      FFVulkanDescriptorSetBinding *desc, int nb,
                                      int singular, int print_to_shader_only)
{
    VkResult ret;
    int has_sampler = 0;
    FFVulkanFunctions *vk = &s->vkfn;
    FFVulkanDescriptorSet *set;
    VkDescriptorSetLayoutCreateInfo desc_create_layout;

    if (print_to_shader_only)
        goto print;

    /* Actual layout allocated for the pipeline */
    set = av_realloc_array(pl->desc_set, pl->nb_descriptor_sets + 1,
                           sizeof(*pl->desc_set));
    if (!set)
        return AVERROR(ENOMEM);
    pl->desc_set = set;
    set = &set[pl->nb_descriptor_sets];
    memset(set, 0, sizeof(*set));

    set->binding = av_calloc(nb, sizeof(*set->binding));
    if (!set->binding)
        return AVERROR(ENOMEM);

    set->binding_offset = av_calloc(nb, sizeof(*set->binding_offset));
    if (!set->binding_offset) {
        av_freep(&set->binding);
        return AVERROR(ENOMEM);
    }

    desc_create_layout = (VkDescriptorSetLayoutCreateInfo) {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .bindingCount = nb,
        .pBindings = set->binding,
        .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
    };

    for (int i = 0; i < nb; i++) {
        set->binding[i].binding            = i;
        set->binding[i].descriptorType     = desc[i].type;
        set->binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
        set->binding[i].stageFlags         = desc[i].stages;
        set->binding[i].pImmutableSamplers = desc[i].samplers;

        if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
            desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
            has_sampler |= 1;
    }

    set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
                 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
    if (has_sampler)
        set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT;

    ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
                                        s->hwctx->alloc, &set->layout);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, set->layout, &set->layout_size);

    set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment);

    for (int i = 0; i < nb; i++)
        vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, set->layout,
                                                   i, &set->binding_offset[i]);

    set->singular = singular;
    set->nb_bindings = nb;
    pl->nb_descriptor_sets++;

print:
    /* Write shader info */
    for (int i = 0; i < nb; i++) {
        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
        GLSLA("layout (set = %i, binding = %i", pl->nb_descriptor_sets - 1, i);

        if (desc[i].mem_layout)
            GLSLA(", %s", desc[i].mem_layout);
        GLSLA(")");

        if (prop->is_uniform)
            GLSLA(" uniform");

        if (prop->mem_quali && desc[i].mem_quali)
            GLSLA(" %s", desc[i].mem_quali);

        if (prop->type)
            GLSLA(" %s", prop->type);

        if (prop->dim_needed)
            GLSLA("%iD", desc[i].dimensions);

        GLSLA(" %s", desc[i].name);

        if (prop->buf_content)
            GLSLA(" {\n    %s\n}", desc[i].buf_content);
        else if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);

        GLSLA(";");
        GLSLA("\n");
    }
    GLSLA("\n");

    return 0;
}

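/*
 * Backs each descriptor set with a host-mapped descriptor buffer. Singular
 * sets get a single slice; per-context sets get pool_size slices of
 * aligned_size bytes each, so concurrent execution contexts never write
 * into each other's descriptors.
 */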
int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool,
                                 FFVulkanPipeline *pl)
{
    int err;

    pl->desc_bind = av_calloc(pl->nb_descriptor_sets, sizeof(*pl->desc_bind));
    if (!pl->desc_bind)
        return AVERROR(ENOMEM);

    pl->bound_buffer_indices = av_calloc(pl->nb_descriptor_sets,
                                         sizeof(*pl->bound_buffer_indices));
    if (!pl->bound_buffer_indices)
        return AVERROR(ENOMEM);

    for (int i = 0; i < pl->nb_descriptor_sets; i++) {
        FFVulkanDescriptorSet *set = &pl->desc_set[i];
        int nb = set->singular ? 1 : pool->pool_size;

        err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb,
                               NULL, NULL, set->usage,
                               VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
        if (err < 0)
            return err;

        err = ff_vk_map_buffer(s, &set->buf, &set->desc_mem, 0);
        if (err < 0)
            return err;

        pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) {
            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
            .usage = set->usage,
            .address = set->buf.address,
        };

        pl->bound_buffer_indices[i] = i;
    }

    return 0;
}

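/*
 * Writes one descriptor into the set's mapped descriptor buffer. The
 * destination address is computed as:
 *
 *     desc_mem + exec_offset + binding_offset[bind_idx] + array_idx*desc_size
 *
 * where exec_offset selects the slice of the current execution context
 * (0 for singular sets), binding_offset comes from
 * vkGetDescriptorSetLayoutBindingOffsetEXT, and array_idx indexes into
 * arrayed bindings.
 */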
static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e,
                                         FFVulkanDescriptorSet *set,
                                         int bind_idx, int array_idx,
                                         VkDescriptorGetInfoEXT *desc_get_info,
                                         size_t desc_size)
{
    FFVulkanFunctions *vk = &s->vkfn;
    const size_t exec_offset = set->singular ? 0 : set->aligned_size*e->idx;
    void *desc = set->desc_mem +                 /* Base */
                 exec_offset +                   /* Execution context */
                 set->binding_offset[bind_idx] + /* Descriptor binding */
                 array_idx*desc_size;            /* Array position */

    vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc);
}

static int vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl,
                                   FFVkExecContext *e, int set, int bind, int offs,
                                   VkImageView view, VkImageLayout layout,
                                   VkSampler sampler)
{
    FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
    VkDescriptorGetInfoEXT desc_get_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
        .type = desc_set->binding[bind].descriptorType,
    };
    VkDescriptorImageInfo desc_img_info = {
        .imageView = view,
        .sampler = sampler,
        .imageLayout = layout,
    };
    size_t desc_size;

    switch (desc_get_info.type) {
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        desc_get_info.data.pSampledImage = &desc_img_info;
        desc_size = s->desc_buf_props.sampledImageDescriptorSize;
        break;
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        desc_get_info.data.pStorageImage = &desc_img_info;
        desc_size = s->desc_buf_props.storageImageDescriptorSize;
        break;
    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
        desc_get_info.data.pInputAttachmentImage = &desc_img_info;
        desc_size = s->desc_buf_props.inputAttachmentDescriptorSize;
        break;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
        desc_get_info.data.pCombinedImageSampler = &desc_img_info;
        desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize;
        break;
    default:
        av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
               set, bind, desc_get_info.type);
        return AVERROR(EINVAL);
    }

    update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size);

    return 0;
}

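/*
 * Writes a buffer descriptor (uniform/storage, plain or texel) at the
 * given set/binding/array position. A minimal usage sketch for a storage
 * buffer (the FFVkBuffer "buf" and its fields are illustrative; the format
 * only matters for texel buffers):
 *
 *     ff_vk_set_descriptor_buffer(s, pl, e, 0, 0, 0,
 *                                 buf.address, buf.size,
 *                                 VK_FORMAT_UNDEFINED);
 */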
int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl,
                                FFVkExecContext *e, int set, int bind, int offs,
                                VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt)
{
    FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
    VkDescriptorGetInfoEXT desc_get_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
        .type = desc_set->binding[bind].descriptorType,
    };
    VkDescriptorAddressInfoEXT desc_buf_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT,
        .address = addr,
        .range = len,
        .format = fmt,
    };
    size_t desc_size;

    switch (desc_get_info.type) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        desc_get_info.data.pUniformBuffer = &desc_buf_info;
        desc_size = s->desc_buf_props.uniformBufferDescriptorSize;
        break;
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        desc_get_info.data.pStorageBuffer = &desc_buf_info;
        desc_size = s->desc_buf_props.storageBufferDescriptorSize;
        break;
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        desc_get_info.data.pUniformTexelBuffer = &desc_buf_info;
        desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize;
        break;
    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
        desc_get_info.data.pStorageTexelBuffer = &desc_buf_info;
        desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize;
        break;
    default:
        av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
               set, bind, desc_get_info.type);
        return AVERROR(EINVAL);
    }

    update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size);

    return 0;
}

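/*
 * Writes one image descriptor per plane of the frame's software pixel
 * format into consecutive array elements of the given binding, so a
 * shader can address planes as, e.g., "imgs[plane]".
 */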
void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl,
                                       FFVkExecContext *e, AVFrame *f,
                                       VkImageView *views, int set, int binding,
                                       VkImageLayout layout, VkSampler sampler)
{
    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
    const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);

    for (int i = 0; i < nb_planes; i++)
        vk_set_descriptor_image(s, pl, e, set, binding, i,
                                views[i], layout, sampler);
}

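/*
 * Records a push-constant update into the execution context's command
 * buffer. A minimal sketch (the struct is illustrative and must match a
 * range previously registered on the pipeline):
 *
 *     struct { int32_t width, height; } pc = { 1920, 1080 };
 *     ff_vk_update_push_exec(s, e, pl, VK_SHADER_STAGE_COMPUTE_BIT,
 *                            0, sizeof(pc), &pc);
 */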
void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
                            FFVulkanPipeline *pl,
                            VkShaderStageFlagBits stage,
                            int offset, size_t size, void *src)
{
    FFVulkanFunctions *vk = &s->vkfn;
    vk->CmdPushConstants(e->buf, pl->pipeline_layout,
                         stage, offset, size, src);
}

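/*
 * Gathers the layouts of all registered descriptor sets and creates the
 * pipeline layout tying them to the pipeline's push-constant ranges.
 */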
static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkPipelineLayoutCreateInfo pipeline_layout_info;

    VkDescriptorSetLayout *desc_layouts = av_malloc(pl->nb_descriptor_sets*
                                                    sizeof(*desc_layouts));
    if (!desc_layouts)
        return AVERROR(ENOMEM);

    for (int i = 0; i < pl->nb_descriptor_sets; i++)
        desc_layouts[i] = pl->desc_set[i].layout;

    /* Finally create the pipeline layout */
    pipeline_layout_info = (VkPipelineLayoutCreateInfo) {
        .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .pSetLayouts            = desc_layouts,
        .setLayoutCount         = pl->nb_descriptor_sets,
        .pushConstantRangeCount = pl->push_consts_num,
        .pPushConstantRanges    = pl->push_consts,
    };

    ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info,
                                   s->hwctx->alloc, &pl->pipeline_layout);
    av_free(desc_layouts);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

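/*
 * Creates the compute pipeline from the shader's stage info, flagged for
 * descriptor-buffer use, and caches the workgroup size for dispatching.
 */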
int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
                                FFVkSPIRVShader *shd)
{
    int err;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkComputePipelineCreateInfo pipeline_create_info;

    err = init_pipeline_layout(s, pl);
    if (err < 0)
        return err;

    pipeline_create_info = (VkComputePipelineCreateInfo) {
        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .flags  = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
        .layout = pl->pipeline_layout,
        .stage  = shd->shader,
    };

    ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1,
                                     &pipeline_create_info,
                                     s->hwctx->alloc, &pl->pipeline);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
    pl->wg_size[0] = shd->local_size[0];
    pl->wg_size[1] = shd->local_size[1];
    pl->wg_size[2] = shd->local_size[2];

    return 0;
}

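/*
 * Binds the pipeline and its descriptor buffers into the execution
 * context's command buffer, offsetting each non-singular set by
 * aligned_size*e->idx so this context reads its own descriptor slice.
 * Note the fixed offsets[1024] scratch array bounds the number of
 * descriptor sets a pipeline may use.
 */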
void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e,
                              FFVulkanPipeline *pl)
{
    FFVulkanFunctions *vk = &s->vkfn;
    VkDeviceSize offsets[1024];

    /* Bind pipeline */
    vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);

    if (pl->nb_descriptor_sets) {
        for (int i = 0; i < pl->nb_descriptor_sets; i++)
            offsets[i] = pl->desc_set[i].singular ? 0 : pl->desc_set[i].aligned_size*e->idx;

        /* Bind descriptor buffers */
        vk->CmdBindDescriptorBuffersEXT(e->buf, pl->nb_descriptor_sets, pl->desc_bind);

        /* Binding offsets */
        vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout,
                                             0, pl->nb_descriptor_sets,
                                             pl->bound_buffer_indices, offsets);
    }
}

void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    FFVulkanFunctions *vk = &s->vkfn;

    if (pl->pipeline)
        vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
    if (pl->pipeline_layout)
        vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
                                  s->hwctx->alloc);

    for (int i = 0; i < pl->nb_descriptor_sets; i++) {
        FFVulkanDescriptorSet *set = &pl->desc_set[i];
        if (set->buf.mem)
            ff_vk_unmap_buffer(s, &set->buf, 0);
        ff_vk_free_buf(s, &set->buf);
        if (set->layout)
            vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, set->layout,
                                           s->hwctx->alloc);
        av_free(set->binding);
        av_free(set->binding_offset);
    }

    av_freep(&pl->desc_set);
    av_freep(&pl->desc_bind);
    av_freep(&pl->bound_buffer_indices);
    av_freep(&pl->push_consts);
    pl->push_consts_num = 0;
}

void ff_vk_uninit(FFVulkanContext *s)
{
    av_freep(&s->query_props);
    av_freep(&s->qf_props);
    av_freep(&s->video_props);
    av_freep(&s->coop_mat_props);

    av_buffer_unref(&s->device_ref);
    av_buffer_unref(&s->frames_ref);
}

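/*
 * Initializes the context from a device and (optionally) a frames context,
 * loading function pointers and device properties. When frames_ref is
 * given, the device is taken from it and device_ref may be NULL. A minimal
 * setup sketch (error handling elided; avctx and input_frames_ref are
 * illustrative):
 *
 *     FFVulkanContext vkctx;
 *     err = ff_vk_init(&vkctx, avctx, NULL, input_frames_ref);
 *     // ... use vkctx ...
 *     ff_vk_uninit(&vkctx);
 */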
int ff_vk_init(FFVulkanContext *s, void *log_parent,
               AVBufferRef *device_ref, AVBufferRef *frames_ref)
{
    int err;

    static const AVClass vulkan_context_class = {
        .class_name = "vk",
        .version    = LIBAVUTIL_VERSION_INT,
        .parent_log_context_offset = offsetof(FFVulkanContext, log_parent),
    };

    memset(s, 0, sizeof(*s));
    s->log_parent = log_parent;
    s->class      = &vulkan_context_class;

    if (frames_ref) {
        s->frames_ref = av_buffer_ref(frames_ref);
        if (!s->frames_ref)
            return AVERROR(ENOMEM);

        s->frames = (AVHWFramesContext *)s->frames_ref->data;
        s->hwfc   = s->frames->hwctx;

        device_ref = s->frames->device_ref;
    }

    s->device_ref = av_buffer_ref(device_ref);
    if (!s->device_ref) {
        ff_vk_uninit(s);
        return AVERROR(ENOMEM);
    }

    s->device = (AVHWDeviceContext *)s->device_ref->data;
    s->hwctx  = s->device->hwctx;

    s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions,
                                             s->hwctx->nb_enabled_dev_extensions);

    err = ff_vk_load_functions(s->device, &s->vkfn, s->extensions, 1, 1);
    if (err < 0) {
        ff_vk_uninit(s);
        return err;
    }

    err = ff_vk_load_props(s);
    if (err < 0) {
        ff_vk_uninit(s);
        return err;
    }

    return 0;
}