/*
 * Copyright (c) Lynne
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avassert.h"
#include "mem.h"

#include "vulkan.h"
#include "libavutil/vulkan_loader.h"

const VkComponentMapping ff_comp_identity_map = {
    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
};

/* Converts return values to strings */
const char *ff_vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_UNKNOWN);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_FRAGMENTATION);
    CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS);
    CASE(VK_PIPELINE_COMPILE_REQUIRED);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_NOT_PERMITTED_KHR);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    CASE(VK_THREAD_IDLE_KHR);
    CASE(VK_THREAD_DONE_KHR);
    CASE(VK_OPERATION_DEFERRED_KHR);
    CASE(VK_OPERATION_NOT_DEFERRED_KHR);
    default: return "Unknown error";
    }
#undef CASE
}

static void load_enabled_qfs(FFVulkanContext *s)
{
    s->nb_qfs = 0;
    for (int i = 0; i < s->hwctx->nb_qf; i++) {
        /* Skip duplicates */
        int skip = 0;
        for (int j = 0; j < s->nb_qfs; j++) {
            if (s->qfs[j] == s->hwctx->qf[i].idx) {
                skip = 1;
                break;
            }
        }
        if (skip)
            continue;

        s->qfs[s->nb_qfs++] = s->hwctx->qf[i].idx;
    }
}

int ff_vk_load_props(FFVulkanContext *s)
{
    FFVulkanFunctions *vk = &s->vkfn;

    s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT,
    };
    s->optical_flow_props = (VkPhysicalDeviceOpticalFlowPropertiesNV) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_PROPERTIES_NV,
        .pNext = &s->hprops,
    };
    s->coop_matrix_props = (VkPhysicalDeviceCooperativeMatrixPropertiesKHR) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
        .pNext = &s->optical_flow_props,
    };
    s->subgroup_props = (VkPhysicalDeviceSubgroupSizeControlProperties) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES,
        .pNext = &s->coop_matrix_props,
    };
    s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT,
        .pNext = &s->subgroup_props,
    };
    s->driver_props = (VkPhysicalDeviceDriverProperties) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
        .pNext = &s->desc_buf_props,
    };
    s->props_11 = (VkPhysicalDeviceVulkan11Properties) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
        .pNext = &s->driver_props,
    };
    s->props = (VkPhysicalDeviceProperties2) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
        .pNext = &s->props_11,
    };

    s->atomic_float_feats = (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT,
    };
    s->feats_12 = (VkPhysicalDeviceVulkan12Features) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
        .pNext = &s->atomic_float_feats,
    };
    s->feats = (VkPhysicalDeviceFeatures2) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = &s->feats_12,
    };

    vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
    vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
    vk->GetPhysicalDeviceFeatures2(s->hwctx->phys_dev, &s->feats);

    load_enabled_qfs(s);

    if (s->qf_props)
        return 0;

    vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, NULL);

    s->qf_props = av_calloc(s->tot_nb_qfs, sizeof(*s->qf_props));
    if (!s->qf_props)
        return AVERROR(ENOMEM);

    s->query_props = av_calloc(s->tot_nb_qfs, sizeof(*s->query_props));
    if (!s->query_props) {
        av_freep(&s->qf_props);
        return AVERROR(ENOMEM);
    }

    s->video_props = av_calloc(s->tot_nb_qfs, sizeof(*s->video_props));
    if (!s->video_props) {
        av_freep(&s->qf_props);
        av_freep(&s->query_props);
        return AVERROR(ENOMEM);
    }

    for (uint32_t i = 0; i < s->tot_nb_qfs; i++) {
        s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR,
        };
        s->video_props[i] = (VkQueueFamilyVideoPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
            .pNext = &s->query_props[i],
        };
        s->qf_props[i] = (VkQueueFamilyProperties2) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
            .pNext = &s->video_props[i],
        };
    }

    vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, s->qf_props);

    if (s->extensions & FF_VK_EXT_COOP_MATRIX) {
        vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev,
                                                            &s->coop_mat_props_nb, NULL);

        if (s->coop_mat_props_nb) {
            s->coop_mat_props = av_malloc_array(s->coop_mat_props_nb,
                                                sizeof(VkCooperativeMatrixPropertiesKHR));
            if (!s->coop_mat_props)
                return AVERROR(ENOMEM);

            for (int i = 0; i < s->coop_mat_props_nb; i++) {
                s->coop_mat_props[i] = (VkCooperativeMatrixPropertiesKHR) {
                    .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
                };
            }

            vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev,
                                                                &s->coop_mat_props_nb,
                                                                s->coop_mat_props);
        }
    }

    return 0;
}

static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
{
    for (int i = 0; i < s->hwctx->nb_qf; i++) {
        if (s->hwctx->qf[i].flags & dev_family) {
            *nb = s->hwctx->qf[i].num;
            return s->hwctx->qf[i].idx;
        }
    }

    av_assert0(0); /* Should never happen */
}

int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
                  VkQueueFlagBits dev_family)
{
    /* Fill in queue families from context if not done yet */
    if (!s->nb_qfs)
        load_enabled_qfs(s);

    return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues));
}
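
/*
 * Usage sketch (illustrative only, not part of the library): grabbing a
 * compute-capable queue family from an already-initialized FFVulkanContext,
 * here assumed to be named "s".
 *
 *     FFVkQueueFamilyCtx qf;
 *     int qf_idx = ff_vk_qf_init(&s, &qf, VK_QUEUE_COMPUTE_BIT);
 *     // qf.queue_family == qf_idx; qf.nb_queues holds the queue count
 */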

void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
{
    FFVulkanFunctions *vk = &s->vkfn;

    for (int i = 0; i < pool->pool_size; i++) {
        FFVkExecContext *e = &pool->contexts[i];

        if (e->fence) {
            vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
            vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);
        }
        pthread_mutex_destroy(&e->lock);

        ff_vk_exec_discard_deps(s, e);

        av_free(e->frame_deps);
        av_free(e->sw_frame_deps);
        av_free(e->buf_deps);
        av_free(e->queue_family_dst);
        av_free(e->layout_dst);
        av_free(e->access_dst);
        av_free(e->frame_update);
        av_free(e->frame_locked);
        av_free(e->sem_sig);
        av_free(e->sem_sig_val_dst);
        av_free(e->sem_wait);
    }

    /* Free shader-specific data */
    for (int i = 0; i < pool->nb_reg_shd; i++) {
        FFVulkanShaderData *sd = &pool->reg_shd[i];

        if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
            for (int j = 0; j < sd->nb_descriptor_sets; j++) {
                FFVulkanDescriptorSetData *set_data = &sd->desc_set_buf[j];
                if (set_data->buf.mem)
                    ff_vk_unmap_buffer(s, &set_data->buf, 0);
                ff_vk_free_buf(s, &set_data->buf);
            }
        }

        if (sd->desc_pool)
            vk->DestroyDescriptorPool(s->hwctx->act_dev, sd->desc_pool,
                                      s->hwctx->alloc);

        av_freep(&sd->desc_set_buf);
        av_freep(&sd->desc_bind);
        av_freep(&sd->desc_sets);
    }

    if (pool->cmd_bufs)
        vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool,
                               pool->pool_size, pool->cmd_bufs);
    if (pool->cmd_buf_pool)
        vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc);
    if (pool->query_pool)
        vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc);

    av_free(pool->query_data);
    av_free(pool->cmd_bufs);
    av_free(pool->contexts);
}

int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
                         FFVkExecPool *pool, int nb_contexts,
                         int nb_queries, VkQueryType query_type, int query_64bit,
                         const void *query_create_pnext)
{
    int err;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    VkCommandPoolCreateInfo cqueue_create;
    VkCommandBufferAllocateInfo cbuf_create;

    const VkQueryPoolVideoEncodeFeedbackCreateInfoKHR *ef = NULL;

    if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) {
        ef = ff_vk_find_struct(query_create_pnext,
                               VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR);
        if (!ef)
            return AVERROR(EINVAL);
    }

    /* Create command pool */
    cqueue_create = (VkCommandPoolCreateInfo) {
        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags            = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
                            VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = qf->queue_family,
    };
    ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
                                s->hwctx->alloc, &pool->cmd_buf_pool);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    /* Allocate space for command buffers */
    pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs));
    if (!pool->cmd_bufs) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    /* Allocate command buffer */
    cbuf_create = (VkCommandBufferAllocateInfo) {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandPool        = pool->cmd_buf_pool,
        .commandBufferCount = nb_contexts,
    };
    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create,
                                     pool->cmd_bufs);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    /* Query pool */
    if (nb_queries) {
        VkQueryPoolCreateInfo query_pool_info = {
            .sType      = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
            .pNext      = query_create_pnext,
            .queryType  = query_type,
            .queryCount = nb_queries*nb_contexts,
        };
        ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
                                  s->hwctx->alloc, &pool->query_pool);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        pool->nb_queries = nb_queries;
        pool->query_status_stride = 1 + 1; /* One result, one status by default */
        pool->query_results = nb_queries;
        pool->query_statuses = nb_queries;
        pool->query_64bit = query_64bit;

        /* Video encode queries produce one result per enabled feedback flag */
        if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) {
            int nb_results = av_popcount(ef->encodeFeedbackFlags);
            pool->query_status_stride = nb_results + 1;
            pool->query_results *= nb_results;
        } else if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
            pool->query_status_stride = 1;
            pool->query_results = 0;
        }

        pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4);

        /* Allocate space for the query data */
        pool->query_data = av_calloc(nb_contexts, pool->qd_size);
        if (!pool->query_data) {
            err = AVERROR(ENOMEM);
            goto fail;
        }
    }

    /* Allocate space for the contexts */
    pool->contexts = av_calloc(nb_contexts, sizeof(*pool->contexts));
    if (!pool->contexts) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    pool->pool_size = nb_contexts;

    /* Init contexts */
    for (int i = 0; i < pool->pool_size; i++) {
        FFVkExecContext *e = &pool->contexts[i];
        VkFenceCreateInfo fence_create = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
            .flags = VK_FENCE_CREATE_SIGNALED_BIT,
        };

        /* Mutex */
        err = pthread_mutex_init(&e->lock, NULL);
        if (err != 0)
            return AVERROR(err);

        /* Fence */
        ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc,
                              &e->fence);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }

        e->idx = i;
        e->parent = pool;

        /* Query data */
        e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i;
        e->query_idx = nb_queries*i;

        /* Command buffer */
        e->buf = pool->cmd_bufs[i];

        /* Queue index distribution */
        e->qi = i % qf->nb_queues;
        e->qf = qf->queue_family;
        vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
                           e->qi, &e->queue);
    }

    return 0;

fail:
    ff_vk_exec_pool_free(s, pool);
    return err;
}
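
/*
 * Usage sketch (illustrative only): a minimal pool of 4 execution contexts
 * on the queue family picked in the sketch above, with no query pool
 * attached (nb_queries == 0, so the query arguments are ignored).
 *
 *     FFVkExecPool pool = { 0 };
 *     int err = ff_vk_exec_pool_init(&s, &qf, &pool, 4,
 *                                    0, 0, 0, NULL);
 *     if (err < 0)
 *         return err;
 */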

VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e,
                              void **data, VkQueryResultFlagBits flags)
{
    FFVulkanFunctions *vk = &s->vkfn;
    const FFVkExecPool *pool = e->parent;
    VkQueryResultFlags qf = flags & ~(VK_QUERY_RESULT_64_BIT |
                                      VK_QUERY_RESULT_WITH_STATUS_BIT_KHR);

    if (!e->query_data) {
        av_log(s, AV_LOG_ERROR, "Requested a query with a NULL query_data pointer!\n");
        return VK_INCOMPLETE;
    }

    qf |= pool->query_64bit ?
          VK_QUERY_RESULT_64_BIT : 0x0;
    qf |= pool->query_statuses ?
          VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;

    if (data)
        *data = e->query_data;

    return vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool,
                                   e->query_idx,
                                   pool->nb_queries,
                                   pool->qd_size, e->query_data,
                                   pool->qd_size, qf);
}

FFVkExecContext *ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool)
{
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkExecContext *e = &pool->contexts[pool->idx];

    /* Check if last submission has already finished.
     * If so, don't waste resources and reuse the same buffer. */
    if (vk->GetFenceStatus(s->hwctx->act_dev, e->fence) == VK_SUCCESS)
        return e;

    pool->idx = (pool->idx + 1) % pool->pool_size;
    return &pool->contexts[pool->idx];
}

void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e)
{
    FFVulkanFunctions *vk = &s->vkfn;
    pthread_mutex_lock(&e->lock);
    vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
    ff_vk_exec_discard_deps(s, e);
    pthread_mutex_unlock(&e->lock);
}

int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    const FFVkExecPool *pool = e->parent;

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    /* Wait for the fence to be signalled */
    vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);

    /* vkResetFences is defined as being host-synchronized */
    pthread_mutex_lock(&e->lock);
    vk->ResetFences(s->hwctx->act_dev, 1, &e->fence);
    pthread_mutex_unlock(&e->lock);

    /* Discard queue dependencies */
    ff_vk_exec_discard_deps(s, e);

    ret = vk->BeginCommandBuffer(e->buf, &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to start command recording: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    if (pool->nb_queries)
        vk->CmdResetQueryPool(e->buf, pool->query_pool,
                              e->query_idx, pool->nb_queries);

    return 0;
}

void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
{
    for (int j = 0; j < e->nb_buf_deps; j++)
        av_buffer_unref(&e->buf_deps[j]);
    e->nb_buf_deps = 0;

    for (int j = 0; j < e->nb_sw_frame_deps; j++)
        av_frame_free(&e->sw_frame_deps[j]);
    e->nb_sw_frame_deps = 0;

    for (int j = 0; j < e->nb_frame_deps; j++) {
        AVFrame *f = e->frame_deps[j];
        if (e->frame_locked[j]) {
            AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
            AVVulkanFramesContext *vkfc = hwfc->hwctx;
            AVVkFrame *vkf = (AVVkFrame *)f->data[0];
            vkfc->unlock_frame(hwfc, vkf);
            e->frame_locked[j] = 0;
        }
        e->frame_update[j] = 0;
        if (f->buf[0])
            av_frame_free(&e->frame_deps[j]);
    }
    e->nb_frame_deps = 0;

    e->sem_wait_cnt = 0;
    e->sem_sig_cnt = 0;
    e->sem_sig_val_dst_cnt = 0;
}

int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
                           AVBufferRef **deps, int nb_deps, int ref)
{
    AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size,
                                        (e->nb_buf_deps + nb_deps) * sizeof(*dst));
    if (!dst) {
        ff_vk_exec_discard_deps(s, e);
        return AVERROR(ENOMEM);
    }

    e->buf_deps = dst;

    for (int i = 0; i < nb_deps; i++) {
        e->buf_deps[e->nb_buf_deps] = ref ? av_buffer_ref(deps[i]) : deps[i];
        if (!e->buf_deps[e->nb_buf_deps]) {
            ff_vk_exec_discard_deps(s, e);
            return AVERROR(ENOMEM);
        }
        e->nb_buf_deps++;
    }

    return 0;
}

int ff_vk_exec_add_dep_sw_frame(FFVulkanContext *s, FFVkExecContext *e,
                                AVFrame *f)
{
    AVFrame **dst = av_fast_realloc(e->sw_frame_deps, &e->sw_frame_deps_alloc_size,
                                    (e->nb_sw_frame_deps + 1) * sizeof(*dst));
    if (!dst) {
        ff_vk_exec_discard_deps(s, e);
        return AVERROR(ENOMEM);
    }

    e->sw_frame_deps = dst;

    e->sw_frame_deps[e->nb_sw_frame_deps] = av_frame_clone(f);
    if (!e->sw_frame_deps[e->nb_sw_frame_deps]) {
        ff_vk_exec_discard_deps(s, e);
        return AVERROR(ENOMEM);
    }

    e->nb_sw_frame_deps++;

    return 0;
}

#define ARR_REALLOC(str, arr, alloc_s, cnt)                                   \
    do {                                                                      \
        arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr));     \
        if (!arr) {                                                           \
            ff_vk_exec_discard_deps(s, e);                                    \
            return AVERROR(ENOMEM);                                           \
        }                                                                     \
        str->arr = arr;                                                       \
    } while (0)

typedef struct TempSyncCtx {
    int nb_sem;
    VkSemaphore sem[];
} TempSyncCtx;

static void destroy_tmp_semaphores(void *opaque, uint8_t *data)
{
    FFVulkanContext *s = opaque;
    FFVulkanFunctions *vk = &s->vkfn;
    TempSyncCtx *ts = (TempSyncCtx *)data;

    for (int i = 0; i < ts->nb_sem; i++)
        vk->DestroySemaphore(s->hwctx->act_dev, ts->sem[i], s->hwctx->alloc);

    av_free(ts);
}

int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e,
                                VkSemaphore *sem, int nb,
                                VkPipelineStageFlagBits2 stage,
                                int wait)
{
    int err;
    size_t buf_size;
    AVBufferRef *buf;
    TempSyncCtx *ts;
    FFVulkanFunctions *vk = &s->vkfn;

    /* Do not transfer ownership if we're signalling a binary semaphore,
     * since we're probably exporting it. */
    if (!wait) {
        for (int i = 0; i < nb; i++) {
            VkSemaphoreSubmitInfo *sem_sig;
            ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);

            e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) {
                .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
                .semaphore = sem[i],
                .stageMask = stage,
            };
        }

        return 0;
    }

    buf_size = sizeof(*ts) + sizeof(VkSemaphore)*nb;
    ts = av_mallocz(buf_size);
    if (!ts) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    memcpy(ts->sem, sem, nb*sizeof(*sem));
    ts->nb_sem = nb;

    buf = av_buffer_create((uint8_t *)ts, buf_size, destroy_tmp_semaphores, s, 0);
    if (!buf) {
        av_free(ts);
        err = AVERROR(ENOMEM);
        goto fail;
    }

    err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
    if (err < 0) {
        av_buffer_unref(&buf);
        return err;
    }

    for (int i = 0; i < nb; i++) {
        VkSemaphoreSubmitInfo *sem_wait;
        ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);

        e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = sem[i],
            .stageMask = stage,
        };
    }

    return 0;

fail:
    for (int i = 0; i < nb; i++)
        vk->DestroySemaphore(s->hwctx->act_dev, sem[i], s->hwctx->alloc);

    return err;
}

int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
                             VkPipelineStageFlagBits2 wait_stage,
                             VkPipelineStageFlagBits2 signal_stage)
{
    uint8_t *frame_locked;
    uint8_t *frame_update;
    AVFrame **frame_deps;
    VkImageLayout *layout_dst;
    uint32_t *queue_family_dst;
    VkAccessFlagBits *access_dst;

    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
    AVVulkanFramesContext *vkfc = hwfc->hwctx;
    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
    int nb_images = ff_vk_count_images(vkf);

    /* Don't add duplicates */
    for (int i = 0; i < e->nb_frame_deps; i++)
        if (e->frame_deps[i]->data[0] == f->data[0])
            return 1;

    ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps);
    ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps);
    ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps);

    ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps);
    ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps);
    ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps);

    e->frame_deps[e->nb_frame_deps] = f->buf[0] ? av_frame_clone(f) : f;
    if (!e->frame_deps[e->nb_frame_deps]) {
        ff_vk_exec_discard_deps(s, e);
        return AVERROR(ENOMEM);
    }

    vkfc->lock_frame(hwfc, vkf);
    e->frame_locked[e->nb_frame_deps] = 1;
    e->frame_update[e->nb_frame_deps] = 0;
    e->nb_frame_deps++;

    for (int i = 0; i < nb_images; i++) {
        VkSemaphoreSubmitInfo *sem_wait;
        VkSemaphoreSubmitInfo *sem_sig;
        uint64_t **sem_sig_val_dst;

        ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);
        ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);
        ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);

        e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = vkf->sem[i],
            .value = vkf->sem_value[i],
            .stageMask = wait_stage,
        };

        e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = vkf->sem[i],
            .value = vkf->sem_value[i] + 1,
            .stageMask = signal_stage,
        };

        e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i];
        e->sem_sig_val_dst_cnt++;
    }

    return 0;
}

void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
                             VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar)
{
    int i;
    for (i = 0; i < e->nb_frame_deps; i++)
        if (e->frame_deps[i]->data[0] == f->data[0])
            break;
    av_assert0(i < e->nb_frame_deps);

    /* Don't update duplicates */
    if (nb_img_bar && !e->frame_update[i])
        (*nb_img_bar)++;

    e->queue_family_dst[i] = bar->dstQueueFamilyIndex;
    e->access_dst[i] = bar->dstAccessMask;
    e->layout_dst[i] = bar->newLayout;
    e->frame_update[i] = 1;
}

int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
                                VkSemaphore *dst, uint64_t *dst_val,
                                AVFrame *f)
{
    uint64_t **sem_sig_val_dst;
    AVVkFrame *vkf = (AVVkFrame *)f->data[0];

    /* Reject unknown frames */
    int i;
    for (i = 0; i < e->nb_frame_deps; i++)
        if (e->frame_deps[i]->data[0] == f->data[0])
            break;
    if (i == e->nb_frame_deps)
        return AVERROR(EINVAL);

    ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);

    *dst = vkf->sem[0];
    *dst_val = vkf->sem_value[0];

    e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val;
    e->sem_sig_val_dst_cnt++;

    return 0;
}

int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkCommandBufferSubmitInfo cmd_buf_info = (VkCommandBufferSubmitInfo) {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
        .commandBuffer = e->buf,
    };
    VkSubmitInfo2 submit_info = (VkSubmitInfo2) {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        .pCommandBufferInfos = &cmd_buf_info,
        .commandBufferInfoCount = 1,
        .pWaitSemaphoreInfos = e->sem_wait,
        .waitSemaphoreInfoCount = e->sem_wait_cnt,
        .pSignalSemaphoreInfos = e->sem_sig,
        .signalSemaphoreInfoCount = e->sem_sig_cnt,
    };

    ret = vk->EndCommandBuffer(e->buf);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               ff_vk_ret2str(ret));
        ff_vk_exec_discard_deps(s, e);
        return AVERROR_EXTERNAL;
    }

    s->hwctx->lock_queue(s->device, e->qf, e->qi);
    ret = vk->QueueSubmit2(e->queue, 1, &submit_info, e->fence);
    s->hwctx->unlock_queue(s->device, e->qf, e->qi);

    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               ff_vk_ret2str(ret));
        ff_vk_exec_discard_deps(s, e);
        return AVERROR_EXTERNAL;
    }

    for (int i = 0; i < e->sem_sig_val_dst_cnt; i++)
        *e->sem_sig_val_dst[i] += 1;

    /* Unlock all frames */
    for (int j = 0; j < e->nb_frame_deps; j++) {
        if (e->frame_locked[j]) {
            AVFrame *f = e->frame_deps[j];
            AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
            AVVulkanFramesContext *vkfc = hwfc->hwctx;
            AVVkFrame *vkf = (AVVkFrame *)f->data[0];

            if (e->frame_update[j]) {
                int nb_images = ff_vk_count_images(vkf);
                for (int i = 0; i < nb_images; i++) {
                    vkf->layout[i] = e->layout_dst[j];
                    vkf->access[i] = e->access_dst[j];
                    vkf->queue_family[i] = e->queue_family_dst[j];
                }
            }
            vkfc->unlock_frame(hwfc, vkf);
            e->frame_locked[j] = 0;
        }
    }

    e->had_submission = 1;

    return 0;
}
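
/*
 * Usage sketch (illustrative only) of the full execution lifecycle, assuming
 * the pool from the sketch above and an AVFrame "frame" backed by an
 * AVVkFrame:
 *
 *     FFVkExecContext *e = ff_vk_exec_get(&s, &pool);
 *     ff_vk_exec_start(&s, e);
 *     ff_vk_exec_add_dep_frame(&s, e, frame,
 *                              VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
 *                              VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
 *     // ... record commands into e->buf ...
 *     ff_vk_exec_submit(&s, e);
 *     ff_vk_exec_wait(&s, e); // only if the results are needed immediately
 */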

int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    FFVulkanFunctions *vk = &s->vkfn;

    VkMemoryAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = alloc_extension,
    };

    alloc_info.allocationSize = req->size;

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((req_flags != UINT32_MAX) &&
            ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
                             s->hwctx->alloc, mem);
    if (ret != VK_SUCCESS)
        return AVERROR(ENOMEM);

    if (mem_flags)
        *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;

    return 0;
}
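
/*
 * Usage sketch (illustrative only): allocating host-visible memory for a
 * resource whose requirements were queried beforehand; "img" and "err" are
 * stand-ins, "s" and "vk" as in the functions above.
 *
 *     VkMemoryRequirements req;
 *     VkMemoryPropertyFlagBits out_flags = 0;
 *     VkDeviceMemory mem;
 *     vk->GetImageMemoryRequirements(s.hwctx->act_dev, img, &req);
 *     err = ff_vk_alloc_mem(&s, &req,
 *                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
 *                           NULL, &out_flags, &mem);
 */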

int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
                     void *pNext, void *alloc_pNext,
                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
    int err;
    VkResult ret;
    int use_ded_mem;
    FFVulkanFunctions *vk = &s->vkfn;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = pNext,
        .usage       = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size        = flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT ?
                       FFALIGN(size, s->props.properties.limits.minMemoryMapAlignment) :
                       size,
    };

    VkMemoryAllocateFlagsInfo alloc_flags = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
        .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
    };
    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    };
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        .pNext = alloc_pNext,
    };
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    };
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
        .pNext = &ded_req,
    };

    av_log(s, AV_LOG_DEBUG, "Creating a buffer of %"SIZE_SPECIFIER" bytes, "
           "usage: 0x%x, flags: 0x%x\n",
           size, usage, flags);

    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &buf->buf);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    req_desc.buffer = buf->buf;

    vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);

    /* In case the implementation prefers/requires dedicated allocation */
    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
    if (use_ded_mem) {
        ded_alloc.buffer = buf->buf;
        ded_alloc.pNext = alloc_pNext;
        alloc_pNext = &ded_alloc;
    }

    if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
        alloc_flags.pNext = alloc_pNext;
        alloc_pNext = &alloc_flags;
    }

    err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext,
                          &buf->flags, &buf->mem);
    if (err)
        return err;

    ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
        VkBufferDeviceAddressInfo address_info = {
            .sType  = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
            .buffer = buf->buf,
        };
        buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
    }

    buf->size = size;

    return 0;
}
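
/*
 * Usage sketch (illustrative only): a host-visible staging buffer. Passing
 * UINT32_MAX as the property flags would instead accept any memory type.
 *
 *     FFVkBuffer buf = { 0 };
 *     err = ff_vk_create_buf(&s, &buf, 1024*1024, NULL, NULL,
 *                            VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
 *                            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
 *                            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
 */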

static void destroy_avvkbuf(void *opaque, uint8_t *data)
{
    FFVulkanContext *s = opaque;
    FFVkBuffer *buf = (FFVkBuffer *)data;
    ff_vk_free_buf(s, buf);
    av_free(buf);
}

int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size,
                       void *pNext, void *alloc_pNext,
                       VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
    int err;
    AVBufferRef *buf;
    FFVkBuffer *vkb = av_mallocz(sizeof(*vkb));
    if (!vkb)
        return AVERROR(ENOMEM);

    err = ff_vk_create_buf(s, vkb, size, pNext, alloc_pNext, usage, flags);
    if (err < 0) {
        av_free(vkb);
        return err;
    }

    buf = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), destroy_avvkbuf, s, 0);
    if (!buf) {
        destroy_avvkbuf(s, (uint8_t *)vkb);
        return AVERROR(ENOMEM);
    }

    *ref = buf;

    return 0;
}

int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[],
                      int nb_buffers, int invalidate)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkMappedMemoryRange inval_list[64];
    int inval_count = 0;

    for (int i = 0; i < nb_buffers; i++) {
        void *dst;
        ret = vk->MapMemory(s->hwctx->act_dev, buf[i]->mem, 0,
                            VK_WHOLE_SIZE, 0, &dst);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
        mem[i] = buf[i]->mapped_mem = dst;
    }

    if (!invalidate)
        return 0;

    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange ival_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i]->mem,
            .size   = VK_WHOLE_SIZE,
        };
        if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;
        inval_list[inval_count++] = ival_buf;
    }

    if (inval_count) {
        ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
                                               inval_list);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}

int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers,
                        int flush)
{
    int err = 0;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkMappedMemoryRange flush_list[64];
    int flush_count = 0;

    if (flush) {
        for (int i = 0; i < nb_buffers; i++) {
            const VkMappedMemoryRange flush_buf = {
                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
                .memory = buf[i]->mem,
                .size   = VK_WHOLE_SIZE,
            };
            if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                continue;
            flush_list[flush_count++] = flush_buf;
        }
    }

    if (flush_count) {
        ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
                                          flush_list);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
        }
    }

    for (int i = 0; i < nb_buffers; i++) {
        vk->UnmapMemory(s->hwctx->act_dev, buf[i]->mem);
        buf[i]->mapped_mem = NULL;
    }

    return err;
}
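
/*
 * Usage sketch (illustrative only): map, write to, then flush and unmap the
 * staging buffer created above; src_data and src_size are stand-ins.
 *
 *     FFVkBuffer *bufs[] = { &buf };
 *     uint8_t *mem[1];
 *     err = ff_vk_map_buffers(&s, bufs, mem, 1, 0);
 *     if (err >= 0) {
 *         memcpy(mem[0], src_data, src_size);
 *         err = ff_vk_unmap_buffers(&s, bufs, 1, 1);
 *     }
 */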

void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
{
    FFVulkanFunctions *vk = &s->vkfn;
    if (!buf || !s->hwctx)
        return;

    if (buf->mapped_mem)
        ff_vk_unmap_buffer(s, buf, 0);
    if (buf->buf != VK_NULL_HANDLE)
        vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
    if (buf->mem != VK_NULL_HANDLE)
        vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}

static void free_data_buf(void *opaque, uint8_t *data)
{
    FFVulkanContext *ctx = opaque;
    FFVkBuffer *buf = (FFVkBuffer *)data;
    ff_vk_free_buf(ctx, buf);
    av_free(data);
}

static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
{
    AVBufferRef *ref;
    uint8_t *buf = av_mallocz(size);
    if (!buf)
        return NULL;

    ref = av_buffer_create(buf, size, free_data_buf, opaque, 0);
    if (!ref)
        av_free(buf);
    return ref;
}

int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool,
                            AVBufferRef **buf, VkBufferUsageFlags usage,
                            void *create_pNext, size_t size,
                            VkMemoryPropertyFlagBits mem_props)
{
    int err;
    AVBufferRef *ref;
    FFVkBuffer *data;

    if (!(*buf_pool)) {
        *buf_pool = av_buffer_pool_init2(sizeof(FFVkBuffer), ctx,
                                         alloc_data_buf, NULL);
        if (!(*buf_pool))
            return AVERROR(ENOMEM);
    }

    *buf = ref = av_buffer_pool_get(*buf_pool);
    if (!ref)
        return AVERROR(ENOMEM);

    data = (FFVkBuffer *)ref->data;
    data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
    data->access = VK_ACCESS_2_NONE;

    if (data->size >= size)
        return 0;

    ff_vk_free_buf(ctx, data);
    memset(data, 0, sizeof(*data));

    err = ff_vk_create_buf(ctx, data, size,
                           create_pNext, NULL, usage,
                           mem_props);
    if (err < 0) {
        av_buffer_unref(&ref);
        return err;
    }

    if (mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
        err = ff_vk_map_buffer(ctx, data, &data->mapped_mem, 0);
        if (err < 0) {
            av_buffer_unref(&ref);
            return err;
        }
    }

    return 0;
}
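
/*
 * Usage sketch (illustrative only): fetching a reusable, host-visible buffer
 * from a lazily-created pool; both pointers start out NULL.
 *
 *     AVBufferPool *buf_pool = NULL;
 *     AVBufferRef *uniform = NULL;
 *     err = ff_vk_get_pooled_buffer(&s, &buf_pool, &uniform,
 *                                   VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
 *                                   NULL, 256,
 *                                   VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
 *     // ((FFVkBuffer *)uniform->data)->mapped_mem is ready to write into
 */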

int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size,
                                VkShaderStageFlagBits stage)
{
    VkPushConstantRange *pc;

    shd->push_consts = av_realloc_array(shd->push_consts,
                                        sizeof(*shd->push_consts),
                                        shd->push_consts_num + 1);
    if (!shd->push_consts)
        return AVERROR(ENOMEM);

    pc = &shd->push_consts[shd->push_consts_num++];
    memset(pc, 0, sizeof(*pc));

    pc->stageFlags = stage;
    pc->offset = offset;
    pc->size = size;

    return 0;
}
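
/*
 * Usage sketch (illustrative only), assuming a shader "shd" whose compute
 * stage consumes a 16-byte push constant block at offset 0:
 *
 *     err = ff_vk_shader_add_push_const(&shd, 0, 16,
 *                                       VK_SHADER_STAGE_COMPUTE_BIT);
 */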

int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
                       int unnorm_coords, VkFilter filt)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    VkSamplerCreateInfo sampler_info = {
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .magFilter = filt,
        .minFilter = filt,
        .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
                                      VK_SAMPLER_MIPMAP_MODE_LINEAR,
        .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .anisotropyEnable = VK_FALSE,
        .compareOp = VK_COMPARE_OP_NEVER,
        .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
        .unnormalizedCoordinates = unnorm_coords,
    };

    ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
                            s->hwctx->alloc, sampler);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
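
/*
 * Usage sketch (illustrative only): a bilinear sampler with normalized
 * coordinates, to be destroyed with vk->DestroySampler once unused.
 *
 *     VkSampler sampler;
 *     err = ff_vk_init_sampler(&s, &sampler, 0, VK_FILTER_LINEAR);
 */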
  1102. VkImageAspectFlags ff_vk_aspect_flag(AVFrame *f, int p)
  1103. {
  1104. AVVkFrame *vkf = (AVVkFrame *)f->data[0];
  1105. AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
  1106. int nb_images = ff_vk_count_images(vkf);
  1107. int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1108. static const VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_PLANE_0_BIT,
  1109. VK_IMAGE_ASPECT_PLANE_1_BIT,
  1110. VK_IMAGE_ASPECT_PLANE_2_BIT, };
  1111. if (ff_vk_mt_is_np_rgb(hwfc->sw_format) || (nb_planes == nb_images))
  1112. return VK_IMAGE_ASPECT_COLOR_BIT;
  1113. return plane_aspect[p];
  1114. }
  1115. int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
  1116. {
  1117. if (pix_fmt == AV_PIX_FMT_ABGR || pix_fmt == AV_PIX_FMT_BGRA ||
  1118. pix_fmt == AV_PIX_FMT_RGBA || pix_fmt == AV_PIX_FMT_RGB24 ||
  1119. pix_fmt == AV_PIX_FMT_BGR24 || pix_fmt == AV_PIX_FMT_RGB48 ||
  1120. pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 ||
  1121. pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0 ||
  1122. pix_fmt == AV_PIX_FMT_0BGR || pix_fmt == AV_PIX_FMT_RGB0 ||
  1123. pix_fmt == AV_PIX_FMT_GBRP10 ||
  1124. pix_fmt == AV_PIX_FMT_GBRAP || pix_fmt == AV_PIX_FMT_GBRAP16 ||
  1125. pix_fmt == AV_PIX_FMT_GBRPF32 || pix_fmt == AV_PIX_FMT_GBRAPF32 ||
  1126. pix_fmt == AV_PIX_FMT_X2RGB10 || pix_fmt == AV_PIX_FMT_X2BGR10 ||
  1127. pix_fmt == AV_PIX_FMT_RGBAF32 || pix_fmt == AV_PIX_FMT_RGBF32 ||
  1128. pix_fmt == AV_PIX_FMT_RGBA128 || pix_fmt == AV_PIX_FMT_RGB96)
  1129. return 1;
  1130. return 0;
  1131. }
  1132. const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt,
  1133. enum FFVkShaderRepFormat rep_fmt)
  1134. {
  1135. switch (pix_fmt) {
  1136. case AV_PIX_FMT_RGBA:
  1137. case AV_PIX_FMT_BGRA:
  1138. case AV_PIX_FMT_RGB24:
  1139. case AV_PIX_FMT_BGR24:
    case AV_PIX_FMT_BGR0:
    case AV_PIX_FMT_RGB0:
    case AV_PIX_FMT_RGB565:
    case AV_PIX_FMT_BGR565:
    case AV_PIX_FMT_UYVA:
    case AV_PIX_FMT_YUYV422:
    case AV_PIX_FMT_UYVY422: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rgba8ui",
            [FF_VK_REP_FLOAT]  = "rgba8",
            [FF_VK_REP_INT]    = "rgba8i",
            [FF_VK_REP_UINT]   = "rgba8ui",
        };
        return rep_tab[rep_fmt];
    }
    case AV_PIX_FMT_X2RGB10:
    case AV_PIX_FMT_X2BGR10:
    case AV_PIX_FMT_Y210:
    case AV_PIX_FMT_XV30: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rgb10_a2ui",
            [FF_VK_REP_FLOAT]  = "rgb10_a2",
            [FF_VK_REP_INT]    = NULL,
            [FF_VK_REP_UINT]   = "rgb10_a2ui",
        };
        return rep_tab[rep_fmt];
    }
    case AV_PIX_FMT_RGB48:
    case AV_PIX_FMT_RGBA64:
    case AV_PIX_FMT_Y212:
    case AV_PIX_FMT_Y216:
    case AV_PIX_FMT_XV36:
    case AV_PIX_FMT_XV48: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rgba16ui",
            [FF_VK_REP_FLOAT]  = "rgba16",
            [FF_VK_REP_INT]    = "rgba16i",
            [FF_VK_REP_UINT]   = "rgba16ui",
        };
        return rep_tab[rep_fmt];
    }
    case AV_PIX_FMT_RGBF32:
    case AV_PIX_FMT_RGBAF32: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rgba32f",
            [FF_VK_REP_FLOAT]  = "rgba32f",
            [FF_VK_REP_INT]    = "rgba32i",
            [FF_VK_REP_UINT]   = "rgba32ui",
        };
        return rep_tab[rep_fmt];
    }
    case AV_PIX_FMT_RGB96:
    case AV_PIX_FMT_RGBA128: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rgba32ui",
            [FF_VK_REP_FLOAT]  = NULL,
            [FF_VK_REP_INT]    = "rgba32i",
            [FF_VK_REP_UINT]   = "rgba32ui",
        };
        return rep_tab[rep_fmt];
    }
    case AV_PIX_FMT_GRAY8:
    case AV_PIX_FMT_GBRAP:
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "r8ui",
            [FF_VK_REP_FLOAT]  = "r8",
            [FF_VK_REP_INT]    = "r8i",
            [FF_VK_REP_UINT]   = "r8ui",
        };
        return rep_tab[rep_fmt];
    }
    case AV_PIX_FMT_GRAY16:
    case AV_PIX_FMT_GBRAP16:
    case AV_PIX_FMT_GBRP10:
    case AV_PIX_FMT_YUV420P10:
    case AV_PIX_FMT_YUV420P12:
    case AV_PIX_FMT_YUV420P16:
    case AV_PIX_FMT_YUV422P10:
    case AV_PIX_FMT_YUV422P12:
    case AV_PIX_FMT_YUV422P16:
    case AV_PIX_FMT_YUV444P10:
    case AV_PIX_FMT_YUV444P12:
    case AV_PIX_FMT_YUV444P16: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "r16ui",
            [FF_VK_REP_FLOAT]  = "r16f",
            [FF_VK_REP_INT]    = "r16i",
            [FF_VK_REP_UINT]   = "r16ui",
        };
        return rep_tab[rep_fmt];
    }
    case AV_PIX_FMT_GRAYF32:
    case AV_PIX_FMT_GBRPF32:
    case AV_PIX_FMT_GBRAPF32: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "r32f",
            [FF_VK_REP_FLOAT]  = "r32f",
            [FF_VK_REP_INT]    = "r32i",
            [FF_VK_REP_UINT]   = "r32ui",
        };
        return rep_tab[rep_fmt];
    }
    case AV_PIX_FMT_NV12:
    case AV_PIX_FMT_NV16:
    case AV_PIX_FMT_NV24: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rg8ui",
            [FF_VK_REP_FLOAT]  = "rg8",
            [FF_VK_REP_INT]    = "rg8i",
            [FF_VK_REP_UINT]   = "rg8ui",
        };
        return rep_tab[rep_fmt];
    }
    case AV_PIX_FMT_P010:
    case AV_PIX_FMT_P210:
    case AV_PIX_FMT_P410: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rgb10_a2ui",
            [FF_VK_REP_FLOAT]  = "rgb10_a2",
            [FF_VK_REP_INT]    = NULL,
            [FF_VK_REP_UINT]   = "rgb10_a2ui",
        };
        return rep_tab[rep_fmt];
    }
    case AV_PIX_FMT_P012:
    case AV_PIX_FMT_P016:
    case AV_PIX_FMT_P212:
    case AV_PIX_FMT_P216:
    case AV_PIX_FMT_P412:
    case AV_PIX_FMT_P416: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rg16ui",
            [FF_VK_REP_FLOAT]  = "rg16",
            [FF_VK_REP_INT]    = "rg16i",
            [FF_VK_REP_UINT]   = "rg16ui",
        };
        return rep_tab[rep_fmt];
    }
    default:
        return "rgba32f";
    }
}

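/* Per-frame bundle of plane image views; destroyed as a unit through the
 * AVBuffer created in ff_vk_create_imageviews() below. */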
typedef struct ImageViewCtx {
    int nb_views;
    VkImageView views[];
} ImageViewCtx;

static void destroy_imageviews(void *opaque, uint8_t *data)
{
    FFVulkanContext *s = opaque;
    FFVulkanFunctions *vk = &s->vkfn;
    ImageViewCtx *iv = (ImageViewCtx *)data;

    for (int i = 0; i < iv->nb_views; i++)
        vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);

    av_free(iv);
}

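/* Remap a native plane format to the variant matching the requested shader
 * representation (UNORM for float sampling, SINT/UINT for integer access),
 * so a view can reinterpret the same image memory. Returns
 * VK_FORMAT_UNDEFINED if no compatible variant exists. */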
static VkFormat map_fmt_to_rep(VkFormat fmt, enum FFVkShaderRepFormat rep_fmt)
{
#define REPS_FMT(fmt) \
    [FF_VK_REP_NATIVE] = fmt ## _UINT, \
    [FF_VK_REP_FLOAT]  = fmt ## _UNORM, \
    [FF_VK_REP_INT]    = fmt ## _SINT, \
    [FF_VK_REP_UINT]   = fmt ## _UINT,

#define REPS_FMT_PACK(fmt, num) \
    [FF_VK_REP_NATIVE] = fmt ## _UINT_PACK ## num, \
    [FF_VK_REP_FLOAT]  = fmt ## _UNORM_PACK ## num, \
    [FF_VK_REP_INT]    = fmt ## _SINT_PACK ## num, \
    [FF_VK_REP_UINT]   = fmt ## _UINT_PACK ## num,

    const VkFormat fmts_map[][4] = {
        { REPS_FMT_PACK(VK_FORMAT_A2B10G10R10, 32) },
        { REPS_FMT_PACK(VK_FORMAT_A2R10G10B10, 32) },
        {
            VK_FORMAT_B5G6R5_UNORM_PACK16,
            VK_FORMAT_B5G6R5_UNORM_PACK16,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_UNDEFINED,
        },
        {
            VK_FORMAT_R5G6B5_UNORM_PACK16,
            VK_FORMAT_R5G6B5_UNORM_PACK16,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_UNDEFINED,
        },
        { REPS_FMT(VK_FORMAT_B8G8R8) },
        { REPS_FMT(VK_FORMAT_B8G8R8A8) },
        { REPS_FMT(VK_FORMAT_R8) },
        { REPS_FMT(VK_FORMAT_R8G8) },
        { REPS_FMT(VK_FORMAT_R8G8B8) },
        { REPS_FMT(VK_FORMAT_R8G8B8A8) },
        { REPS_FMT(VK_FORMAT_R16) },
        { REPS_FMT(VK_FORMAT_R16G16) },
        { REPS_FMT(VK_FORMAT_R16G16B16) },
        { REPS_FMT(VK_FORMAT_R16G16B16A16) },
        {
            VK_FORMAT_R32_SFLOAT,
            VK_FORMAT_R32_SFLOAT,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_UNDEFINED,
        },
        {
            VK_FORMAT_R32G32B32_SFLOAT,
            VK_FORMAT_R32G32B32_SFLOAT,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_UNDEFINED,
        },
        {
            VK_FORMAT_R32G32B32A32_SFLOAT,
            VK_FORMAT_R32G32B32A32_SFLOAT,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_UNDEFINED,
        },
        {
            VK_FORMAT_R32G32B32_UINT,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_R32G32B32_SINT,
            VK_FORMAT_R32G32B32_UINT,
        },
        {
            VK_FORMAT_R32G32B32A32_UINT,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_R32G32B32A32_SINT,
            VK_FORMAT_R32G32B32A32_UINT,
        },
    };
#undef REPS_FMT_PACK
#undef REPS_FMT

    if (fmt == VK_FORMAT_UNDEFINED)
        return VK_FORMAT_UNDEFINED;

    for (int i = 0; i < FF_ARRAY_ELEMS(fmts_map); i++) {
        if (fmts_map[i][FF_VK_REP_NATIVE] == fmt ||
            fmts_map[i][FF_VK_REP_FLOAT]  == fmt ||
            fmts_map[i][FF_VK_REP_INT]    == fmt ||
            fmts_map[i][FF_VK_REP_UINT]   == fmt)
            return fmts_map[i][rep_fmt];
    }

    return VK_FORMAT_UNDEFINED;
}

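/* Create one VkImageView per plane of the frame's software format, remapped
 * to the requested representation. The views live in an AVBuffer attached to
 * the execution context as a dependency, so they are only destroyed once the
 * submission using them completes. Illustrative call (names as declared
 * below):
 *
 *     VkImageView views[AV_NUM_DATA_POINTERS];
 *     err = ff_vk_create_imageviews(s, e, views, frame, FF_VK_REP_FLOAT);
 */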
int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
                            VkImageView views[AV_NUM_DATA_POINTERS],
                            AVFrame *f, enum FFVkShaderRepFormat rep_fmt)
{
    int err;
    VkResult ret;
    AVBufferRef *buf;
    FFVulkanFunctions *vk = &s->vkfn;
    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
    AVVulkanFramesContext *vkfc = hwfc->hwctx;
    const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format);
    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
    const int nb_images = ff_vk_count_images(vkf);
    const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);

    ImageViewCtx *iv;
    const size_t buf_size = sizeof(*iv) + nb_planes*sizeof(VkImageView);
    iv = av_mallocz(buf_size);
    if (!iv)
        return AVERROR(ENOMEM);

    for (int i = 0; i < nb_planes; i++) {
        /* Strip the video coding usage flags, which are not valid for
         * views used for sampling/storage */
        VkImageViewUsageCreateInfo view_usage_info = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
            .usage = vkfc->usage &
                     (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR |
                        VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)),
        };
        VkImageViewCreateInfo view_create_info = {
            .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .pNext      = &view_usage_info,
            .image      = vkf->img[FFMIN(i, nb_images - 1)],
            .viewType   = VK_IMAGE_VIEW_TYPE_2D,
            .format     = map_fmt_to_rep(rep_fmts[i], rep_fmt),
            .components = ff_comp_identity_map,
            .subresourceRange = {
                .aspectMask = ff_vk_aspect_flag(f, i),
                .levelCount = 1,
                .layerCount = 1,
            },
        };
        if (view_create_info.format == VK_FORMAT_UNDEFINED) {
            av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation "
                                    "of format %i and mode %i\n",
                   rep_fmts[i], rep_fmt);
            err = AVERROR(EINVAL);
            goto fail;
        }

        ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info,
                                  s->hwctx->alloc, &iv->views[i]);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        iv->nb_views++;
    }

    buf = av_buffer_create((uint8_t *)iv, buf_size, destroy_imageviews, s, 0);
    if (!buf) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    /* Add to queue dependencies */
    err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
    if (err < 0) {
        /* The buffer now owns iv; unreferencing it destroys the views too */
        av_buffer_unref(&buf);
        return err;
    }

    memcpy(views, iv->views, nb_planes*sizeof(*views));

    return 0;

fail:
    for (int i = 0; i < iv->nb_views; i++)
        vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
    av_free(iv);
    return err;
}

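/* Append one VkImageMemoryBarrier2 per image backing the frame. The previous
 * access/layout/queue family come from a pending update recorded earlier in
 * this execution context, if any, otherwise from the AVVkFrame itself; the
 * new state is recorded via ff_vk_exec_update_frame(). */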
void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
                         AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
                         VkPipelineStageFlags src_stage,
                         VkPipelineStageFlags dst_stage,
                         VkAccessFlagBits new_access,
                         VkImageLayout new_layout,
                         uint32_t new_qf)
{
    int found = -1;
    AVVkFrame *vkf = (AVVkFrame *)pic->data[0];
    const int nb_images = ff_vk_count_images(vkf);
    for (int i = 0; i < e->nb_frame_deps; i++)
        if (e->frame_deps[i]->data[0] == pic->data[0]) {
            if (e->frame_update[i])
                found = i;
            break;
        }

    for (int i = 0; i < nb_images; i++) {
        bar[*nb_bar] = (VkImageMemoryBarrier2) {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
            .pNext = NULL,
            .srcStageMask = src_stage,
            .dstStageMask = dst_stage,
            .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i],
            .dstAccessMask = new_access,
            .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0],
            .newLayout = new_layout,
            .srcQueueFamilyIndex = found >= 0 ? e->queue_family_dst[found] : vkf->queue_family[0],
            .dstQueueFamilyIndex = new_qf,
            .image = vkf->img[i],
            .subresourceRange = (VkImageSubresourceRange) {
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .layerCount = 1,
                .levelCount = 1,
            },
        };
        *nb_bar += 1;
    }

    ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL);
}

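/* Begin a shader: set up its metadata and emit the common GLSL prelude
 * (version pragma, required extensions, utility macros and the workgroup
 * size declaration). */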
int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name,
                      VkPipelineStageFlags stage,
                      const char *extensions[], int nb_extensions,
                      int lg_x, int lg_y, int lg_z,
                      uint32_t required_subgroup_size)
{
    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);

    shd->name = name;
    shd->stage = stage;
    shd->lg_size[0] = lg_x;
    shd->lg_size[1] = lg_y;
    shd->lg_size[2] = lg_z;

    switch (shd->stage) {
    case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
    case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
    case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
    case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
    case VK_SHADER_STAGE_MISS_BIT_KHR:
    case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
        shd->bind_point = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR;
        break;
    case VK_SHADER_STAGE_COMPUTE_BIT:
        shd->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
        break;
    default:
        shd->bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
        break;
    }

    if (required_subgroup_size) {
        shd->subgroup_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO;
        shd->subgroup_info.requiredSubgroupSize = required_subgroup_size;
    }

    av_bprintf(&shd->src, "/* %s shader: %s */\n",
               (stage == VK_SHADER_STAGE_TASK_BIT_EXT ||
                stage == VK_SHADER_STAGE_MESH_BIT_EXT) ?
               "Mesh" :
               (shd->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) ?
               "Raytrace" :
               (shd->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) ?
               "Compute" : "Graphics",
               name);
    GLSLF(0, #version %i ,460);
    GLSLC(0, );

    /* Common utilities */
    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
    GLSLC(0, );
    GLSLC(0, #extension GL_EXT_scalar_block_layout : require );
    GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : require );
    GLSLC(0, #extension GL_EXT_control_flow_attributes : require );
    if ((s->extensions & FF_VK_EXT_DEBUG_UTILS) &&
        (s->extensions & FF_VK_EXT_RELAXED_EXTENDED_INSTR)) {
        GLSLC(0, #extension GL_EXT_debug_printf : require );
        GLSLC(0, #define DEBUG );
    }

    if (stage == VK_SHADER_STAGE_TASK_BIT_EXT ||
        stage == VK_SHADER_STAGE_MESH_BIT_EXT)
        GLSLC(0, #extension GL_EXT_mesh_shader : require );

    for (int i = 0; i < nb_extensions; i++)
        GLSLF(0, #extension %s : %s ,extensions[i], "require");
    GLSLC(0, );

    GLSLF(0, layout (local_size_x = %i, local_size_y = %i, local_size_z = %i) in;
          , shd->lg_size[0], shd->lg_size[1], shd->lg_size[2]);
    GLSLC(0, );

    return 0;
}

void ff_vk_shader_print(void *ctx, FFVulkanShader *shd, int prio)
{
    int line = 0;
    const char *p = shd->src.str;
    const char *start = p;
    const size_t len = strlen(p);

    AVBPrint buf;
    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);

    for (int i = 0; i < len; i++) {
        if (p[i] == '\n') {
            av_bprintf(&buf, "%i\t", ++line);
            av_bprint_append_data(&buf, start, &p[i] - start + 1);
            start = &p[i + 1];
        }
    }

    av_log(ctx, prio, "Shader %s: \n%s", shd->name, buf.str);
    av_bprint_finalize(&buf, NULL);
}

static int init_pipeline_layout(FFVulkanContext *s, FFVulkanShader *shd)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkPipelineLayoutCreateInfo pipeline_layout_info;

    /* Finally create the pipeline layout */
    pipeline_layout_info = (VkPipelineLayoutCreateInfo) {
        .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .pSetLayouts            = shd->desc_layout,
        .setLayoutCount         = shd->nb_descriptor_sets,
        .pushConstantRangeCount = shd->push_consts_num,
        .pPushConstantRanges    = shd->push_consts,
    };

    ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info,
                                   s->hwctx->alloc, &shd->pipeline_layout);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

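/* Wrap compiled SPIR-V in a VkShaderModule; used on the pipeline path, as
 * opposed to the VK_EXT_shader_object path below. */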
static int create_shader_module(FFVulkanContext *s, FFVulkanShader *shd,
                                VkShaderModule *mod,
                                uint8_t *spirv, size_t spirv_len)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    VkShaderModuleCreateInfo shader_module_info = {
        .sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .pNext    = NULL,
        .flags    = 0x0,
        .pCode    = (void *)spirv,
        .codeSize = spirv_len,
    };

    ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_module_info,
                                 s->hwctx->alloc, mod);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_VERBOSE, "Error creating shader module: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

static int init_compute_pipeline(FFVulkanContext *s, FFVulkanShader *shd,
                                 VkShaderModule mod, const char *entrypoint)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    VkComputePipelineCreateInfo pipeline_create_info = {
        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .flags  = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ?
                  VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0x0,
        .layout = shd->pipeline_layout,
        .stage  = (VkPipelineShaderStageCreateInfo) {
            .sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .pNext  = shd->subgroup_info.requiredSubgroupSize ?
                      &shd->subgroup_info : NULL,
            .pName  = entrypoint,
            .flags  = shd->subgroup_info.requiredSubgroupSize ?
                      VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT : 0x0,
            .stage  = shd->stage,
            .module = mod,
        },
    };

    ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1,
                                     &pipeline_create_info,
                                     s->hwctx->alloc, &shd->pipeline);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

static int create_shader_object(FFVulkanContext *s, FFVulkanShader *shd,
                                uint8_t *spirv, size_t spirv_len,
                                const char *entrypoint)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    size_t shader_size = 0;

    VkShaderCreateInfoEXT shader_obj_create = {
        .sType                  = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT,
        .flags                  = shd->subgroup_info.requiredSubgroupSize ?
                                  VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT : 0x0,
        .stage                  = shd->stage,
        .nextStage              = 0,
        .codeType               = VK_SHADER_CODE_TYPE_SPIRV_EXT,
        .pCode                  = spirv,
        .codeSize               = spirv_len,
        .pName                  = entrypoint,
        .pSetLayouts            = shd->desc_layout,
        .setLayoutCount         = shd->nb_descriptor_sets,
        .pushConstantRangeCount = shd->push_consts_num,
        .pPushConstantRanges    = shd->push_consts,
        .pSpecializationInfo    = NULL,
    };

    ret = vk->CreateShadersEXT(s->hwctx->act_dev, 1, &shader_obj_create,
                               s->hwctx->alloc, &shd->object);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to create shader object: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    if (vk->GetShaderBinaryDataEXT(s->hwctx->act_dev, shd->object,
                                   &shader_size, NULL) == VK_SUCCESS)
        av_log(s, AV_LOG_VERBOSE, "Shader %s size: %zu binary (%zu SPIR-V)\n",
               shd->name, shader_size, spirv_len);

    return 0;
}

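/* Create the descriptor set layouts. Without VK_EXT_descriptor_buffer, push
 * descriptors are preferred when there is exactly one non-singular set; with
 * it, layout sizes and per-binding offsets are queried up front. */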
static int init_descriptors(FFVulkanContext *s, FFVulkanShader *shd)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    shd->desc_layout = av_malloc_array(shd->nb_descriptor_sets,
                                       sizeof(*shd->desc_layout));
    if (!shd->desc_layout)
        return AVERROR(ENOMEM);

    if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) {
        int has_singular = 0;
        for (int i = 0; i < shd->nb_descriptor_sets; i++) {
            if (shd->desc_set[i].singular) {
                has_singular = 1;
                break;
            }
        }
        shd->use_push = (s->extensions & FF_VK_EXT_PUSH_DESCRIPTOR) &&
                        (shd->nb_descriptor_sets == 1) &&
                        !has_singular;
    }

    for (int i = 0; i < shd->nb_descriptor_sets; i++) {
        FFVulkanDescriptorSet *set = &shd->desc_set[i];
        VkDescriptorSetLayoutCreateInfo desc_layout_create = {
            .sType        = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
            .bindingCount = set->nb_bindings,
            .pBindings    = set->binding,
            .flags        = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ?
                            VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT :
                            (shd->use_push) ?
                            VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR :
                            0x0,
        };

        ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev,
                                            &desc_layout_create,
                                            s->hwctx->alloc,
                                            &shd->desc_layout[i]);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to create descriptor set layout: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }

        if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
            vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, shd->desc_layout[i],
                                              &set->layout_size);

            set->aligned_size = FFALIGN(set->layout_size,
                                        s->desc_buf_props.descriptorBufferOffsetAlignment);

            for (int j = 0; j < set->nb_bindings; j++)
                vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev,
                                                           shd->desc_layout[i],
                                                           j,
                                                           &set->binding_offset[j]);
        }
    }

    return 0;
}

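/* Finalize a shader from its SPIR-V: build descriptor and pipeline layouts,
 * then create either a shader object (VK_EXT_shader_object) or a pipeline. */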
int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd,
                      uint8_t *spirv, size_t spirv_len,
                      const char *entrypoint)
{
    int err;
    FFVulkanFunctions *vk = &s->vkfn;

    err = init_descriptors(s, shd);
    if (err < 0)
        return err;

    err = init_pipeline_layout(s, shd);
    if (err < 0)
        return err;

    if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
        shd->bound_buffer_indices = av_calloc(shd->nb_descriptor_sets,
                                              sizeof(*shd->bound_buffer_indices));
        if (!shd->bound_buffer_indices)
            return AVERROR(ENOMEM);

        for (int i = 0; i < shd->nb_descriptor_sets; i++)
            shd->bound_buffer_indices[i] = i;
    }

    if (s->extensions & FF_VK_EXT_SHADER_OBJECT) {
        err = create_shader_object(s, shd, spirv, spirv_len, entrypoint);
        if (err < 0)
            return err;
    } else {
        VkShaderModule mod;
        err = create_shader_module(s, shd, &mod, spirv, spirv_len);
        if (err < 0)
            return err;

        switch (shd->bind_point) {
        case VK_PIPELINE_BIND_POINT_COMPUTE:
            err = init_compute_pipeline(s, shd, mod, entrypoint);
            break;
        default:
            av_log(s, AV_LOG_ERROR, "Unsupported shader type: %i\n",
                   shd->bind_point);
            err = AVERROR(EINVAL);
            break;
        }

        vk->DestroyShaderModule(s->hwctx->act_dev, mod, s->hwctx->alloc);
        if (err < 0)
            return err;
    }

    return 0;
}

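/* Per-descriptor-type properties, used both to emit GLSL declarations and to
 * size the structures which update each descriptor. */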
static const struct descriptor_props {
    size_t struct_size; /* Size of the opaque which updates the descriptor */
    const char *type;
    int is_uniform;
    int mem_quali;      /* Can use a memory qualifier */
    int dim_needed;     /* Must indicate dimension */
    int buf_content;    /* Must indicate buffer contents */
} descriptor_props[] = {
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
};

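/* Add a descriptor set to a shader: record layout/pool bookkeeping (unless
 * print_to_shader_only) and emit the matching GLSL
 * "layout (set = N, binding = M)" declarations. */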
int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd,
                                    FFVulkanDescriptorSetBinding *desc, int nb,
                                    int singular, int print_to_shader_only)
{
    int has_sampler = 0;
    FFVulkanDescriptorSet *set;

    if (print_to_shader_only)
        goto print;

    /* Actual layout allocated for the pipeline */
    set = av_realloc_array(shd->desc_set,
                           sizeof(*shd->desc_set),
                           shd->nb_descriptor_sets + 1);
    if (!set)
        return AVERROR(ENOMEM);
    shd->desc_set = set;
    set = &set[shd->nb_descriptor_sets];
    memset(set, 0, sizeof(*set));

    set->binding = av_calloc(nb, sizeof(*set->binding));
    if (!set->binding)
        return AVERROR(ENOMEM);

    set->binding_offset = av_calloc(nb, sizeof(*set->binding_offset));
    if (!set->binding_offset) {
        av_freep(&set->binding);
        return AVERROR(ENOMEM);
    }

    for (int i = 0; i < nb; i++) {
        set->binding[i].binding            = i;
        set->binding[i].descriptorType     = desc[i].type;
        set->binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
        set->binding[i].stageFlags         = desc[i].stages;
        set->binding[i].pImmutableSamplers = desc[i].samplers;

        if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
            desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
            has_sampler |= 1;
    }

    set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
                 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
    if (has_sampler)
        set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT;

    if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) {
        for (int i = 0; i < nb; i++) {
            int j;
            VkDescriptorPoolSize *desc_pool_size;
            for (j = 0; j < shd->nb_desc_pool_size; j++)
                if (shd->desc_pool_size[j].type == desc[i].type)
                    break;
            if (j >= shd->nb_desc_pool_size) {
                desc_pool_size = av_realloc_array(shd->desc_pool_size,
                                                  sizeof(*desc_pool_size),
                                                  shd->nb_desc_pool_size + 1);
                if (!desc_pool_size)
                    return AVERROR(ENOMEM);
                shd->desc_pool_size = desc_pool_size;
                shd->nb_desc_pool_size++;
                memset(&desc_pool_size[j], 0, sizeof(VkDescriptorPoolSize));
            }
            shd->desc_pool_size[j].type             = desc[i].type;
            shd->desc_pool_size[j].descriptorCount += FFMAX(desc[i].elems, 1);
        }
    }

    set->singular = singular;
    set->nb_bindings = nb;
    shd->nb_descriptor_sets++;

print:
    /* Write shader info */
    for (int i = 0; i < nb; i++) {
        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
        GLSLA("layout (set = %i, binding = %i", FFMAX(shd->nb_descriptor_sets - 1, 0), i);

        if (desc[i].mem_layout)
            GLSLA(", %s", desc[i].mem_layout);
        GLSLA(")");

        if (prop->is_uniform)
            GLSLA(" uniform");

        if (prop->mem_quali && desc[i].mem_quali)
            GLSLA(" %s", desc[i].mem_quali);

        if (prop->type) {
            GLSLA(" ");
            if (desc[i].type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
                if (desc[i].mem_layout) {
                    int len = strlen(desc[i].mem_layout);
                    if (desc[i].mem_layout[len - 1] == 'i' &&
                        desc[i].mem_layout[len - 2] == 'u') {
                        GLSLA("u");
                    } else if (desc[i].mem_layout[len - 1] == 'i') {
                        GLSLA("i");
                    }
                }
            }
            GLSLA("%s", prop->type);
        }

        if (prop->dim_needed)
            GLSLA("%iD", desc[i].dimensions);

        GLSLA(" %s", desc[i].name);

        if (prop->buf_content)
            GLSLA(" {\n    %s\n}", desc[i].buf_content);
        else if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);

        GLSLA(";");
        GLSLA("\n");
    }
    GLSLA("\n");

    return 0;
}

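/* Register a shader with an execution pool, allocating per-context descriptor
 * state: either one descriptor buffer per set, or a descriptor pool with sets
 * pre-allocated for every execution context. */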
int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool,
                               FFVulkanShader *shd)
{
    int err;
    FFVulkanShaderData *sd;

    if (!shd->nb_descriptor_sets)
        return 0;

    sd = av_realloc_array(pool->reg_shd,
                          sizeof(*pool->reg_shd),
                          pool->nb_reg_shd + 1);
    if (!sd)
        return AVERROR(ENOMEM);

    pool->reg_shd = sd;
    sd = &sd[pool->nb_reg_shd++];
    memset(sd, 0, sizeof(*sd));

    sd->shd = shd;
    sd->nb_descriptor_sets = shd->nb_descriptor_sets;

    if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
        sd->desc_bind = av_malloc_array(sd->nb_descriptor_sets, sizeof(*sd->desc_bind));
        if (!sd->desc_bind)
            return AVERROR(ENOMEM);

        sd->desc_set_buf = av_calloc(sd->nb_descriptor_sets, sizeof(*sd->desc_set_buf));
        if (!sd->desc_set_buf)
            return AVERROR(ENOMEM);

        for (int i = 0; i < sd->nb_descriptor_sets; i++) {
            FFVulkanDescriptorSet *set = &shd->desc_set[i];
            FFVulkanDescriptorSetData *sdb = &sd->desc_set_buf[i];
            int nb = set->singular ? 1 : pool->pool_size;

            err = ff_vk_create_buf(s, &sdb->buf,
                                   set->aligned_size*nb,
                                   NULL, NULL, set->usage,
                                   VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                   VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                   VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
            if (err < 0)
                return err;

            err = ff_vk_map_buffer(s, &sdb->buf, &sdb->desc_mem, 0);
            if (err < 0)
                return err;

            sd->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) {
                .sType   = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
                .usage   = set->usage,
                .address = sdb->buf.address,
            };
        }
    } else if (!shd->use_push) {
        VkResult ret;
        FFVulkanFunctions *vk = &s->vkfn;
        VkDescriptorSetLayout *tmp_layouts;
        VkDescriptorSetAllocateInfo set_alloc_info;
        VkDescriptorPoolCreateInfo pool_create_info;

        for (int i = 0; i < shd->nb_desc_pool_size; i++)
            shd->desc_pool_size[i].descriptorCount *= pool->pool_size;

        pool_create_info = (VkDescriptorPoolCreateInfo) {
            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
            .flags         = 0,
            .pPoolSizes    = shd->desc_pool_size,
            .poolSizeCount = shd->nb_desc_pool_size,
            .maxSets       = sd->nb_descriptor_sets*pool->pool_size,
        };

        ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
                                       s->hwctx->alloc, &sd->desc_pool);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to create descriptor pool: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }

        tmp_layouts = av_malloc_array(pool_create_info.maxSets, sizeof(*tmp_layouts));
        if (!tmp_layouts)
            return AVERROR(ENOMEM);

        /* Collate each execution context's descriptor set layouts */
        for (int i = 0; i < pool->pool_size; i++)
            for (int j = 0; j < sd->nb_descriptor_sets; j++)
                tmp_layouts[i*sd->nb_descriptor_sets + j] = shd->desc_layout[j];

        set_alloc_info = (VkDescriptorSetAllocateInfo) {
            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
            .descriptorPool     = sd->desc_pool,
            .pSetLayouts        = tmp_layouts,
            .descriptorSetCount = pool_create_info.maxSets,
        };

        sd->desc_sets = av_malloc_array(pool_create_info.maxSets,
                                        sizeof(*sd->desc_sets));
        if (!sd->desc_sets) {
            av_free(tmp_layouts);
            return AVERROR(ENOMEM);
        }

        ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &set_alloc_info,
                                         sd->desc_sets);
        av_free(tmp_layouts);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
                   ff_vk_ret2str(ret));
            av_freep(&sd->desc_sets);
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}

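/* Find the per-pool data registered for this shader by
 * ff_vk_shader_register_exec(), if any. */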
static inline FFVulkanShaderData *get_shd_data(FFVkExecContext *e,
                                               FFVulkanShader *shd)
{
    for (int i = 0; i < e->parent->nb_reg_shd; i++)
        if (e->parent->reg_shd[i].shd == shd)
            return &e->parent->reg_shd[i];
    return NULL;
}

static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e,
                                         FFVulkanShader *shd, int set,
                                         int bind_idx, int array_idx,
                                         VkDescriptorGetInfoEXT *desc_get_info,
                                         size_t desc_size)
{
    FFVulkanFunctions *vk = &s->vkfn;
    FFVulkanDescriptorSet *desc_set = &shd->desc_set[set];
    FFVulkanShaderData *sd = get_shd_data(e, shd);
    const size_t exec_offset = desc_set->singular ? 0 : desc_set->aligned_size*e->idx;
    void *desc = sd->desc_set_buf[set].desc_mem +     /* Base */
                 exec_offset +                        /* Execution context */
                 desc_set->binding_offset[bind_idx] + /* Descriptor binding */
                 array_idx*desc_size;                 /* Array position */

    vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc);
}

static inline void update_set_pool_write(FFVulkanContext *s, FFVkExecContext *e,
                                         FFVulkanShader *shd, int set,
                                         VkWriteDescriptorSet *write_info)
{
    FFVulkanFunctions *vk = &s->vkfn;
    FFVulkanDescriptorSet *desc_set = &shd->desc_set[set];
    FFVulkanShaderData *sd = get_shd_data(e, shd);

    if (desc_set->singular) {
        for (int i = 0; i < e->parent->pool_size; i++) {
            write_info->dstSet = sd->desc_sets[i*sd->nb_descriptor_sets + set];
            vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
        }
    } else {
        if (shd->use_push) {
            vk->CmdPushDescriptorSetKHR(e->buf,
                                        shd->bind_point,
                                        shd->pipeline_layout,
                                        set, 1,
                                        write_info);
        } else {
            write_info->dstSet = sd->desc_sets[e->idx*sd->nb_descriptor_sets + set];
            vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
        }
    }
}

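/* Write a single image/sampler descriptor, either directly into a descriptor
 * buffer (VK_EXT_descriptor_buffer) or through a descriptor set write/push. */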
static int vk_set_descriptor_image(FFVulkanContext *s, FFVulkanShader *shd,
                                   FFVkExecContext *e, int set, int bind, int offs,
                                   VkImageView view, VkImageLayout layout,
                                   VkSampler sampler)
{
    FFVulkanDescriptorSet *desc_set = &shd->desc_set[set];

    if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
        VkDescriptorGetInfoEXT desc_get_info = {
            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
            .type  = desc_set->binding[bind].descriptorType,
        };
        VkDescriptorImageInfo desc_img_info = {
            .imageView   = view,
            .sampler     = sampler,
            .imageLayout = layout,
        };
        size_t desc_size;

        switch (desc_get_info.type) {
        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
            desc_get_info.data.pSampledImage = &desc_img_info;
            desc_size = s->desc_buf_props.sampledImageDescriptorSize;
            break;
        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
            desc_get_info.data.pStorageImage = &desc_img_info;
            desc_size = s->desc_buf_props.storageImageDescriptorSize;
            break;
        case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
            desc_get_info.data.pInputAttachmentImage = &desc_img_info;
            desc_size = s->desc_buf_props.inputAttachmentDescriptorSize;
            break;
        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
            desc_get_info.data.pCombinedImageSampler = &desc_img_info;
            desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize;
            break;
        default:
            av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
                   set, bind, desc_get_info.type);
            return AVERROR(EINVAL);
        }

        update_set_descriptor(s, e, shd, set, bind, offs,
                              &desc_get_info, desc_size);
    } else {
        VkDescriptorImageInfo desc_pool_write_info_img = {
            .sampler     = sampler,
            .imageView   = view,
            .imageLayout = layout,
        };
        VkWriteDescriptorSet desc_pool_write_info = {
            .sType           = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding      = bind,
            .descriptorCount = 1,
            .dstArrayElement = offs,
            .descriptorType  = desc_set->binding[bind].descriptorType,
            .pImageInfo      = &desc_pool_write_info_img,
        };
        update_set_pool_write(s, e, shd, set, &desc_pool_write_info);
    }

    return 0;
}

int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e,
                                    FFVulkanShader *shd,
                                    int set, int bind, int elem,
                                    FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len,
                                    VkFormat fmt)
{
    FFVulkanDescriptorSet *desc_set = &shd->desc_set[set];

    if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
        VkDescriptorGetInfoEXT desc_get_info = {
            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
            .type  = desc_set->binding[bind].descriptorType,
        };
        VkDescriptorAddressInfoEXT desc_buf_info = {
            .sType   = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT,
            .address = buf->address + offset,
            .range   = len,
            .format  = fmt,
        };
        size_t desc_size;

        switch (desc_get_info.type) {
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
            desc_get_info.data.pUniformBuffer = &desc_buf_info;
            desc_size = s->desc_buf_props.uniformBufferDescriptorSize;
            break;
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
            desc_get_info.data.pStorageBuffer = &desc_buf_info;
            desc_size = s->desc_buf_props.storageBufferDescriptorSize;
            break;
        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
            desc_get_info.data.pUniformTexelBuffer = &desc_buf_info;
            desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize;
            break;
        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
            desc_get_info.data.pStorageTexelBuffer = &desc_buf_info;
            desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize;
            break;
        default:
            av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
                   set, bind, desc_get_info.type);
            return AVERROR(EINVAL);
        }

        update_set_descriptor(s, e, shd, set, bind, elem, &desc_get_info, desc_size);
    } else {
        VkDescriptorBufferInfo desc_pool_write_info_buf = {
            .buffer = buf->buf,
            .offset = offset,
            .range  = len,
        };
        VkWriteDescriptorSet desc_pool_write_info = {
            .sType           = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding      = bind,
            .descriptorCount = 1,
            .dstArrayElement = elem,
            .descriptorType  = desc_set->binding[bind].descriptorType,
            .pBufferInfo     = &desc_pool_write_info_buf,
        };
        update_set_pool_write(s, e, shd, set, &desc_pool_write_info);
    }

    return 0;
}

void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e,
                                   FFVulkanShader *shd, AVFrame *f,
                                   VkImageView *views, int set, int binding,
                                   VkImageLayout layout, VkSampler sampler)
{
    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
    const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);

    for (int i = 0; i < nb_planes; i++)
        vk_set_descriptor_image(s, shd, e, set, binding, i,
                                views[i], layout, sampler);
}

void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e,
                                    FFVulkanShader *shd,
                                    VkShaderStageFlagBits stage,
                                    int offset, size_t size, void *src)
{
    FFVulkanFunctions *vk = &s->vkfn;
    vk->CmdPushConstants(e->buf, shd->pipeline_layout,
                         stage, offset, size, src);
}

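/* Bind a shader (object or pipeline) and its descriptor state into the
 * execution context's command buffer, offsetting descriptor buffers by the
 * context's index. */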
void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e,
                            FFVulkanShader *shd)
{
    FFVulkanFunctions *vk = &s->vkfn;
    VkDeviceSize offsets[1024];
    FFVulkanShaderData *sd = get_shd_data(e, shd);

    if (s->extensions & FF_VK_EXT_SHADER_OBJECT) {
        VkShaderStageFlagBits stages = shd->stage;
        vk->CmdBindShadersEXT(e->buf, 1, &stages, &shd->object);
    } else {
        vk->CmdBindPipeline(e->buf, shd->bind_point, shd->pipeline);
    }

    if (sd && sd->nb_descriptor_sets) {
        if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
            for (int i = 0; i < sd->nb_descriptor_sets; i++)
                offsets[i] = shd->desc_set[i].singular ? 0 : shd->desc_set[i].aligned_size*e->idx;

            /* Bind descriptor buffers */
            vk->CmdBindDescriptorBuffersEXT(e->buf, sd->nb_descriptor_sets, sd->desc_bind);

            /* Binding offsets */
            vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, shd->bind_point, shd->pipeline_layout,
                                                 0, sd->nb_descriptor_sets,
                                                 shd->bound_buffer_indices, offsets);
        } else if (!shd->use_push) {
            vk->CmdBindDescriptorSets(e->buf, shd->bind_point, shd->pipeline_layout,
                                      0, sd->nb_descriptor_sets,
                                      &sd->desc_sets[e->idx*sd->nb_descriptor_sets],
                                      0, NULL);
        }
    }
}

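/* Free all Vulkan objects and allocations owned by a shader. */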
void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd)
{
    FFVulkanFunctions *vk = &s->vkfn;

    av_bprint_finalize(&shd->src, NULL);

    if (shd->object)
        vk->DestroyShaderEXT(s->hwctx->act_dev, shd->object, s->hwctx->alloc);
    if (shd->pipeline)
        vk->DestroyPipeline(s->hwctx->act_dev, shd->pipeline, s->hwctx->alloc);
    if (shd->pipeline_layout)
        vk->DestroyPipelineLayout(s->hwctx->act_dev, shd->pipeline_layout,
                                  s->hwctx->alloc);

    for (int i = 0; i < shd->nb_descriptor_sets; i++) {
        FFVulkanDescriptorSet *set = &shd->desc_set[i];
        av_free(set->binding);
        av_free(set->binding_offset);
    }

    for (int i = 0; i < shd->nb_descriptor_sets; i++)
        if (shd->desc_layout[i])
            vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, shd->desc_layout[i],
                                           s->hwctx->alloc);

    av_freep(&shd->desc_pool_size);
    av_freep(&shd->desc_layout);
    av_freep(&shd->desc_set);
    av_freep(&shd->bound_buffer_indices);
    av_freep(&shd->push_consts);
    shd->push_consts_num = 0;
}

void ff_vk_uninit(FFVulkanContext *s)
{
    av_freep(&s->query_props);
    av_freep(&s->qf_props);
    av_freep(&s->video_props);
    av_freep(&s->coop_mat_props);

    av_buffer_unref(&s->device_ref);
    av_buffer_unref(&s->frames_ref);
}

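/* Initialize a Vulkan context from a device reference (or the device backing
 * the given frames context), loading function pointers and device properties. */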
int ff_vk_init(FFVulkanContext *s, void *log_parent,
               AVBufferRef *device_ref, AVBufferRef *frames_ref)
{
    int err;
    static const AVClass vulkan_context_class = {
        .class_name                = "vk",
        .version                   = LIBAVUTIL_VERSION_INT,
        .parent_log_context_offset = offsetof(FFVulkanContext, log_parent),
    };

    memset(s, 0, sizeof(*s));
    s->log_parent = log_parent;
    s->class = &vulkan_context_class;

    if (frames_ref) {
        s->frames_ref = av_buffer_ref(frames_ref);
        if (!s->frames_ref)
            return AVERROR(ENOMEM);

        s->frames = (AVHWFramesContext *)s->frames_ref->data;
        s->hwfc = s->frames->hwctx;

        device_ref = s->frames->device_ref;
    }

    s->device_ref = av_buffer_ref(device_ref);
    if (!s->device_ref) {
        ff_vk_uninit(s);
        return AVERROR(ENOMEM);
    }

    s->device = (AVHWDeviceContext *)s->device_ref->data;
    s->hwctx = s->device->hwctx;

    s->extensions  = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions,
                                              s->hwctx->nb_enabled_dev_extensions);
    s->extensions |= ff_vk_extensions_to_mask(s->hwctx->enabled_inst_extensions,
                                              s->hwctx->nb_enabled_inst_extensions);

    err = ff_vk_load_functions(s->device, &s->vkfn, s->extensions, 1, 1);
    if (err < 0) {
        ff_vk_uninit(s);
        return err;
    }

    err = ff_vk_load_props(s);
    if (err < 0) {
        ff_vk_uninit(s);
        return err;
    }

    return 0;
}