/*
 * Copyright (c) Lynne
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#define VK_NO_PROTOTYPES
#define VK_ENABLE_BETA_EXTENSIONS

#ifdef _WIN32
#include <windows.h> /* Included to prevent conflicts with CreateSemaphore */
#include <versionhelpers.h>
#include "compat/w32dlfcn.h"
#else
#include <dlfcn.h>
#include <unistd.h>
#endif

#include "thread.h"

#include "config.h"
#include "pixdesc.h"
#include "avstring.h"
#include "imgutils.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"
#include "mem.h"

#include "vulkan.h"
#include "vulkan_loader.h"

#if CONFIG_VAAPI
#include "hwcontext_vaapi.h"
#endif

#if CONFIG_LIBDRM
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#endif
#ifdef __linux__
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
#endif

#if HAVE_LINUX_DMA_BUF_H
#include <sys/ioctl.h>
#include <linux/dma-buf.h>
#endif

#if CONFIG_CUDA
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif
typedef struct VulkanDevicePriv {
    /**
     * The public AVVulkanDeviceContext. See hwcontext_vulkan.h for it.
     */
    AVVulkanDeviceContext p;

    /* Vulkan library and loader functions */
    void *libvulkan;

    FFVulkanContext    vkctx;
    FFVkQueueFamilyCtx compute_qf;
    FFVkQueueFamilyCtx transfer_qf;

    /* Properties */
    VkPhysicalDeviceProperties2 props;
    VkPhysicalDeviceMemoryProperties mprops;
    VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;

    /* Features */
    VkPhysicalDeviceVulkan11Features device_features_1_1;
    VkPhysicalDeviceVulkan12Features device_features_1_2;
    VkPhysicalDeviceVulkan13Features device_features_1_3;
    VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features;
    VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features;
    VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_matrix_features;
    VkPhysicalDeviceOpticalFlowFeaturesNV optical_flow_features;
    VkPhysicalDeviceShaderObjectFeaturesEXT shader_object_features;
    VkPhysicalDeviceVideoMaintenance1FeaturesKHR video_maint_1_features;

    /* Queues */
    pthread_mutex_t **qf_mutex;
    uint32_t nb_tot_qfs;
    uint32_t img_qfs[5];
    uint32_t nb_img_qfs;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Settings */
    int use_linear_images;

    /* Option to allocate all image planes in a single allocation */
    int contiguous_planes;

    /* Disable multiplane images */
    int disable_multiplane;

    /* Nvidia */
    int dev_is_nvidia;
} VulkanDevicePriv;
typedef struct VulkanFramesPriv {
    /**
     * The public AVVulkanFramesContext. See hwcontext_vulkan.h for it.
     */
    AVVulkanFramesContext p;

    /* Image conversions */
    FFVkExecPool compute_exec;

    /* Image transfers */
    FFVkExecPool upload_exec;
    FFVkExecPool download_exec;

    /* Temporary buffer pools */
    AVBufferPool *tmp;

    /* Modifier info list to free at uninit */
    VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
} VulkanFramesPriv;
typedef struct AVVkFrameInternal {
    pthread_mutex_t update_mutex;

#if CONFIG_CUDA
    /* Importing external memory into CUDA is really expensive, so we keep the
     * memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
#ifdef _WIN32
    HANDLE ext_mem_handle[AV_NUM_DATA_POINTERS];
    HANDLE ext_sem_handle[AV_NUM_DATA_POINTERS];
#endif
#endif
} AVVkFrameInternal;
#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT)
#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)

static const struct FFVkFormatEntry {
    VkFormat vkf;
    enum AVPixelFormat pixfmt;
    VkImageAspectFlags aspect;
    int vk_planes;
    int nb_images;
    int nb_images_fallback;
    const VkFormat fallback[5];
} vk_formats_list[] = {
    /* Gray formats */
    { VK_FORMAT_R8_UNORM,   AV_PIX_FMT_GRAY8,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM   } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GRAY16,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM  } },
    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } },

    /* RGB formats */
    { VK_FORMAT_R16G16B16A16_UNORM,       AV_PIX_FMT_XV36,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM       } },
    { VK_FORMAT_B8G8R8A8_UNORM,           AV_PIX_FMT_BGRA,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM           } },
    { VK_FORMAT_R8G8B8A8_UNORM,           AV_PIX_FMT_RGBA,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM           } },
    { VK_FORMAT_R8G8B8_UNORM,             AV_PIX_FMT_RGB24,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM             } },
    { VK_FORMAT_B8G8R8_UNORM,             AV_PIX_FMT_BGR24,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM             } },
    { VK_FORMAT_R16G16B16_UNORM,          AV_PIX_FMT_RGB48,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM          } },
    { VK_FORMAT_R16G16B16A16_UNORM,       AV_PIX_FMT_RGBA64,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM       } },
    { VK_FORMAT_R5G6B5_UNORM_PACK16,      AV_PIX_FMT_RGB565,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16      } },
    { VK_FORMAT_B5G6R5_UNORM_PACK16,      AV_PIX_FMT_BGR565,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16      } },
    { VK_FORMAT_B8G8R8A8_UNORM,           AV_PIX_FMT_BGR0,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM           } },
    { VK_FORMAT_R8G8B8A8_UNORM,           AV_PIX_FMT_RGB0,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM           } },
    { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
    { VK_FORMAT_A2B10G10R10_UNORM_PACK32, AV_PIX_FMT_X2BGR10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2B10G10R10_UNORM_PACK32 } },

    /* Planar RGB */
    { VK_FORMAT_R8_UNORM,   AV_PIX_FMT_GBRAP,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R8_UNORM,   VK_FORMAT_R8_UNORM,   VK_FORMAT_R8_UNORM,   VK_FORMAT_R8_UNORM   } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM  } },
    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },

    /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,                  AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM   } },
    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,               AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    /* Two-plane 422 YUV at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM,                  AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM   } },
    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM,               AV_PIX_FMT_P216, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    /* Two-plane 444 YUV at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM,                  AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM   } },
    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM,               AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,    AV_PIX_FMT_YUV420P,   ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,    AV_PIX_FMT_YUV422P,   ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,    AV_PIX_FMT_YUV444P,   ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    /* Single plane 422 at 8, 10 and 12 bits */
    { VK_FORMAT_G8B8G8R8_422_UNORM,                     AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM     } },
    { VK_FORMAT_B8G8R8G8_422_UNORM,                     AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM     } },
    { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
};
static const int nb_vk_formats_list = FF_ARRAY_ELEMS(vk_formats_list);

const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
    for (int i = 0; i < nb_vk_formats_list; i++)
        if (vk_formats_list[i].pixfmt == p)
            return vk_formats_list[i].fallback;
    return NULL;
}
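
/* A minimal usage sketch (hypothetical caller): look up the per-plane Vulkan
 * representation of a software format. The returned array is owned by the
 * static table above and must not be freed; it holds one format per plane of
 * the pixel format (two entries for NV12).
 *
 *     const VkFormat *fmts = av_vkfmt_from_pixfmt(AV_PIX_FMT_NV12);
 *     if (!fmts)
 *         return AVERROR(ENOSYS); // no Vulkan mapping for this format
 *     // fmts[0] == VK_FORMAT_R8_UNORM, fmts[1] == VK_FORMAT_R8G8_UNORM
 */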
static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p)
{
    for (int i = 0; i < nb_vk_formats_list; i++)
        if (vk_formats_list[i].pixfmt == p)
            return &vk_formats_list[i];
    return NULL;
}
/* Malitia pura, Khronos */
#define FN_MAP_TO(dst_t, dst_name, src_t, src_name)                         \
    static av_unused dst_t map_ ##src_name## _to_ ##dst_name(src_t src)     \
    {                                                                       \
        dst_t dst = 0x0;                                                    \
        MAP_TO(VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT,                       \
               VK_IMAGE_USAGE_SAMPLED_BIT);                                 \
        MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT,                        \
               VK_IMAGE_USAGE_TRANSFER_SRC_BIT);                            \
        MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT,                        \
               VK_IMAGE_USAGE_TRANSFER_DST_BIT);                            \
        MAP_TO(VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT,                       \
               VK_IMAGE_USAGE_STORAGE_BIT);                                 \
        MAP_TO(VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT,                    \
               VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);                        \
        MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR,             \
               VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR);                    \
        MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR,                \
               VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR);                    \
        MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR,                \
               VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR);                    \
        MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR,              \
               VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR);                    \
        return dst;                                                         \
    }

#define MAP_TO(flag1, flag2) if (src & flag2) dst |= flag1;
FN_MAP_TO(VkFormatFeatureFlagBits2, feats, VkImageUsageFlags, usage)
#undef MAP_TO
#define MAP_TO(flag1, flag2) if (src & flag1) dst |= flag2;
FN_MAP_TO(VkImageUsageFlags, usage, VkFormatFeatureFlagBits2, feats)
#undef MAP_TO
#undef FN_MAP_TO
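
/* The two instantiations above generate a pair of symmetric converters,
 * map_usage_to_feats() and map_feats_to_usage(), which translate between
 * format-feature and image-usage flags; a sketch:
 *
 *     VkImageUsageFlags usage = map_feats_to_usage(
 *         VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
 *         VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT);
 *     // usage == (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)
 */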
static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
                              VkImageTiling tiling,
                              VkFormat fmts[AV_NUM_DATA_POINTERS], /* Output format list */
                              int *nb_images,                      /* Output number of images */
                              VkImageAspectFlags *aspect,          /* Output aspect */
                              VkImageUsageFlags *supported_usage,  /* Output supported usage */
                              int disable_multiplane, int need_storage)
{
    VulkanDevicePriv *priv = dev_ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &priv->p;
    FFVulkanFunctions *vk = &priv->vkctx.vkfn;

    const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
                                                 VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT  |
                                                 VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;

    for (int i = 0; i < nb_vk_formats_list; i++) {
        if (vk_formats_list[i].pixfmt == p) {
            VkFormatProperties3 fprops = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3,
            };
            VkFormatProperties2 prop = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
                .pNext = &fprops,
            };
            VkFormatFeatureFlagBits2 feats_primary, feats_secondary;
            int basics_primary = 0, basics_secondary = 0;
            int storage_primary = 0, storage_secondary = 0;

            vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
                                                   vk_formats_list[i].vkf,
                                                   &prop);

            feats_primary = tiling == VK_IMAGE_TILING_LINEAR ?
                            fprops.linearTilingFeatures : fprops.optimalTilingFeatures;
            basics_primary = (feats_primary & basic_flags) == basic_flags;
            storage_primary = !!(feats_primary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);

            if (vk_formats_list[i].vkf != vk_formats_list[i].fallback[0]) {
                vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
                                                       vk_formats_list[i].fallback[0],
                                                       &prop);
                feats_secondary = tiling == VK_IMAGE_TILING_LINEAR ?
                                  fprops.linearTilingFeatures : fprops.optimalTilingFeatures;
                basics_secondary = (feats_secondary & basic_flags) == basic_flags;
                storage_secondary = !!(feats_secondary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
            } else {
                basics_secondary = basics_primary;
                storage_secondary = storage_primary;
            }

            if (basics_primary &&
                !(disable_multiplane && vk_formats_list[i].vk_planes > 1) &&
                (!need_storage || (need_storage && (storage_primary | storage_secondary)))) {
                if (fmts)
                    fmts[0] = vk_formats_list[i].vkf;
                if (nb_images)
                    *nb_images = 1;
                if (aspect)
                    *aspect = vk_formats_list[i].aspect;
                if (supported_usage)
                    *supported_usage = map_feats_to_usage(feats_primary) |
                                       ((need_storage && (storage_primary | storage_secondary)) ?
                                        VK_IMAGE_USAGE_STORAGE_BIT : 0);
                return 0;
            } else if (basics_secondary &&
                       (!need_storage || (need_storage && storage_secondary))) {
                if (fmts) {
                    for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++)
                        fmts[j] = vk_formats_list[i].fallback[j];
                }
                if (nb_images)
                    *nb_images = vk_formats_list[i].nb_images_fallback;
                if (aspect)
                    *aspect = vk_formats_list[i].aspect;
                if (supported_usage)
                    *supported_usage = map_feats_to_usage(feats_secondary);
                return 0;
            } else {
                return AVERROR(ENOTSUP);
            }
        }
    }

    return AVERROR(EINVAL);
}
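
/* A minimal internal-call sketch (arguments assumed): resolve YUV420P at
 * optimal tiling, allowing multiplane images and requiring storage support:
 *
 *     VkFormat fmts[AV_NUM_DATA_POINTERS];
 *     VkImageAspectFlags aspect;
 *     VkImageUsageFlags usage;
 *     int nb_images;
 *     int err = vkfmt_from_pixfmt2(dev_ctx, AV_PIX_FMT_YUV420P,
 *                                  VK_IMAGE_TILING_OPTIMAL,
 *                                  fmts, &nb_images, &aspect, &usage,
 *                                  0, 1); // disable_multiplane=0, need_storage=1
 *     // On success, either fmts[0] is the multiplane format (nb_images == 1)
 *     // or fmts[] holds the three single-plane fallbacks (nb_images == 3).
 */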
static int load_libvulkan(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;

    static const char *lib_names[] = {
#if defined(_WIN32)
        "vulkan-1.dll",
#elif defined(__APPLE__)
        "libvulkan.dylib",
        "libvulkan.1.dylib",
        "libMoltenVK.dylib",
#else
        "libvulkan.so.1",
        "libvulkan.so",
#endif
    };

    for (int i = 0; i < FF_ARRAY_ELEMS(lib_names); i++) {
        p->libvulkan = dlopen(lib_names[i], RTLD_NOW | RTLD_LOCAL);
        if (p->libvulkan)
            break;
    }

    if (!p->libvulkan) {
        av_log(ctx, AV_LOG_ERROR, "Unable to open the libvulkan library!\n");
        return AVERROR_UNKNOWN;
    }

    hwctx->get_proc_addr = (PFN_vkGetInstanceProcAddr)dlsym(p->libvulkan, "vkGetInstanceProcAddr");

    return 0;
}
typedef struct VulkanOptExtension {
    const char *name;
    FFVulkanExtensions flag;
} VulkanOptExtension;

static const VulkanOptExtension optional_instance_exts[] = {
    { VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME,   FF_VK_EXT_NO_FLAG },
};

static const VulkanOptExtension optional_device_exts[] = {
    /* Misc or required by other extensions */
    { VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME,        FF_VK_EXT_NO_FLAG                },
    { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,           FF_VK_EXT_PUSH_DESCRIPTOR        },
    { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,         FF_VK_EXT_DESCRIPTOR_BUFFER      },
    { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME,       FF_VK_EXT_DEVICE_DRM             },
    { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME,       FF_VK_EXT_ATOMIC_FLOAT           },
    { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME,        FF_VK_EXT_COOP_MATRIX            },
    { VK_NV_OPTICAL_FLOW_EXTENSION_NAME,               FF_VK_EXT_OPTICAL_FLOW           },
    { VK_EXT_SHADER_OBJECT_EXTENSION_NAME,             FF_VK_EXT_SHADER_OBJECT          },
    { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME,       FF_VK_EXT_VIDEO_MAINTENANCE_1    },

    /* Imports/exports */
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,        FF_VK_EXT_EXTERNAL_FD_MEMORY     },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,   FF_VK_EXT_EXTERNAL_DMABUF_MEMORY },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, FF_VK_EXT_DRM_MODIFIER_FLAGS     },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,     FF_VK_EXT_EXTERNAL_FD_SEM        },
    { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,      FF_VK_EXT_EXTERNAL_HOST_MEMORY   },
#ifdef _WIN32
    { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,     FF_VK_EXT_EXTERNAL_WIN32_MEMORY  },
    { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME,  FF_VK_EXT_EXTERNAL_WIN32_SEM     },
#endif

    /* Video encoding/decoding */
    { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,               FF_VK_EXT_VIDEO_QUEUE            },
    { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME,        FF_VK_EXT_VIDEO_ENCODE_QUEUE     },
    { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME,        FF_VK_EXT_VIDEO_DECODE_QUEUE     },
    { VK_KHR_VIDEO_ENCODE_H264_EXTENSION_NAME,         FF_VK_EXT_VIDEO_ENCODE_H264      },
    { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME,         FF_VK_EXT_VIDEO_DECODE_H264      },
    { VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME,         FF_VK_EXT_VIDEO_ENCODE_H265      },
    { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME,         FF_VK_EXT_VIDEO_DECODE_H265      },
    { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME,          FF_VK_EXT_VIDEO_DECODE_AV1       },
};
static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                                           VkDebugUtilsMessageTypeFlagsEXT messageType,
                                           const VkDebugUtilsMessengerCallbackDataEXT *data,
                                           void *priv)
{
    int l;
    AVHWDeviceContext *ctx = priv;

    /* Ignore false positives */
    switch (data->messageIdNumber) {
    case 0x086974c1: /* BestPractices-vkCreateCommandPool-command-buffer-reset */
    case 0xfd92477a: /* BestPractices-vkAllocateMemory-small-allocation */
    case 0x618ab1e7: /* VUID-VkImageViewCreateInfo-usage-02275 */
    case 0x30f4ac70: /* VUID-VkImageCreateInfo-pNext-06811 */
        return VK_FALSE;
    default:
        break;
    }

    switch (severity) {
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
    default:                                              l = AV_LOG_DEBUG;   break;
    }

    av_log(ctx, l, "%s\n", data->pMessage);
    for (int i = 0; i < data->cmdBufLabelCount; i++)
        av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);

    return VK_FALSE;
}
#define ADD_VAL_TO_LIST(list, count, val)                                      \
    do {                                                                       \
        list = av_realloc_array(list, sizeof(*list), ++count);                 \
        if (!list) {                                                           \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
        list[count - 1] = av_strdup(val);                                      \
        if (!list[count - 1]) {                                                \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
    } while (0)

#define RELEASE_PROPS(props, count)                                            \
    if (props) {                                                               \
        for (int i = 0; i < count; i++)                                        \
            av_free((void *)((props)[i]));                                     \
        av_free((void *)props);                                                \
    }
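
/* Both helpers assume the enclosing function declares `int err` and a `fail:`
 * label; a sketch of the intended pairing (names assumed):
 *
 *     const char **names = NULL;
 *     int err, nb_names = 0;
 *     ADD_VAL_TO_LIST(names, nb_names, "example"); // duplicates the string
 * fail:
 *     RELEASE_PROPS(names, nb_names);              // frees strings and array
 */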
enum FFVulkanDebugMode {
    FF_VULKAN_DEBUG_NONE = 0,
    /* Standard GPU-assisted validation */
    FF_VULKAN_DEBUG_VALIDATE = 1,
    /* Passes printfs in shaders to the debug callback */
    FF_VULKAN_DEBUG_PRINTF = 2,
    /* Enables extra printouts via the best-practices checks */
    FF_VULKAN_DEBUG_PRACTICES = 3,
};
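
/* These modes are selected via the "debug" key of the options dictionary
 * passed at device creation (parsed in check_layers() below), either by name
 * or by numeric index; a minimal sketch:
 *
 *     AVDictionary *opts = NULL;
 *     AVBufferRef *dev = NULL;
 *     av_dict_set(&opts, "debug", "validate", 0); // or "printf", "practices", "0".."3"
 *     av_hwdevice_ctx_create(&dev, AV_HWDEVICE_TYPE_VULKAN, NULL, opts, 0);
 *     av_dict_free(&opts);
 */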
static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                            const char * const **dst, uint32_t *num,
                            enum FFVulkanDebugMode debug_mode)
{
    const char *tstr;
    const char **extension_names = NULL;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    int err = 0, found, extensions_found = 0;

    const char *mod;
    int optional_exts_num;
    uint32_t sup_ext_count;
    char *user_exts_str = NULL;
    AVDictionaryEntry *user_exts;
    VkExtensionProperties *sup_ext;
    const VulkanOptExtension *optional_exts;

    if (!dev) {
        mod = "instance";
        optional_exts = optional_instance_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
        user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
    } else {
        mod = "device";
        optional_exts = optional_device_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
        user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                               &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                               &sup_ext_count, sup_ext);
    }

    for (int i = 0; i < optional_exts_num; i++) {
        tstr = optional_exts[i].name;
        found = 0;

        if (dev && debug_mode &&
            !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME)) {
            continue;
        }

        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (!found)
            continue;

        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
        p->vkctx.extensions |= optional_exts[i].flag;
        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
    }

    if (!dev &&
        ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) ||
         (debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
         (debug_mode == FF_VULKAN_DEBUG_PRACTICES))) {
        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
            p->vkctx.extensions |= FF_VK_EXT_DEBUG_UTILS;
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }

    if (user_exts_str) {
        char *save, *token = av_strtok(user_exts_str, "+", &save);
        while (token) {
            found = 0;
            for (int j = 0; j < sup_ext_count; j++) {
                if (!strcmp(token, sup_ext[j].extensionName)) {
                    found = 1;
                    break;
                }
            }
            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
                ADD_VAL_TO_LIST(extension_names, extensions_found, token);
            } else {
                av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n",
                       mod, token);
            }
            token = av_strtok(NULL, "+", &save);
        }
    }

    *dst = extension_names;
    *num = extensions_found;

    av_free(user_exts_str);
    av_free(sup_ext);
    return 0;

fail:
    RELEASE_PROPS(extension_names, extensions_found);
    av_free(user_exts_str);
    av_free(sup_ext);
    return err;
}
static int check_layers(AVHWDeviceContext *ctx, AVDictionary *opts,
                        const char * const **dst, uint32_t *num,
                        enum FFVulkanDebugMode *debug_mode)
{
    int err = 0;
    VulkanDevicePriv *priv = ctx->hwctx;
    FFVulkanFunctions *vk = &priv->vkctx.vkfn;
    static const char layer_standard_validation[] = { "VK_LAYER_KHRONOS_validation" };
    int layer_standard_validation_found = 0;

    uint32_t sup_layer_count;
    VkLayerProperties *sup_layers;

    AVDictionaryEntry *user_layers = av_dict_get(opts, "layers", NULL, 0);
    char *user_layers_str = NULL;
    char *save, *token;

    const char **enabled_layers = NULL;
    uint32_t enabled_layers_count = 0;

    AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
    enum FFVulkanDebugMode mode;

    *debug_mode = mode = FF_VULKAN_DEBUG_NONE;

    /* Get a list of all layers */
    vk->EnumerateInstanceLayerProperties(&sup_layer_count, NULL);
    sup_layers = av_malloc_array(sup_layer_count, sizeof(VkLayerProperties));
    if (!sup_layers)
        return AVERROR(ENOMEM);
    vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers);

    av_log(ctx, AV_LOG_VERBOSE, "Supported layers:\n");
    for (int i = 0; i < sup_layer_count; i++)
        av_log(ctx, AV_LOG_VERBOSE, "\t%s\n", sup_layers[i].layerName);

    /* If no user layers or debug layers are given, return */
    if (!debug_opt && !user_layers)
        goto end;

    /* Check for any properly supported validation layer */
    if (debug_opt) {
        if (!strcmp(debug_opt->value, "printf")) {
            mode = FF_VULKAN_DEBUG_PRINTF;
        } else if (!strcmp(debug_opt->value, "validate")) {
            mode = FF_VULKAN_DEBUG_VALIDATE;
        } else if (!strcmp(debug_opt->value, "practices")) {
            mode = FF_VULKAN_DEBUG_PRACTICES;
        } else {
            char *end_ptr = NULL;
            int idx = strtol(debug_opt->value, &end_ptr, 10);
            if (end_ptr == debug_opt->value || end_ptr[0] != '\0' ||
                idx < 0 || idx > FF_VULKAN_DEBUG_PRACTICES) {
                av_log(ctx, AV_LOG_ERROR, "Invalid debugging mode \"%s\"\n",
                       debug_opt->value);
                err = AVERROR(EINVAL);
                goto end;
            }
            mode = idx;
        }
    }

    /* If mode is VALIDATE, PRINTF or PRACTICES, try to find the standard validation layer */
    if ((mode == FF_VULKAN_DEBUG_VALIDATE) ||
        (mode == FF_VULKAN_DEBUG_PRINTF) ||
        (mode == FF_VULKAN_DEBUG_PRACTICES)) {
        for (int i = 0; i < sup_layer_count; i++) {
            if (!strcmp(layer_standard_validation, sup_layers[i].layerName)) {
                av_log(ctx, AV_LOG_VERBOSE, "Standard validation layer %s is enabled\n",
                       layer_standard_validation);
                ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, layer_standard_validation);
                *debug_mode = mode;
                layer_standard_validation_found = 1;
                break;
            }
        }
        if (!layer_standard_validation_found) {
            av_log(ctx, AV_LOG_ERROR,
                   "Validation Layer \"%s\" not supported\n", layer_standard_validation);
            err = AVERROR(ENOTSUP);
            goto end;
        }
    }

    /* Process any custom layers enabled */
    if (user_layers) {
        int found;

        user_layers_str = av_strdup(user_layers->value);
        if (!user_layers_str) {
            err = AVERROR(ENOMEM);
            goto fail;
        }

        token = av_strtok(user_layers_str, "+", &save);
        while (token) {
            found = 0;
            /* If the standard validation layer was already enabled via the
             * debug option, skip it here rather than adding it twice */
            if (!strcmp(layer_standard_validation, token) && layer_standard_validation_found) {
                token = av_strtok(NULL, "+", &save);
                continue;
            }
            /* Try to find the layer in the list of supported layers */
            for (int j = 0; j < sup_layer_count; j++) {
                if (!strcmp(token, sup_layers[j].layerName)) {
                    found = 1;
                    break;
                }
            }

            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using layer: %s\n", token);
                ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, token);

                /* If debug was not set as an option, force it */
                if (!strcmp(layer_standard_validation, token))
                    *debug_mode = FF_VULKAN_DEBUG_VALIDATE;
            } else {
                av_log(ctx, AV_LOG_ERROR,
                       "Layer \"%s\" not supported\n", token);
                err = AVERROR(EINVAL);
                goto end;
            }

            token = av_strtok(NULL, "+", &save);
        }
    }

fail:
end:
    av_free(sup_layers);
    av_free(user_layers_str);

    if (err < 0) {
        RELEASE_PROPS(enabled_layers, enabled_layers_count);
    } else {
        *dst = enabled_layers;
        *num = enabled_layers_count;
    }

    return err;
}
/* Creates a VkInstance */
static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts,
                           enum FFVulkanDebugMode *debug_mode)
{
    int err = 0;
    VkResult ret;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkApplicationInfo application_info = {
        .sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pApplicationName   = "ffmpeg",
        .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                              LIBAVUTIL_VERSION_MINOR,
                                              LIBAVUTIL_VERSION_MICRO),
        .pEngineName        = "libavutil",
        .apiVersion         = VK_API_VERSION_1_3,
        .engineVersion      = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                              LIBAVUTIL_VERSION_MINOR,
                                              LIBAVUTIL_VERSION_MICRO),
    };
    VkValidationFeaturesEXT validation_features = {
        .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
    };
    VkInstanceCreateInfo inst_props = {
        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    if (!hwctx->get_proc_addr) {
        err = load_libvulkan(ctx);
        if (err < 0)
            return err;
    }

    err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 0, 0);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n");
        return err;
    }

    err = check_layers(ctx, opts, &inst_props.ppEnabledLayerNames,
                       &inst_props.enabledLayerCount, debug_mode);
    if (err)
        goto fail;

    /* Check for present/missing extensions */
    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, *debug_mode);
    hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
    hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;
    if (err < 0)
        goto fail;

    /* Enable debug features if needed */
    if (*debug_mode == FF_VULKAN_DEBUG_VALIDATE) {
        static const VkValidationFeatureEnableEXT feat_list_validate[] = {
            VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
            VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
            VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,
        };
        validation_features.pEnabledValidationFeatures = feat_list_validate;
        validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_validate);
        inst_props.pNext = &validation_features;
    } else if (*debug_mode == FF_VULKAN_DEBUG_PRINTF) {
        static const VkValidationFeatureEnableEXT feat_list_debug[] = {
            VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
            VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
            VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT,
        };
        validation_features.pEnabledValidationFeatures = feat_list_debug;
        validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_debug);
        inst_props.pNext = &validation_features;
    } else if (*debug_mode == FF_VULKAN_DEBUG_PRACTICES) {
        static const VkValidationFeatureEnableEXT feat_list_practices[] = {
            VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
            VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT,
        };
        validation_features.pEnabledValidationFeatures = feat_list_practices;
        validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_practices);
        inst_props.pNext = &validation_features;
    }

#ifdef __APPLE__
    for (int i = 0; i < inst_props.enabledExtensionCount; i++) {
        if (!strcmp(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME,
                    inst_props.ppEnabledExtensionNames[i])) {
            inst_props.flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
            break;
        }
    }
#endif

    /* Try to create the instance */
    ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    /* Check for errors */
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 0);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n");
        goto fail;
    }

    /* Setup debugging callback if needed */
    if ((*debug_mode == FF_VULKAN_DEBUG_VALIDATE) ||
        (*debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
        (*debug_mode == FF_VULKAN_DEBUG_PRACTICES)) {
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType           = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT    |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType     = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT    |
                               VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = vk_dbg_callback,
            .pUserData       = ctx,
        };

        vk->CreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
                                         hwctx->alloc, &p->debug_ctx);
    }

    err = 0;

fail:
    RELEASE_PROPS(inst_props.ppEnabledLayerNames, inst_props.enabledLayerCount);
    return err;
}
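
/* Note: create_instance() only runs when libavutil owns the instance. A
 * caller may instead allocate the context with av_hwdevice_ctx_alloc(), fill
 * in the public AVVulkanDeviceContext fields (get_proc_addr, inst, phys_dev,
 * act_dev, queue families, enabled extension lists) and finish with
 * av_hwdevice_ctx_init(); a minimal sketch, error handling omitted:
 *
 *     AVBufferRef *ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN);
 *     AVHWDeviceContext *c = (AVHWDeviceContext *)ref->data;
 *     AVVulkanDeviceContext *vkc = c->hwctx;
 *     vkc->get_proc_addr = my_vkGetInstanceProcAddr; // hypothetical loader symbol
 *     vkc->inst          = my_instance;              // instance created by the app
 *     // ... phys_dev, act_dev, queues, extensions ...
 *     av_hwdevice_ctx_init(ref);
 */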
typedef struct VulkanDeviceSelection {
    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
    int has_uuid;
    uint32_t drm_major;         /* Will use this second unless !has_drm */
    uint32_t drm_minor;         /* Will use this second unless !has_drm */
    uint32_t has_drm;           /* has drm node info */
    const char *name;           /* Will use this third unless NULL */
    uint32_t pci_device;        /* Will use this fourth unless 0x0 */
    uint32_t vendor_id;         /* Last resort to find something deterministic */
    int index;                  /* Finally fall back to index */
} VulkanDeviceSelection;
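
/* A sketch of how this selection is typically reached from the public API
 * (the parsing of the device string into an index or name happens in the
 * device-creation entry point, outside this excerpt):
 *
 *     AVBufferRef *dev = NULL;
 *     // "0" selects by enumeration index; a non-numeric string selects by
 *     // device-name substring
 *     int err = av_hwdevice_ctx_create(&dev, AV_HWDEVICE_TYPE_VULKAN,
 *                                      "0", NULL, 0);
 */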
static const char *vk_dev_type(enum VkPhysicalDeviceType type)
{
    switch (type) {
    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
    default:                                     return "unknown";
    }
}
/* Finds a device */
static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
{
    int err = 0, choice = -1;
    uint32_t num;
    VkResult ret;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkPhysicalDevice *devices = NULL;
    VkPhysicalDeviceIDProperties *idp = NULL;
    VkPhysicalDeviceProperties2 *prop = NULL;
    VkPhysicalDeviceDrmPropertiesEXT *drm_prop = NULL;

    ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL);
    if (ret != VK_SUCCESS || !num) {
        av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", ff_vk_ret2str(ret));
        return AVERROR(ENODEV);
    }

    devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
    if (!devices)
        return AVERROR(ENOMEM);

    ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR(ENODEV);
        goto end;
    }

    prop = av_calloc(num, sizeof(*prop));
    if (!prop) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    idp = av_calloc(num, sizeof(*idp));
    if (!idp) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) {
        drm_prop = av_calloc(num, sizeof(*drm_prop));
        if (!drm_prop) {
            err = AVERROR(ENOMEM);
            goto end;
        }
    }

    av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
    for (int i = 0; i < num; i++) {
        if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) {
            drm_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT;
            idp[i].pNext = &drm_prop[i];
        }
        idp[i].sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
        prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
        prop[i].pNext = &idp[i];

        vk->GetPhysicalDeviceProperties2(devices[i], &prop[i]);
        av_log(ctx, AV_LOG_VERBOSE, "    %d: %s (%s) (0x%x)\n", i,
               prop[i].properties.deviceName,
               vk_dev_type(prop[i].properties.deviceType),
               prop[i].properties.deviceID);
    }

    if (select->has_uuid) {
        for (int i = 0; i < num; i++) {
            /* UUIDs are binary and may contain zero bytes, so compare all bytes */
            if (!memcmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
        err = AVERROR(ENODEV);
        goto end;
    } else if ((p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) && select->has_drm) {
        for (int i = 0; i < num; i++) {
            if ((select->drm_major == drm_prop[i].primaryMajor &&
                 select->drm_minor == drm_prop[i].primaryMinor) ||
                (select->drm_major == drm_prop[i].renderMajor &&
                 select->drm_minor == drm_prop[i].renderMinor)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given DRM node numbers %i:%i!\n",
               select->drm_major, select->drm_minor);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->name) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
        for (int i = 0; i < num; i++) {
            if (strstr(prop[i].properties.deviceName, select->name)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
               select->name);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->pci_device) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
        for (int i = 0; i < num; i++) {
            if (select->pci_device == prop[i].properties.deviceID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
               select->pci_device);
        err = AVERROR(EINVAL);
        goto end;
    } else if (select->vendor_id) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
        for (int i = 0; i < num; i++) {
            if (select->vendor_id == prop[i].properties.vendorID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
               select->vendor_id);
        err = AVERROR(ENODEV);
        goto end;
    } else {
        if (select->index < num) {
            choice = select->index;
            goto end;
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
               select->index);
        err = AVERROR(ENODEV);
        goto end;
    }

end:
    if (choice > -1) {
        av_log(ctx, AV_LOG_VERBOSE, "Device %d selected: %s (%s) (0x%x)\n",
               choice, prop[choice].properties.deviceName,
               vk_dev_type(prop[choice].properties.deviceType),
               prop[choice].properties.deviceID);
        hwctx->phys_dev = devices[choice];
    }

    av_free(devices);
    av_free(prop);
    av_free(idp);
    av_free(drm_prop);
    return err;
}
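
/* For reference, the per-device query above chains the property structs
 * through pNext, so a single GetPhysicalDeviceProperties2() call per device
 * fills all of them:
 *
 *     VkPhysicalDeviceProperties2
 *         -> VkPhysicalDeviceIDProperties (deviceUUID)
 *             -> VkPhysicalDeviceDrmPropertiesEXT (only with FF_VK_EXT_DEVICE_DRM)
 */
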
/* Picks the least used qf with the fewest unneeded flags, or -1 if none found */
static inline int pick_queue_family(VkQueueFamilyProperties2 *qf, uint32_t num_qf,
                                    VkQueueFlagBits flags)
{
    int index = -1;
    uint32_t min_score = UINT32_MAX;

    for (int i = 0; i < num_qf; i++) {
        VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;

        /* Per the spec, reporting transfer caps is optional for these 2 types */
        if ((flags & VK_QUEUE_TRANSFER_BIT) &&
            (qflags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)))
            qflags |= VK_QUEUE_TRANSFER_BIT;

        if (qflags & flags) {
            uint32_t score = av_popcount(qflags) + qf[i].queueFamilyProperties.timestampValidBits;
            if (score < min_score) {
                index = i;
                min_score = score;
            }
        }
    }

    if (index > -1)
        qf[index].queueFamilyProperties.timestampValidBits++;

    return index;
}
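
/* Worked example: when asked for VK_QUEUE_TRANSFER_BIT with both a
 * GRAPHICS|COMPUTE|TRANSFER family (popcount 3) and a dedicated TRANSFER
 * family (popcount 1) available, the dedicated family scores lower and
 * wins. Every win increments the family's usage counter (stored in the
 * repurposed timestampValidBits field, see setup_queue_families() below),
 * nudging later picks towards less-used families. */
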
static inline int pick_video_queue_family(VkQueueFamilyProperties2 *qf,
                                          VkQueueFamilyVideoPropertiesKHR *qf_vid, uint32_t num_qf,
                                          VkVideoCodecOperationFlagBitsKHR flags)
{
    int index = -1;
    uint32_t min_score = UINT32_MAX;

    for (int i = 0; i < num_qf; i++) {
        const VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;
        const VkQueueFlagBits vflags = qf_vid[i].videoCodecOperations;

        if (!(qflags & (VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_VIDEO_DECODE_BIT_KHR)))
            continue;

        if (vflags & flags) {
            uint32_t score = av_popcount(vflags) + qf[i].queueFamilyProperties.timestampValidBits;
            if (score < min_score) {
                index = i;
                min_score = score;
            }
        }
    }

    if (index > -1)
        qf[index].queueFamilyProperties.timestampValidBits++;

    return index;
}

static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
    uint32_t num;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkQueueFamilyProperties2 *qf = NULL;
    VkQueueFamilyVideoPropertiesKHR *qf_vid = NULL;

    /* First get the number of queue families */
    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
    if (!num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    /* Then allocate memory */
    qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties2));
    if (!qf)
        return AVERROR(ENOMEM);

    qf_vid = av_malloc_array(num, sizeof(VkQueueFamilyVideoPropertiesKHR));
    if (!qf_vid)
        return AVERROR(ENOMEM);

    for (uint32_t i = 0; i < num; i++) {
        qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
        };
        qf[i] = (VkQueueFamilyProperties2) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
            .pNext = &qf_vid[i],
        };
    }

    /* Finally retrieve the queue families */
    vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &num, qf);

    av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n");
    for (int i = 0; i < num; i++) {
        av_log(ctx, AV_LOG_VERBOSE, "    %i:%s%s%s%s%s%s%s%s (queues: %i)\n", i,
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_OPTICAL_FLOW_BIT_NV) ? " optical_flow" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "",
               qf[i].queueFamilyProperties.queueCount);

        /* We use this field to keep a score of how many times we've used that
         * queue family in order to make better choices. */
        qf[i].queueFamilyProperties.timestampValidBits = 0;
    }

    hwctx->nb_qf = 0;

    /* Pick each queue family to use */
#define PICK_QF(type, vid_op)                                            \
    do {                                                                 \
        uint32_t i;                                                      \
        uint32_t idx;                                                    \
                                                                         \
        if (vid_op)                                                      \
            idx = pick_video_queue_family(qf, qf_vid, num, vid_op);      \
        else                                                             \
            idx = pick_queue_family(qf, num, type);                      \
                                                                         \
        if (idx == -1)                                                   \
            continue;                                                    \
                                                                         \
        for (i = 0; i < hwctx->nb_qf; i++) {                             \
            if (hwctx->qf[i].idx == idx) {                               \
                hwctx->qf[i].flags |= type;                              \
                hwctx->qf[i].video_caps |= vid_op;                       \
                break;                                                   \
            }                                                            \
        }                                                                \
        if (i == hwctx->nb_qf) {                                         \
            hwctx->qf[i].idx = idx;                                      \
            hwctx->qf[i].num = qf[idx].queueFamilyProperties.queueCount; \
            hwctx->qf[i].flags = type;                                   \
            hwctx->qf[i].video_caps = vid_op;                            \
            hwctx->nb_qf++;                                              \
        }                                                                \
    } while (0)

    PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
    PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
    PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
    PICK_QF(VK_QUEUE_OPTICAL_FLOW_BIT_NV, VK_VIDEO_CODEC_OPERATION_NONE_KHR);

    PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR);
    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR);
    PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR);
    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR);
    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);

    av_free(qf);
    av_free(qf_vid);

#undef PICK_QF

    cd->pQueueCreateInfos = av_malloc_array(hwctx->nb_qf,
                                            sizeof(VkDeviceQueueCreateInfo));
    if (!cd->pQueueCreateInfos)
        return AVERROR(ENOMEM);

    for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
        int dup = 0;
        float *weights = NULL;
        VkDeviceQueueCreateInfo *pc;
        for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) {
            if (hwctx->qf[i].idx == cd->pQueueCreateInfos[j].queueFamilyIndex) {
                dup = 1;
                break;
            }
        }
        if (dup)
            continue;

        weights = av_malloc_array(hwctx->qf[i].num, sizeof(float));
        if (!weights) {
            for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++)
                av_free((void *)cd->pQueueCreateInfos[j].pQueuePriorities);
            av_free((void *)cd->pQueueCreateInfos);
            return AVERROR(ENOMEM);
        }

        for (uint32_t j = 0; j < hwctx->qf[i].num; j++)
            weights[j] = 1.0;

        pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
        pc[cd->queueCreateInfoCount++] = (VkDeviceQueueCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
            .queueFamilyIndex = hwctx->qf[i].idx,
            .queueCount = hwctx->qf[i].num,
            .pQueuePriorities = weights,
        };
    }

#if FF_API_VULKAN_FIXED_QUEUES
FF_DISABLE_DEPRECATION_WARNINGS
    /* Setup deprecated fields */
    hwctx->queue_family_index        = -1;
    hwctx->queue_family_comp_index   = -1;
    hwctx->queue_family_tx_index     = -1;
    hwctx->queue_family_encode_index = -1;
    hwctx->queue_family_decode_index = -1;

#define SET_OLD_QF(field, nb_field, type)             \
    do {                                              \
        if (field < 0 && hwctx->qf[i].flags & type) { \
            field = hwctx->qf[i].idx;                 \
            nb_field = hwctx->qf[i].num;              \
        }                                             \
    } while (0)

    for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
        SET_OLD_QF(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
        SET_OLD_QF(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
        SET_OLD_QF(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
        SET_OLD_QF(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
        SET_OLD_QF(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
    }

#undef SET_OLD_QF
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    return 0;
}
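
/* Sketch of a typical outcome (assuming a device whose family 0 exposes
 * GRAPHICS|COMPUTE|TRANSFER and whose family 1 is a dedicated TRANSFER
 * family): the PICK_QF() calls above would leave
 *
 *     hwctx->qf[0] = { .idx = 0, .flags = GRAPHICS|COMPUTE, ... };
 *     hwctx->qf[1] = { .idx = 1, .flags = TRANSFER, ... };
 *
 * and the loop above then emits one VkDeviceQueueCreateInfo per distinct
 * family index, with every queue priority set to 1.0. */
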
/* Only resources created by vulkan_device_create should be released here;
 * resources created by vulkan_device_init must be released by
 * vulkan_device_uninit. This ensures we don't free user-provided resources,
 * and that nothing leaks.
 */
static void vulkan_device_free(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    if (hwctx->act_dev)
        vk->DestroyDevice(hwctx->act_dev, hwctx->alloc);

    if (p->debug_ctx)
        vk->DestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
                                          hwctx->alloc);

    if (hwctx->inst)
        vk->DestroyInstance(hwctx->inst, hwctx->alloc);

    if (p->libvulkan)
        dlclose(p->libvulkan);

    RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
    RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
}

static void vulkan_device_uninit(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->hwctx;

    for (uint32_t i = 0; i < p->nb_tot_qfs; i++) {
        pthread_mutex_destroy(p->qf_mutex[i]);
        av_freep(&p->qf_mutex[i]);
    }
    av_freep(&p->qf_mutex);

    ff_vk_uninit(&p->vkctx);
}

static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
                                         VulkanDeviceSelection *dev_select,
                                         int disable_multiplane,
                                         AVDictionary *opts, int flags)
{
    int err = 0;
    VkResult ret;
    AVDictionaryEntry *opt_d;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    enum FFVulkanDebugMode debug_mode = FF_VULKAN_DEBUG_NONE;

    /*
     * VkPhysicalDeviceVulkan12Features has a timelineSemaphore field, but
     * MoltenVK doesn't implement VkPhysicalDeviceVulkan12Features yet, so we
     * use VkPhysicalDeviceTimelineSemaphoreFeatures directly.
     */
    VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
    };
    VkPhysicalDeviceVideoMaintenance1FeaturesKHR video_maint_1_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR,
        .pNext = &timeline_features,
    };
    VkPhysicalDeviceShaderObjectFeaturesEXT shader_object_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT,
        .pNext = &video_maint_1_features,
    };
    VkPhysicalDeviceOpticalFlowFeaturesNV optical_flow_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV,
        .pNext = &shader_object_features,
    };
    VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_matrix_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR,
        .pNext = &optical_flow_features,
    };
    VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT,
        .pNext = &coop_matrix_features,
    };
    VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT,
        .pNext = &atomic_float_features,
    };
    VkPhysicalDeviceVulkan13Features dev_features_1_3 = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
        .pNext = &desc_buf_features,
    };
    VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
        .pNext = &dev_features_1_3,
    };
    VkPhysicalDeviceVulkan11Features dev_features_1_1 = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
        .pNext = &dev_features_1_2,
    };
    VkPhysicalDeviceFeatures2 dev_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = &dev_features_1_1,
    };
    VkDeviceCreateInfo dev_info = {
        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
    };

    ctx->free = vulkan_device_free;

    /* Create an instance if not given one */
    if ((err = create_instance(ctx, opts, &debug_mode)))
        goto end;

    /* Find a device (if not given one) */
    if ((err = find_device(ctx, dev_select)))
        goto end;

    vk->GetPhysicalDeviceFeatures2(hwctx->phys_dev, &dev_features);

    /* Try to keep in sync with libplacebo */
#define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.features.NAME;
    COPY_FEATURE(hwctx->device_features, shaderImageGatherExtended)
    COPY_FEATURE(hwctx->device_features, shaderStorageImageReadWithoutFormat)
    COPY_FEATURE(hwctx->device_features, shaderStorageImageWriteWithoutFormat)
    COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics)
    COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics)
    COPY_FEATURE(hwctx->device_features, shaderInt64)
    COPY_FEATURE(hwctx->device_features, shaderInt16)
    COPY_FEATURE(hwctx->device_features, shaderFloat64)
#undef COPY_FEATURE

    /* We require timeline semaphores */
    if (!timeline_features.timelineSemaphore) {
        av_log(ctx, AV_LOG_ERROR, "Device does not support timeline semaphores!\n");
        err = AVERROR(ENOSYS);
        goto end;
    }

    p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
    p->device_features_1_1.storagePushConstant16 = dev_features_1_1.storagePushConstant16;
    p->device_features_1_1.storageBuffer16BitAccess = dev_features_1_1.storageBuffer16BitAccess;
    p->device_features_1_1.uniformAndStorageBuffer16BitAccess = dev_features_1_1.uniformAndStorageBuffer16BitAccess;

    p->device_features_1_2.timelineSemaphore = 1;
    p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress;
    p->device_features_1_2.hostQueryReset = dev_features_1_2.hostQueryReset;
    p->device_features_1_2.storagePushConstant8 = dev_features_1_2.storagePushConstant8;
    p->device_features_1_2.shaderInt8 = dev_features_1_2.shaderInt8;
    p->device_features_1_2.storageBuffer8BitAccess = dev_features_1_2.storageBuffer8BitAccess;
    p->device_features_1_2.uniformAndStorageBuffer8BitAccess = dev_features_1_2.uniformAndStorageBuffer8BitAccess;
    p->device_features_1_2.shaderFloat16 = dev_features_1_2.shaderFloat16;
    p->device_features_1_2.shaderSharedInt64Atomics = dev_features_1_2.shaderSharedInt64Atomics;
    p->device_features_1_2.vulkanMemoryModel = dev_features_1_2.vulkanMemoryModel;
    p->device_features_1_2.vulkanMemoryModelDeviceScope = dev_features_1_2.vulkanMemoryModelDeviceScope;

    p->device_features_1_3.dynamicRendering = dev_features_1_3.dynamicRendering;
    p->device_features_1_3.maintenance4 = dev_features_1_3.maintenance4;
    p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2;
    p->device_features_1_3.computeFullSubgroups = dev_features_1_3.computeFullSubgroups;
    p->device_features_1_3.shaderZeroInitializeWorkgroupMemory = dev_features_1_3.shaderZeroInitializeWorkgroupMemory;

    p->video_maint_1_features.videoMaintenance1 = video_maint_1_features.videoMaintenance1;

    p->desc_buf_features.descriptorBuffer = desc_buf_features.descriptorBuffer;
    p->desc_buf_features.descriptorBufferPushDescriptors = desc_buf_features.descriptorBufferPushDescriptors;

    p->atomic_float_features.shaderBufferFloat32Atomics = atomic_float_features.shaderBufferFloat32Atomics;
    p->atomic_float_features.shaderBufferFloat32AtomicAdd = atomic_float_features.shaderBufferFloat32AtomicAdd;

    p->coop_matrix_features.cooperativeMatrix = coop_matrix_features.cooperativeMatrix;

    p->optical_flow_features.opticalFlow = optical_flow_features.opticalFlow;

    p->shader_object_features.shaderObject = shader_object_features.shaderObject;

    /* Find and enable extensions */
    if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
                                &dev_info.enabledExtensionCount, debug_mode))) {
        for (int i = 0; i < dev_info.queueCreateInfoCount; i++)
            av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities);
        av_free((void *)dev_info.pQueueCreateInfos);
        goto end;
    }

    /* Setup enabled device features */
    hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    hwctx->device_features.pNext = &p->device_features_1_1;
    p->device_features_1_1.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
    p->device_features_1_1.pNext = &p->device_features_1_2;
    p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
    p->device_features_1_2.pNext = &p->device_features_1_3;
    p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
    p->device_features_1_3.pNext = NULL;

#define OPT_CHAIN(EXT_FLAG, STRUCT_P, TYPE)                            \
    do {                                                               \
        if (p->vkctx.extensions & EXT_FLAG) {                          \
            (STRUCT_P)->sType = TYPE;                                  \
            ff_vk_link_struct(hwctx->device_features.pNext, STRUCT_P); \
        }                                                              \
    } while (0)

    OPT_CHAIN(FF_VK_EXT_DESCRIPTOR_BUFFER, &p->desc_buf_features,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT);
    OPT_CHAIN(FF_VK_EXT_ATOMIC_FLOAT, &p->atomic_float_features,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT);
    OPT_CHAIN(FF_VK_EXT_COOP_MATRIX, &p->coop_matrix_features,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR);
    OPT_CHAIN(FF_VK_EXT_SHADER_OBJECT, &p->shader_object_features,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT);
    OPT_CHAIN(FF_VK_EXT_OPTICAL_FLOW, &p->optical_flow_features,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV);
    OPT_CHAIN(FF_VK_EXT_VIDEO_MAINTENANCE_1, &p->video_maint_1_features,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR);
#undef OPT_CHAIN

    /* Add the enabled features into the pnext chain of device creation */
    dev_info.pNext = &hwctx->device_features;

    /* Setup enabled queue families */
    if ((err = setup_queue_families(ctx, &dev_info)))
        goto end;

    ret = vk->CreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
                           &hwctx->act_dev);

    for (int i = 0; i < dev_info.queueCreateInfoCount; i++)
        av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities);
    av_free((void *)dev_info.pQueueCreateInfos);

    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
               ff_vk_ret2str(ret));
        for (int i = 0; i < dev_info.enabledExtensionCount; i++)
            av_free((void *)dev_info.ppEnabledExtensionNames[i]);
        av_free((void *)dev_info.ppEnabledExtensionNames);
        err = AVERROR_EXTERNAL;
        goto end;
    }

    /* Tiled images are used by default; the "linear_images" option overrides this */
    opt_d = av_dict_get(opts, "linear_images", NULL, 0);
    if (opt_d)
        p->use_linear_images = strtol(opt_d->value, NULL, 10);

    /* The disable_multiplane argument takes precedence over the option. */
    p->disable_multiplane = disable_multiplane;
    if (!p->disable_multiplane) {
        opt_d = av_dict_get(opts, "disable_multiplane", NULL, 0);
        if (opt_d)
            p->disable_multiplane = strtol(opt_d->value, NULL, 10);
    }

    hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
    hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;

end:
    return err;
}
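
/* Minimal sketch of how the options parsed above reach this function
 * through the public API (device string left NULL for brevity):
 *
 *     AVBufferRef *ref = NULL;
 *     AVDictionary *opts = NULL;
 *     av_dict_set(&opts, "linear_images", "1", 0);
 *     av_dict_set(&opts, "disable_multiplane", "1", 0);
 *     err = av_hwdevice_ctx_create(&ref, AV_HWDEVICE_TYPE_VULKAN,
 *                                  NULL, opts, 0);
 *     av_dict_free(&opts);
 */
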
static void lock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
{
    VulkanDevicePriv *p = ctx->hwctx;
    pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
}

static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
{
    VulkanDevicePriv *p = ctx->hwctx;
    pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
}
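
/* These are only the built-in defaults: API users who supply their own
 * VkDevice may install matching callbacks on the AVVulkanDeviceContext
 * before init, e.g. (sketch; my_lock/my_unlock are hypothetical):
 *
 *     hwctx->lock_queue   = my_lock;   // same signatures as above
 *     hwctx->unlock_queue = my_unlock;
 *
 * vulkan_device_init() below only fills these in when they are still NULL. */
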
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
    int err = 0;
    uint32_t qf_num;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkQueueFamilyProperties2 *qf;
    VkQueueFamilyVideoPropertiesKHR *qf_vid;
    int graph_index, comp_index, tx_index, enc_index, dec_index;

    /* Set device extension flags */
    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
        for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
            if (!strcmp(hwctx->enabled_dev_extensions[i],
                        optional_device_exts[j].name)) {
                p->vkctx.extensions |= optional_device_exts[j].flag;
                break;
            }
        }
    }

    err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 1);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n");
        return err;
    }

    p->props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    p->props.pNext = &p->hprops;
    p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;

    vk->GetPhysicalDeviceProperties2(hwctx->phys_dev, &p->props);
    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n",
           p->props.properties.deviceName);

    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %"PRIu64"\n",
           p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment:              %"SIZE_SPECIFIER"\n",
           p->props.properties.limits.minMemoryMapAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    nonCoherentAtomSize:                %"PRIu64"\n",
           p->props.properties.limits.nonCoherentAtomSize);
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
        av_log(ctx, AV_LOG_VERBOSE, "    minImportedHostPointerAlignment:    %"PRIu64"\n",
               p->hprops.minImportedHostPointerAlignment);

    p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);

    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
    if (!qf_num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties2));
    if (!qf)
        return AVERROR(ENOMEM);

    qf_vid = av_malloc_array(qf_num, sizeof(VkQueueFamilyVideoPropertiesKHR));
    if (!qf_vid) {
        av_free(qf);
        return AVERROR(ENOMEM);
    }

    for (uint32_t i = 0; i < qf_num; i++) {
        qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
        };
        qf[i] = (VkQueueFamilyProperties2) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
            .pNext = &qf_vid[i],
        };
    }

    vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &qf_num, qf);

    p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex));
    if (!p->qf_mutex) {
        err = AVERROR(ENOMEM);
        goto end;
    }
    p->nb_tot_qfs = qf_num;

    for (uint32_t i = 0; i < qf_num; i++) {
        p->qf_mutex[i] = av_calloc(qf[i].queueFamilyProperties.queueCount,
                                   sizeof(**p->qf_mutex));
        if (!p->qf_mutex[i]) {
            err = AVERROR(ENOMEM);
            goto end;
        }
        for (uint32_t j = 0; j < qf[i].queueFamilyProperties.queueCount; j++) {
            err = pthread_mutex_init(&p->qf_mutex[i][j], NULL);
            if (err != 0) {
                err = AVERROR(err);
                av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed: %s\n",
                       av_err2str(err));
                goto end;
            }
        }
    }

#if FF_API_VULKAN_FIXED_QUEUES
FF_DISABLE_DEPRECATION_WARNINGS
    graph_index = hwctx->nb_graphics_queues ? hwctx->queue_family_index : -1;
    comp_index  = hwctx->nb_comp_queues ? hwctx->queue_family_comp_index : -1;
    tx_index    = hwctx->nb_tx_queues ? hwctx->queue_family_tx_index : -1;
    dec_index   = hwctx->nb_decode_queues ? hwctx->queue_family_decode_index : -1;
    enc_index   = hwctx->nb_encode_queues ? hwctx->queue_family_encode_index : -1;

#define CHECK_QUEUE(type, required, fidx, ctx_qf, qc)                                           \
    do {                                                                                        \
        if (ctx_qf < 0 && required) {                                                           \
            av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing"      \
                   " in the context!\n", type);                                                 \
            err = AVERROR(EINVAL);                                                              \
            goto end;                                                                           \
        } else if (fidx < 0 || ctx_qf < 0) {                                                    \
            break;                                                                              \
        } else if (ctx_qf >= qf_num) {                                                          \
            av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
                   type, ctx_qf, qf_num);                                                       \
            err = AVERROR(EINVAL);                                                              \
            goto end;                                                                           \
        }                                                                                       \
                                                                                                \
        av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)"                        \
               " for%s%s%s%s%s\n",                                                              \
               ctx_qf, qc,                                                                      \
               ctx_qf == graph_index ? " graphics" : "",                                        \
               ctx_qf == comp_index  ? " compute" : "",                                         \
               ctx_qf == tx_index    ? " transfers" : "",                                       \
               ctx_qf == enc_index   ? " encode" : "",                                          \
               ctx_qf == dec_index   ? " decode" : "");                                         \
        graph_index = (ctx_qf == graph_index) ? -1 : graph_index;                               \
        comp_index  = (ctx_qf == comp_index)  ? -1 : comp_index;                                \
        tx_index    = (ctx_qf == tx_index)    ? -1 : tx_index;                                  \
        enc_index   = (ctx_qf == enc_index)   ? -1 : enc_index;                                 \
        dec_index   = (ctx_qf == dec_index)   ? -1 : dec_index;                                 \
    } while (0)

    CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index,        hwctx->nb_graphics_queues);
    CHECK_QUEUE("compute",  1, comp_index,  hwctx->queue_family_comp_index,   hwctx->nb_comp_queues);
    CHECK_QUEUE("upload",   1, tx_index,    hwctx->queue_family_tx_index,     hwctx->nb_tx_queues);
    CHECK_QUEUE("decode",   0, dec_index,   hwctx->queue_family_decode_index, hwctx->nb_decode_queues);
    CHECK_QUEUE("encode",   0, enc_index,   hwctx->queue_family_encode_index, hwctx->nb_encode_queues);

#undef CHECK_QUEUE

    /* Update the new queue family fields. If non-zero already,
     * it means API users have set it. */
    if (!hwctx->nb_qf) {
#define ADD_QUEUE(ctx_qf, qc, flag)                                       \
        do {                                                              \
            if (ctx_qf != -1) {                                           \
                hwctx->qf[hwctx->nb_qf++] = (AVVulkanDeviceQueueFamily) { \
                    .idx = ctx_qf,                                        \
                    .num = qc,                                            \
                    .flags = flag,                                        \
                };                                                        \
            }                                                             \
        } while (0)

        ADD_QUEUE(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
        ADD_QUEUE(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
        ADD_QUEUE(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
        ADD_QUEUE(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
        ADD_QUEUE(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
#undef ADD_QUEUE
    }
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    for (int i = 0; i < hwctx->nb_qf; i++) {
        if (!hwctx->qf[i].video_caps &&
            hwctx->qf[i].flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR |
                                  VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) {
            hwctx->qf[i].video_caps = qf_vid[hwctx->qf[i].idx].videoCodecOperations;
        }
    }

    /* Setup array for pQueueFamilyIndices with used queue families */
    p->nb_img_qfs = 0;
    for (int i = 0; i < hwctx->nb_qf; i++) {
        int seen = 0;
        /* Make sure each entry is unique
         * (VUID-VkBufferCreateInfo-sharingMode-01419) */
        for (int j = (i - 1); j >= 0; j--) {
            if (hwctx->qf[i].idx == hwctx->qf[j].idx) {
                seen = 1;
                break;
            }
        }
        if (!seen)
            p->img_qfs[p->nb_img_qfs++] = hwctx->qf[i].idx;
    }

    if (!hwctx->lock_queue)
        hwctx->lock_queue = lock_queue;
    if (!hwctx->unlock_queue)
        hwctx->unlock_queue = unlock_queue;

    /* Get device capabilities */
    vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);

    p->vkctx.device = ctx;
    p->vkctx.hwctx = hwctx;

    ff_vk_load_props(&p->vkctx);
    ff_vk_qf_init(&p->vkctx, &p->compute_qf, VK_QUEUE_COMPUTE_BIT);
    ff_vk_qf_init(&p->vkctx, &p->transfer_qf, VK_QUEUE_TRANSFER_BIT);

end:
    av_free(qf_vid);
    av_free(qf);
    return err;
}

static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
                                AVDictionary *opts, int flags)
{
    VulkanDeviceSelection dev_select = { 0 };
    if (device && device[0]) {
        char *end = NULL;
        dev_select.index = strtol(device, &end, 10);
        if (end == device) {
            dev_select.index = 0;
            dev_select.name  = device;
        }
    }

    return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags);
}
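
/* Entry point sketch: a numeric device string selects by index, anything
 * else is treated as a name substring:
 *
 *     AVBufferRef *ref = NULL;
 *     int err = av_hwdevice_ctx_create(&ref, AV_HWDEVICE_TYPE_VULKAN,
 *                                      "1", NULL, 0);
 */
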
static int vulkan_device_derive(AVHWDeviceContext *ctx,
                                AVHWDeviceContext *src_ctx,
                                AVDictionary *opts, int flags)
{
    av_unused VulkanDeviceSelection dev_select = { 0 };

    /* If there's only one device on the system, then even if it's not covered
     * by the following checks (e.g. non-PCIe ARM GPU), having an empty
     * dev_select will mean it'll get picked. */
    switch (src_ctx->type) {
#if CONFIG_VAAPI
    case AV_HWDEVICE_TYPE_VAAPI: {
        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
        VADisplay dpy = src_hwctx->display;
#if VA_CHECK_VERSION(1, 15, 0)
        VAStatus vas;
        VADisplayAttribute attr = {
            .type = VADisplayPCIID,
        };
#endif
        const char *vendor;

#if VA_CHECK_VERSION(1, 15, 0)
        vas = vaGetDisplayAttributes(dpy, &attr, 1);
        if (vas == VA_STATUS_SUCCESS && attr.flags != VA_DISPLAY_ATTRIB_NOT_SUPPORTED)
            dev_select.pci_device = (attr.value & 0xFFFF);
#endif

        if (!dev_select.pci_device) {
            vendor = vaQueryVendorString(dpy);
            if (!vendor) {
                av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
                return AVERROR_EXTERNAL;
            }

            if (strstr(vendor, "AMD"))
                dev_select.vendor_id = 0x1002;
        }

        return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags);
    }
#endif
#if CONFIG_LIBDRM
    case AV_HWDEVICE_TYPE_DRM: {
        int err;
        struct stat drm_node_info;
        drmDevice *drm_dev_info;
        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;

        err = fstat(src_hwctx->fd, &drm_node_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get node info from DRM fd: %s!\n",
                   av_err2str(AVERROR(errno)));
            return AVERROR_EXTERNAL;
        }

        /* st_rdev holds the device number of the DRM node itself */
        dev_select.drm_major = major(drm_node_info.st_rdev);
        dev_select.drm_minor = minor(drm_node_info.st_rdev);
        dev_select.has_drm   = 1;

        err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd: %s!\n",
                   av_err2str(AVERROR(errno)));
            return AVERROR_EXTERNAL;
        }

        if (drm_dev_info->bustype == DRM_BUS_PCI)
            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;

        drmFreeDevice(&drm_dev_info);

        return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags);
    }
#endif
#if CONFIG_CUDA
    case AV_HWDEVICE_TYPE_CUDA: {
        AVHWDeviceContext *cuda_cu = src_ctx;
        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
                                               cu_internal->cuda_device));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
            return AVERROR_EXTERNAL;
        }

        dev_select.has_uuid = 1;

        /*
         * CUDA is not able to import multiplane images, so always derive a
         * Vulkan device with multiplane disabled.
         */
        return vulkan_device_create_internal(ctx, &dev_select, 1, opts, flags);
    }
#endif
    default:
        return AVERROR(ENOSYS);
    }
}
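
/* Derivation sketch: given an existing source device reference (here a
 * hypothetical cuda_ref), the public wrapper routes through this function
 * and matches the physical device by UUID, DRM node or PCI ID:
 *
 *     AVBufferRef *vk_ref = NULL;
 *     int err = av_hwdevice_ctx_create_derived(&vk_ref,
 *                                              AV_HWDEVICE_TYPE_VULKAN,
 *                                              cuda_ref, 0);
 */
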
static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
                                         const void *hwconfig,
                                         AVHWFramesConstraints *constraints)
{
    int count = 0;
    VulkanDevicePriv *p = ctx->hwctx;

    for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
        count += vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
                                    p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
                                                           VK_IMAGE_TILING_OPTIMAL,
                                    NULL, NULL, NULL, NULL, 0, 0) >= 0;
    }

    constraints->valid_sw_formats = av_malloc_array(count + 1,
                                                    sizeof(enum AVPixelFormat));
    if (!constraints->valid_sw_formats)
        return AVERROR(ENOMEM);

    count = 0;
    for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
        if (vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
                               p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
                                                      VK_IMAGE_TILING_OPTIMAL,
                               NULL, NULL, NULL, NULL, 0, 0) >= 0) {
            constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt;
        }
    }

    constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;

    constraints->min_width  = 1;
    constraints->min_height = 1;
    constraints->max_width  = p->props.properties.limits.maxImageDimension2D;
    constraints->max_height = p->props.properties.limits.maxImageDimension2D;

    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
    if (!constraints->valid_hw_formats)
        return AVERROR(ENOMEM);

    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}
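
/* Callers normally reach this through the generic helper; sketch:
 *
 *     AVHWFramesConstraints *c =
 *         av_hwdevice_get_hwframe_constraints(device_ref, NULL);
 *     if (c) {
 *         // c->valid_sw_formats is terminated by AV_PIX_FMT_NONE
 *         av_hwframe_constraints_free(&c);
 *     }
 */
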
static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
                     VkMemoryPropertyFlagBits req_flags, const void *alloc_extension,
                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VulkanDevicePriv *p = ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    AVVulkanDeviceContext *dev_hwctx = &p->p;
    VkMemoryAllocateInfo alloc_info = {
        .sType          = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext          = alloc_extension,
        .allocationSize = req->size,
    };

    /* The Vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
        const VkMemoryType *type = &p->mprops.memoryTypes[i];

        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((type->propertyFlags & req_flags) != req_flags)
            continue;

        /* The memory type must be large enough */
        if (req->size > p->mprops.memoryHeaps[type->heapIndex].size)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vk->AllocateMemory(dev_hwctx->act_dev, &alloc_info,
                             dev_hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;

    return 0;
}
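
/* Worked example: req->memoryTypeBits == 0b0110 restricts the search to
 * types 1 and 2; if type 1 is HOST_VISIBLE only and type 2 is DEVICE_LOCAL,
 * a request with req_flags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT selects
 * index 2, the first type passing all three filters above. */
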
static void vulkan_free_internal(AVVkFrame *f)
{
    av_unused AVVkFrameInternal *internal = f->internal;

#if CONFIG_CUDA
    if (internal->cuda_fc_ref) {
        AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
        int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
        AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
        AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
        AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        for (int i = 0; i < planes; i++) {
            if (internal->cu_sem[i])
                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
            if (internal->cu_mma[i])
                CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
            if (internal->ext_mem[i])
                CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
#ifdef _WIN32
            if (internal->ext_sem_handle[i])
                CloseHandle(internal->ext_sem_handle[i]);
            if (internal->ext_mem_handle[i])
                CloseHandle(internal->ext_mem_handle[i]);
#endif
        }

        av_buffer_unref(&internal->cuda_fc_ref);
    }
#endif

    pthread_mutex_destroy(&internal->update_mutex);
    av_freep(&f->internal);
}

static void vulkan_frame_free(AVHWFramesContext *hwfc, AVVkFrame *f)
{
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    int nb_images = ff_vk_count_images(f);
    int nb_sems = 0;

    while (nb_sems < FF_ARRAY_ELEMS(f->sem) && f->sem[nb_sems])
        nb_sems++;

    if (nb_sems) {
        VkSemaphoreWaitInfo sem_wait = {
            .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
            .flags          = 0x0,
            .pSemaphores    = f->sem,
            .pValues        = f->sem_value,
            .semaphoreCount = nb_sems,
        };

        vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX);
    }

    vulkan_free_internal(f);

    for (int i = 0; i < nb_images; i++) {
        vk->DestroyImage(hwctx->act_dev,     f->img[i], hwctx->alloc);
        vk->FreeMemory(hwctx->act_dev,       f->mem[i], hwctx->alloc);
        vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
    }

    av_free(f);
}

static void vulkan_frame_free_cb(void *opaque, uint8_t *data)
{
    vulkan_frame_free(opaque, (AVVkFrame*)data);
}

static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
                          void *alloc_pnext, size_t alloc_pnext_stride)
{
    int img_cnt = 0, err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };

    while (f->img[img_cnt]) {
        int use_ded_mem;
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[img_cnt],
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = (void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride),
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

        if (f->tiling == VK_IMAGE_TILING_LINEAR)
            req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
                                                  p->props.properties.limits.minMemoryMapAlignment);

        /* In case the implementation prefers/requires dedicated allocation */
        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        if (use_ded_mem)
            ded_alloc.image = f->img[img_cnt];

        /* Allocate memory */
        if ((err = alloc_mem(ctx, &req.memoryRequirements,
                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                                 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                             use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                             &f->flags, &f->mem[img_cnt])))
            return err;

        f->size[img_cnt] = req.memoryRequirements.size;
        bind_info[img_cnt].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
        bind_info[img_cnt].image  = f->img[img_cnt];
        bind_info[img_cnt].memory = f->mem[img_cnt];

        img_cnt++;
    }

    /* Bind the allocated memory to the images */
    ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

enum PrepMode {
    PREP_MODE_GENERAL,
    PREP_MODE_WRITE,
    PREP_MODE_EXTERNAL_EXPORT,
    PREP_MODE_EXTERNAL_IMPORT,
    PREP_MODE_DECODING_DST,
    PREP_MODE_DECODING_DPB,
    PREP_MODE_ENCODING_DPB,
};

static int prepare_frame(AVHWFramesContext *hwfc, FFVkExecPool *ectx,
                         AVVkFrame *frame, enum PrepMode pmode)
{
    int err;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
    int nb_img_bar = 0;

    uint32_t dst_qf = VK_QUEUE_FAMILY_IGNORED;
    VkImageLayout new_layout;
    VkAccessFlags2 new_access;
    VkPipelineStageFlagBits2 src_stage = VK_PIPELINE_STAGE_2_NONE;

    /* This is dirty - but it works. The vulkan.c dependency system doesn't
     * free non-refcounted frames, and non-refcounted hardware frames cannot
     * happen anywhere outside of here. */
    AVBufferRef tmp_ref = {
        .data = (uint8_t *)hwfc,
    };
    AVFrame tmp_frame = {
        .data[0] = (uint8_t *)frame,
        .hw_frames_ctx = &tmp_ref,
    };

    VkCommandBuffer cmd_buf;
    FFVkExecContext *exec = ff_vk_exec_get(ectx);
    cmd_buf = exec->buf;
    ff_vk_exec_start(&p->vkctx, exec);

    err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, &tmp_frame,
                                   VK_PIPELINE_STAGE_2_NONE,
                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
    if (err < 0)
        return err;

    switch (pmode) {
    case PREP_MODE_GENERAL:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_WRITE:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_EXTERNAL_IMPORT:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        break;
    case PREP_MODE_EXTERNAL_EXPORT:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
        src_stage  = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
        break;
    case PREP_MODE_DECODING_DST:
        new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_DECODING_DPB:
        new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
        new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_ENCODING_DPB:
        new_layout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR;
        new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    }

    ff_vk_frame_barrier(&p->vkctx, exec, &tmp_frame, img_bar, &nb_img_bar,
                        src_stage,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        new_access, new_layout, dst_qf);

    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
        });

    err = ff_vk_exec_submit(&p->vkctx, exec);
    if (err < 0)
        return err;

    /* We can do this because there are no real dependencies */
    ff_vk_exec_discard_deps(&p->vkctx, exec);

    return 0;
}

static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format,
                                int frame_w, int frame_h, int plane)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);

    /* Currently always true unless gray + alpha support is added */
    if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB ||
        !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) {
        *w = frame_w;
        *h = frame_h;
        return;
    }

    *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w);
    *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h);
}
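
/* Example: for a 1920x1080 NV12 frame (log2_chroma_w/h == 1), plane 0 is
 * 1920x1080 and plane 1 is 960x540. AV_CEIL_RSHIFT rounds up, so an odd
 * 1919x1079 frame also yields a 960x540 chroma plane. */
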
static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
                        VkImageTiling tiling, VkImageUsageFlagBits usage,
                        VkImageCreateFlags flags, int nb_layers,
                        void *create_pnext)
{
    int err;
    VkResult ret;
    AVVulkanFramesContext *hwfc_vk = hwfc->hwctx;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    VkExportSemaphoreCreateInfo ext_sem_info = {
        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
#ifdef _WIN32
        .handleTypes = IsWindows8OrGreater()
            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
#else
        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
#endif
    };

    VkSemaphoreTypeCreateInfo sem_type_info = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
#ifdef _WIN32
        .pNext = p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
#else
        .pNext = p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
#endif
        .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
        .initialValue  = 0,
    };

    VkSemaphoreCreateInfo sem_spawn = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = &sem_type_info,
    };

    AVVkFrame *f = av_vk_frame_alloc();
    if (!f) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        return AVERROR(ENOMEM);
    }

    // TODO: check width and height for alignment in case of multiplanar (must be mod-2 if subsampled)

    /* Create the images */
    for (int i = 0; (hwfc_vk->format[i] != VK_FORMAT_UNDEFINED); i++) {
        VkImageCreateInfo create_info = {
            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext                 = create_pnext,
            .imageType             = VK_IMAGE_TYPE_2D,
            .format                = hwfc_vk->format[i],
            .extent.depth          = 1,
            .mipLevels             = 1,
            .arrayLayers           = nb_layers,
            .flags                 = flags,
            .tiling                = tiling,
            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
            .usage                 = usage,
            .samples               = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->img_qfs,
            .queueFamilyIndexCount = p->nb_img_qfs,
            .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                         VK_SHARING_MODE_EXCLUSIVE,
        };

        get_plane_wh(&create_info.extent.width, &create_info.extent.height,
                     hwfc->sw_format, hwfc->width, hwfc->height, i);

        ret = vk->CreateImage(hwctx->act_dev, &create_info,
                              hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        /* Create semaphore */
        ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
                                  hwctx->alloc, &f->sem[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
        f->layout[i] = create_info.initialLayout;
        f->access[i] = 0x0;
        f->sem_value[i] = 0;
    }

    f->flags  = 0x0;
    f->tiling = tiling;

    *frame = f;
    return 0;

fail:
    vulkan_frame_free(hwfc, f);
    return err;
}

  2117. /* Checks if an export flag is enabled, and if it is ORs it with *iexp */
static void try_export_flags(AVHWFramesContext *hwfc,
                             VkExternalMemoryHandleTypeFlags *comp_handle_types,
                             VkExternalMemoryHandleTypeFlagBits *iexp,
                             VkExternalMemoryHandleTypeFlagBits exp)
{
    VkResult ret;
    AVVulkanFramesContext *hwctx = hwfc->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    AVVulkanDeviceContext *dev_hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info =
        ff_vk_find_struct(hwctx->create_pnext,
                          VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
    int has_mods = hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info;
    int nb_mods;

    VkExternalImageFormatProperties eprops = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
    };
    VkImageFormatProperties2 props = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
        .pNext = &eprops,
    };
    VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
        .pNext = NULL,
        .pQueueFamilyIndices   = p->img_qfs,
        .queueFamilyIndexCount = p->nb_img_qfs,
        .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                     VK_SHARING_MODE_EXCLUSIVE,
    };
    VkPhysicalDeviceExternalImageFormatInfo enext = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
        .handleType = exp,
        .pNext = has_mods ? &phy_dev_mod_info : NULL,
    };
    VkPhysicalDeviceImageFormatInfo2 pinfo = {
        .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
        .pNext  = !exp ? NULL : &enext,
        .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
        .type   = VK_IMAGE_TYPE_2D,
        .tiling = hwctx->tiling,
        .usage  = hwctx->usage,
        .flags  = VK_IMAGE_CREATE_ALIAS_BIT,
    };

    nb_mods = has_mods ? drm_mod_info->drmFormatModifierCount : 1;
    for (int i = 0; i < nb_mods; i++) {
        if (has_mods)
            phy_dev_mod_info.drmFormatModifier = drm_mod_info->pDrmFormatModifiers[i];

        ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
                                                          &pinfo, &props);

        if (ret == VK_SUCCESS) {
            *iexp |= exp;
            *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
        }
    }
}
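
/* Buffer pool allocator callback: creates an AVVkFrame with the context's
 * tiling/usage/flags, enables handle export where the relevant extensions
 * are present, allocates and binds its memory, records the initial layout
 * transition, and wraps the result in an AVBufferRef. */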
static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
{
    int err;
    AVVkFrame *f;
    AVBufferRef *avbuf = NULL;
    AVHWFramesContext *hwfc = opaque;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanFramesContext *hwctx = &fp->p;
    VkExternalMemoryHandleTypeFlags e = 0x0;
    VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];

    VkExternalMemoryImageCreateInfo eiinfo = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
        .pNext = hwctx->create_pnext,
    };

#ifdef _WIN32
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater()
                             ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
                             : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
#else
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e,
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
#endif

    for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
        eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
        eminfo[i].pNext       = hwctx->alloc_pnext[i];
        eminfo[i].handleTypes = e;
    }

    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
                       hwctx->nb_layers,
                       eiinfo.handleTypes ? &eiinfo : hwctx->create_pnext);
    if (err)
        return NULL;

    err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
    if (err)
        goto fail;

    if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
        !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DPB);
    else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DST);
    else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_ENCODING_DPB);
    else if (hwctx->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT)
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_WRITE);
    else
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_GENERAL);
    if (err)
        goto fail;

    avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
                             vulkan_frame_free_cb, hwfc, 0);
    if (!avbuf)
        goto fail;

    return avbuf;

fail:
    vulkan_frame_free(hwfc, f);
    return NULL;
}
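
/* Default lock_frame/unlock_frame callbacks, installed when the API user does
 * not provide their own: they serialize updates to an AVVkFrame's layout,
 * access and semaphore state via the frame's internal mutex. */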
static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
{
    pthread_mutex_lock(&vkf->internal->update_mutex);
}

static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
{
    pthread_mutex_unlock(&vkf->internal->update_mutex);
}
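
/* Frees frames-context-level state: the DRM format modifier info, the three
 * execution pools (compute, upload, download) and the staging buffer pool. */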
static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
{
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    VulkanFramesPriv *fp = hwfc->hwctx;

    if (fp->modifier_info) {
        if (fp->modifier_info->pDrmFormatModifiers)
            av_freep(&fp->modifier_info->pDrmFormatModifiers);
        av_freep(&fp->modifier_info);
    }

    ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec);
    ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec);
    ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec);

    av_buffer_pool_uninit(&fp->tmp);
}
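
/* Validates and fills in frame-context defaults (layer count, tiling, Vulkan
 * formats, usage and creation flags), sets up the execution pools, does a
 * trial allocation, and installs the internal buffer pool.
 *
 * A minimal caller-side sketch using the public API (illustrative only,
 * error handling omitted):
 *
 *     AVBufferRef *frames_ref = av_hwframe_ctx_alloc(device_ref);
 *     AVHWFramesContext *fc = (AVHWFramesContext *)frames_ref->data;
 *     fc->format    = AV_PIX_FMT_VULKAN;
 *     fc->sw_format = AV_PIX_FMT_NV12;
 *     fc->width     = 1920;
 *     fc->height    = 1080;
 *     av_hwframe_ctx_init(frames_ref); // ends up calling vulkan_frames_init()
 */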
static int vulkan_frames_init(AVHWFramesContext *hwfc)
{
    int err;
    AVVkFrame *f;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanFramesContext *hwctx = &fp->p;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    VkImageUsageFlagBits supported_usage;
    const struct FFVkFormatEntry *fmt;
    int disable_multiplane = p->disable_multiplane ||
                             (hwctx->flags & AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE);

    /* Defaults */
    if (!hwctx->nb_layers)
        hwctx->nb_layers = 1;

    /* VK_IMAGE_TILING_OPTIMAL == 0, can't check for it really */
    if (p->use_linear_images &&
        (hwctx->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT))
        hwctx->tiling = VK_IMAGE_TILING_LINEAR;

    fmt = vk_find_format_entry(hwfc->sw_format);
    if (!fmt) {
        av_log(hwfc, AV_LOG_ERROR, "Unsupported pixel format: %s!\n",
               av_get_pix_fmt_name(hwfc->sw_format));
        return AVERROR(EINVAL);
    }

    if (hwctx->format[0] != VK_FORMAT_UNDEFINED) {
        if (hwctx->format[0] != fmt->vkf) {
            for (int i = 0; i < fmt->nb_images_fallback; i++) {
                if (hwctx->format[i] != fmt->fallback[i]) {
                    av_log(hwfc, AV_LOG_ERROR, "Incompatible Vulkan format given "
                           "for the current sw_format %s!\n",
                           av_get_pix_fmt_name(hwfc->sw_format));
                    return AVERROR(EINVAL);
                }
            }
        }

        /* Check if the sw_format itself is supported */
        err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format,
                                 hwctx->tiling, NULL,
                                 NULL, NULL, &supported_usage, 0,
                                 !hwctx->usage ||
                                 (hwctx->usage & VK_IMAGE_USAGE_STORAGE_BIT));
        if (err < 0) {
            av_log(hwfc, AV_LOG_ERROR, "Unsupported sw format: %s!\n",
                   av_get_pix_fmt_name(hwfc->sw_format));
            return AVERROR(EINVAL);
        }
    } else {
        err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format,
                                 hwctx->tiling, hwctx->format, NULL,
                                 NULL, &supported_usage,
                                 disable_multiplane,
                                 !hwctx->usage ||
                                 (hwctx->usage & VK_IMAGE_USAGE_STORAGE_BIT));
        if (err < 0)
            return err;
    }

    /* Image usage flags */
    if (!hwctx->usage) {
        hwctx->usage = supported_usage & (VK_IMAGE_USAGE_TRANSFER_DST_BIT |
                                          VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                                          VK_IMAGE_USAGE_STORAGE_BIT      |
                                          VK_IMAGE_USAGE_SAMPLED_BIT);

        /* Enables encoding of images, if supported by format and extensions */
        if ((supported_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) &&
            (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
                                    FF_VK_EXT_VIDEO_MAINTENANCE_1)))
            hwctx->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
    }

    /* Image creation flags.
     * Only fill them in automatically if the image is not going to be used as
     * a DPB-only image, and we have SAMPLED/STORAGE bits set. */
    if (!hwctx->img_flags) {
        int is_lone_dpb = ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR) ||
                           ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
                            !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)));
        int sampleable = hwctx->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
                                         VK_IMAGE_USAGE_STORAGE_BIT);
        if (sampleable && !is_lone_dpb) {
            hwctx->img_flags = VK_IMAGE_CREATE_ALIAS_BIT;
            if ((fmt->vk_planes > 1) && (hwctx->format[0] == fmt->vkf))
                hwctx->img_flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
                                    VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
        }
    }

    /* If the image has an ENCODE_SRC usage, and the maintenance1
     * extension is supported, check if it has a profile list.
     * If there's no profile list, or it has no encode operations,
     * then allow creating the image with no specific profile. */
    if ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) &&
        (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
                                FF_VK_EXT_VIDEO_MAINTENANCE_1))) {
        const VkVideoProfileListInfoKHR *pl;
        pl = ff_vk_find_struct(hwctx->create_pnext, VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR);
        if (!pl) {
            hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR;
        } else {
            uint32_t i;
            for (i = 0; i < pl->profileCount; i++) {
                /* Video ops start at exactly 0x00010000 */
                if (pl->pProfiles[i].videoCodecOperation & 0xFFFF0000)
                    break;
            }
            if (i == pl->profileCount)
                hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR;
        }
    }

    if (!hwctx->lock_frame)
        hwctx->lock_frame = lock_frame;

    if (!hwctx->unlock_frame)
        hwctx->unlock_frame = unlock_frame;

    err = ff_vk_exec_pool_init(&p->vkctx, &p->compute_qf, &fp->compute_exec,
                               p->compute_qf.nb_queues, 0, 0, 0, NULL);
    if (err)
        return err;

    err = ff_vk_exec_pool_init(&p->vkctx, &p->transfer_qf, &fp->upload_exec,
                               p->transfer_qf.nb_queues*2, 0, 0, 0, NULL);
    if (err)
        return err;

    err = ff_vk_exec_pool_init(&p->vkctx, &p->transfer_qf, &fp->download_exec,
                               p->transfer_qf.nb_queues, 0, 0, 0, NULL);
    if (err)
        return err;

    /* Test to see if allocation will fail */
    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
                       hwctx->nb_layers, hwctx->create_pnext);
    if (err)
        return err;

    vulkan_frame_free(hwfc, f);

    /* If user did not specify a pool, hwfc->pool will be set to the internal one
     * in hwcontext.c just after this gets called */
    if (!hwfc->pool) {
        ffhwframesctx(hwfc)->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
                                                                  hwfc, vulkan_pool_alloc,
                                                                  NULL);
        if (!ffhwframesctx(hwfc)->pool_internal)
            return AVERROR(ENOMEM);
    }

    return 0;
}
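
/* get_buffer callback: hands out a pooled AVVkFrame wrapped in an AVFrame.
 * Callers normally reach this through the public API (illustrative,
 * unchecked):
 *
 *     AVFrame *frame = av_frame_alloc();
 *     av_hwframe_get_buffer(frames_ref, frame, 0);
 */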
static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
{
    frame->buf[0] = av_buffer_pool_get(hwfc->pool);
    if (!frame->buf[0])
        return AVERROR(ENOMEM);

    frame->data[0] = frame->buf[0]->data;
    frame->format  = AV_PIX_FMT_VULKAN;
    frame->width   = hwfc->width;
    frame->height  = hwfc->height;

    return 0;
}
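
/* Reports which pixel formats frames can be transferred to/from: always the
 * context's sw_format, plus CUDA when it is compiled in. */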
static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
                                       enum AVHWFrameTransferDirection dir,
                                       enum AVPixelFormat **formats)
{
    enum AVPixelFormat *fmts;
    int n = 2;

#if CONFIG_CUDA
    n++;
#endif
    fmts = av_malloc_array(n, sizeof(*fmts));
    if (!fmts)
        return AVERROR(ENOMEM);

    n = 0;
    fmts[n++] = hwfc->sw_format;
#if CONFIG_CUDA
    fmts[n++] = AV_PIX_FMT_CUDA;
#endif
    fmts[n++] = AV_PIX_FMT_NONE;

    *formats = fmts;

    return 0;
}

#if CONFIG_LIBDRM
static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
    vulkan_frame_free(hwfc, hwmap->priv);
}

static const struct {
    uint32_t drm_fourcc;
    VkFormat vk_format;
} vulkan_drm_format_map[] = {
    { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
    { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
    { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
    { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
    { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
    { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },
    { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
    { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
    { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
    { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
    { DRM_FORMAT_ARGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 },
    { DRM_FORMAT_ABGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 },
    { DRM_FORMAT_XRGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 },
    { DRM_FORMAT_XBGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 },

    // All these DRM_FORMATs were added in the same libdrm commit.
#ifdef DRM_FORMAT_XYUV8888
    { DRM_FORMAT_XYUV8888,        VK_FORMAT_R8G8B8A8_UNORM     },
    { DRM_FORMAT_XVYU12_16161616, VK_FORMAT_R16G16B16A16_UNORM },
    // As we had to map XV36 to a 16bit Vulkan format, reverse mapping will
    // end up yielding Y416 as the DRM format, so we need to recognise it.
    { DRM_FORMAT_Y416,            VK_FORMAT_R16G16B16A16_UNORM },
#endif
};

static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
{
    for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
        if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
            return vulkan_drm_format_map[i].vk_format;
    return VK_FORMAT_UNDEFINED;
}
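
/* Imports a DRM PRIME frame descriptor as Vulkan images: creates one image
 * per DRM layer with explicit modifier layouts, imports each underlying
 * DMA-BUF fd as device memory, and binds the memory planes to the images. */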
static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
                                          const AVFrame *src, int flags)
{
    int err = 0;
    VkResult ret;
    AVVkFrame *f;
    int bind_counts = 0;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
    VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES];
    VkBindImagePlaneMemoryInfo plane_info[AV_DRM_MAX_PLANES];

    for (int i = 0; i < desc->nb_layers; i++) {
        if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
            av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
                   desc->layers[i].format);
            return AVERROR(EINVAL);
        }
    }

    if (!(f = av_vk_frame_alloc())) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        err = AVERROR(ENOMEM);
        goto fail;
    }

    f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;

    for (int i = 0; i < desc->nb_layers; i++) {
        const int planes = desc->layers[i].nb_planes;

        /* Semaphore */
        VkSemaphoreTypeCreateInfo sem_type_info = {
            .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
            .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
            .initialValue  = 0,
        };
        VkSemaphoreCreateInfo sem_spawn = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
            .pNext = &sem_type_info,
        };

        /* Image creation */
        VkSubresourceLayout ext_img_layouts[AV_DRM_MAX_PLANES];
        VkImageDrmFormatModifierExplicitCreateInfoEXT ext_img_mod_spec = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
            .drmFormatModifier = desc->objects[0].format_modifier,
            .drmFormatModifierPlaneCount = planes,
            .pPlaneLayouts = (const VkSubresourceLayout *)&ext_img_layouts,
        };
        VkExternalMemoryImageCreateInfo ext_img_spec = {
            .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
            .pNext       = &ext_img_mod_spec,
            .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };
        VkImageCreateInfo create_info = {
            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext                 = &ext_img_spec,
            .imageType             = VK_IMAGE_TYPE_2D,
            .format                = drm_to_vulkan_fmt(desc->layers[i].format),
            .extent.depth          = 1,
            .mipLevels             = 1,
            .arrayLayers           = 1,
            .flags                 = 0x0,
            .tiling                = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
            .usage                 = 0x0, /* filled in below */
            .samples               = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->img_qfs,
            .queueFamilyIndexCount = p->nb_img_qfs,
            .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                         VK_SHARING_MODE_EXCLUSIVE,
        };

        /* Image format verification */
        VkExternalImageFormatProperties ext_props = {
            .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
        };
        VkImageFormatProperties2 props_ret = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
            .pNext = &ext_props,
        };
        VkPhysicalDeviceImageDrmFormatModifierInfoEXT props_drm_mod = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
            .drmFormatModifier = ext_img_mod_spec.drmFormatModifier,
            .pQueueFamilyIndices = create_info.pQueueFamilyIndices,
            .queueFamilyIndexCount = create_info.queueFamilyIndexCount,
            .sharingMode = create_info.sharingMode,
        };
        VkPhysicalDeviceExternalImageFormatInfo props_ext = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
            .pNext = &props_drm_mod,
            .handleType = ext_img_spec.handleTypes,
        };
        VkPhysicalDeviceImageFormatInfo2 fmt_props;

        if (flags & AV_HWFRAME_MAP_READ)
            create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT |
                                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
        if (flags & AV_HWFRAME_MAP_WRITE)
            create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT |
                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT;

        fmt_props = (VkPhysicalDeviceImageFormatInfo2) {
            .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
            .pNext  = &props_ext,
            .format = create_info.format,
            .type   = create_info.imageType,
            .tiling = create_info.tiling,
            .usage  = create_info.usage,
            .flags  = create_info.flags,
        };

        /* Check if importing is possible for this combination of parameters */
        ret = vk->GetPhysicalDeviceImageFormatProperties2(hwctx->phys_dev,
                                                          &fmt_props, &props_ret);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Cannot map DRM frame to Vulkan: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        /* Set the image width/height */
        get_plane_wh(&create_info.extent.width, &create_info.extent.height,
                     hwfc->sw_format, src->width, src->height, i);

        /* Set the subresource layout based on the layer properties */
        for (int j = 0; j < planes; j++) {
            ext_img_layouts[j].offset     = desc->layers[i].planes[j].offset;
            ext_img_layouts[j].rowPitch   = desc->layers[i].planes[j].pitch;
            ext_img_layouts[j].size       = 0; /* The specs say so for all 3 */
            ext_img_layouts[j].arrayPitch = 0;
            ext_img_layouts[j].depthPitch = 0;
        }

        /* Create image */
        ret = vk->CreateImage(hwctx->act_dev, &create_info,
                              hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
                                  hwctx->alloc, &f->sem[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        f->queue_family[i] = VK_QUEUE_FAMILY_EXTERNAL;
        f->layout[i] = create_info.initialLayout;
        f->access[i] = 0x0;
        f->sem_value[i] = 0;
    }

    for (int i = 0; i < desc->nb_layers; i++) {
        /* Memory requirements */
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[i],
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req2 = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        /* Allocation/importing */
        VkMemoryFdPropertiesKHR fdmp = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
        };
        /* This assumes that a layer will never be constructed from multiple
         * objects. If that was to happen in the real world, this code would
         * need to import each plane separately.
         */
        VkImportMemoryFdInfoKHR idesc = {
            .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
            .fd         = dup(desc->objects[desc->layers[i].planes[0].object_index].fd),
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = &idesc,
            .image = req_desc.image,
        };

        /* Get object properties */
        ret = vk->GetMemoryFdPropertiesKHR(hwctx->act_dev,
                                           VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
                                           idesc.fd, &fdmp);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            close(idesc.fd);
            goto fail;
        }

        vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2);

        /* Only a single bit must be set, not a range, and it must match */
        req2.memoryRequirements.memoryTypeBits = fdmp.memoryTypeBits;

        err = alloc_mem(ctx, &req2.memoryRequirements,
                        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                        (ded_req.prefersDedicatedAllocation ||
                         ded_req.requiresDedicatedAllocation) ?
                            &ded_alloc : ded_alloc.pNext,
                        &f->flags, &f->mem[i]);
        if (err) {
            close(idesc.fd);
            return err;
        }

        f->size[i] = req2.memoryRequirements.size;
    }

    for (int i = 0; i < desc->nb_layers; i++) {
        const int planes = desc->layers[i].nb_planes;
        for (int j = 0; j < planes; j++) {
            VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
                                           j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
                                                    VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;

            plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
            plane_info[bind_counts].pNext = NULL;
            plane_info[bind_counts].planeAspect = aspect;

            bind_info[bind_counts].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
            bind_info[bind_counts].pNext  = planes > 1 ? &plane_info[bind_counts] : NULL;
            bind_info[bind_counts].image  = f->img[i];
            bind_info[bind_counts].memory = f->mem[i];

            /* Offset is already signalled via pPlaneLayouts above */
            bind_info[bind_counts].memoryOffset = 0;

            bind_counts++;
        }
    }

    /* Bind the allocated memory to the images */
    ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    *frame = f;

    return 0;

fail:
    vulkan_frame_free(hwfc, f);

    return err;
}
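
/* Synchronizes an imported DMA-BUF with Vulkan: where the kernel and driver
 * support it, the implicit fence of each DRM object is exported as a sync
 * file and imported as a binary semaphore the next submission waits on;
 * otherwise the frame is only transitioned, with a warning that its contents
 * may be read before the producer has finished writing them. */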
static int vulkan_map_from_drm_frame_sync(AVHWFramesContext *hwfc, AVFrame *dst,
                                          const AVFrame *src, int flags)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];

#ifdef DMA_BUF_IOCTL_EXPORT_SYNC_FILE
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM) {
        VkCommandBuffer cmd_buf;
        FFVkExecContext *exec;
        VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
        VkSemaphore drm_sync_sem[AV_DRM_MAX_PLANES] = { 0 };
        int nb_img_bar = 0;

        for (int i = 0; i < desc->nb_objects; i++) {
            VkSemaphoreTypeCreateInfo sem_type_info = {
                .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
                .semaphoreType = VK_SEMAPHORE_TYPE_BINARY,
            };
            VkSemaphoreCreateInfo sem_spawn = {
                .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
                .pNext = &sem_type_info,
            };
            VkImportSemaphoreFdInfoKHR import_info;
            struct dma_buf_export_sync_file implicit_fd_info = {
                .flags = DMA_BUF_SYNC_READ,
                .fd    = -1,
            };

            if (ioctl(desc->objects[i].fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
                      &implicit_fd_info)) {
                err = AVERROR(errno);
                av_log(ctx, AV_LOG_ERROR, "Failed to retrieve implicit DRM sync file: %s\n",
                       av_err2str(err));
                for (; i >= 0; i--)
                    vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc);
                return err;
            }

            ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
                                      hwctx->alloc, &drm_sync_sem[i]);
            if (ret != VK_SUCCESS) {
                av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                       ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                for (; i >= 0; i--)
                    vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc);
                return err;
            }

            import_info = (VkImportSemaphoreFdInfoKHR) {
                .sType      = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
                .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
                .flags      = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
                .semaphore  = drm_sync_sem[i],
                .fd         = implicit_fd_info.fd,
            };

            ret = vk->ImportSemaphoreFdKHR(hwctx->act_dev, &import_info);
            if (ret != VK_SUCCESS) {
                av_log(ctx, AV_LOG_ERROR, "Failed to import semaphore: %s\n",
                       ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                for (; i >= 0; i--)
                    vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc);
                return err;
            }
        }

        exec = ff_vk_exec_get(&fp->compute_exec);
        cmd_buf = exec->buf;

        ff_vk_exec_start(&p->vkctx, exec);

        /* Ownership of semaphores is passed */
        err = ff_vk_exec_add_dep_bool_sem(&p->vkctx, exec,
                                          drm_sync_sem, desc->nb_objects,
                                          VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, 1);
        if (err < 0)
            return err;

        err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, dst,
                                       VK_PIPELINE_STAGE_2_NONE,
                                       VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
        if (err < 0)
            return err;

        ff_vk_frame_barrier(&p->vkctx, exec, dst, img_bar, &nb_img_bar,
                            VK_PIPELINE_STAGE_2_NONE,
                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                            ((flags & AV_HWFRAME_MAP_READ) ?
                             VK_ACCESS_2_SHADER_SAMPLED_READ_BIT : 0x0) |
                            ((flags & AV_HWFRAME_MAP_WRITE) ?
                             VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT : 0x0),
                            VK_IMAGE_LAYOUT_GENERAL,
                            VK_QUEUE_FAMILY_IGNORED);

        vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
                .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
                .pImageMemoryBarriers = img_bar,
                .imageMemoryBarrierCount = nb_img_bar,
        });

        err = ff_vk_exec_submit(&p->vkctx, exec);
        if (err < 0)
            return err;
    } else
#endif
    {
        AVVkFrame *f = (AVVkFrame *)dst->data[0];
        av_log(ctx, AV_LOG_WARNING, "No support for synchronization when importing DMA-BUFs, "
                                    "image may be corrupted.\n");
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_IMPORT);
        if (err)
            return err;
    }

    return 0;
}
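
/* Maps a DRM PRIME frame into a Vulkan frame: imports the descriptor,
 * registers the unmap callback, then performs the synchronization step.
 * Typically reached via av_hwframe_map() on a frame whose hw_frames_ctx is a
 * Vulkan frames context derived from the DRM one (illustrative, unchecked):
 *
 *     dst->format = AV_PIX_FMT_VULKAN;
 *     av_hwframe_map(dst, drm_src, AV_HWFRAME_MAP_READ);
 */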
static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
                               const AVFrame *src, int flags)
{
    int err = 0;
    AVVkFrame *f;

    if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src, flags)))
        return err;

    /* The unmapping function will free this */
    dst->data[0] = (uint8_t *)f;
    dst->width   = src->width;
    dst->height  = src->height;

    err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
                                &vulkan_unmap_from_drm, f);
    if (err < 0)
        goto fail;

    err = vulkan_map_from_drm_frame_sync(hwfc, dst, src, flags);
    if (err < 0)
        return err;

    av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");

    return 0;

fail:
    vulkan_frame_free(hwfc, f);
    dst->data[0] = NULL;
    return err;
}

#if CONFIG_VAAPI
static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
                                 AVFrame *dst, const AVFrame *src,
                                 int flags)
{
    int err;
    AVFrame *tmp = av_frame_alloc();
    AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
    AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
    VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];

    if (!tmp)
        return AVERROR(ENOMEM);

    /* We have to sync beforehand, as VAAPI exports no semaphores to wait on */
    vaSyncSurface(vaapi_ctx->display, surface_id);

    tmp->format = AV_PIX_FMT_DRM_PRIME;

    err = av_hwframe_map(tmp, src, flags);
    if (err < 0)
        goto fail;

    err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
    if (err < 0)
        goto fail;

    err = ff_hwframe_map_replace(dst, src);

fail:
    av_frame_free(&tmp);
    return err;
}
#endif
#endif

#if CONFIG_CUDA
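/* Exports an AVVkFrame's memory and semaphores to CUDA: each plane's device
 * memory becomes a CUDA external memory object mapped as a mipmapped array,
 * and each timeline semaphore is imported as a CUDA external semaphore. The
 * handles are cached on the frame's internals, so this only runs once per
 * frame. */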
static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                                 AVBufferRef *cuda_hwfc,
                                 const AVFrame *frame)
{
    int err;
    VkResult ret;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
                                                     CU_AD_FORMAT_UNSIGNED_INT8;

    dst_f = (AVVkFrame *)frame->data[0];
    dst_int = dst_f->internal;

    if (!dst_int->cuda_fc_ref) {
        dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
        if (!dst_int->cuda_fc_ref)
            return AVERROR(ENOMEM);

        for (int i = 0; i < planes; i++) {
            CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
                .offset = 0,
                .arrayDesc = {
                    .Depth = 0,
                    .Format = cufmt,
                    .NumChannels = 1 + ((planes == 2) && i),
                    .Flags = 0,
                },
                .numLevels = 1,
            };
            int p_w, p_h;

#ifdef _WIN32
            CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
                .type = IsWindows8OrGreater()
                    ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
                    : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT,
                .size = dst_f->size[i],
            };
            VkMemoryGetWin32HandleInfoKHR export_info = {
                .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
                .memory     = dst_f->mem[i],
                .handleType = IsWindows8OrGreater()
                    ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
                    : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
            };
            VkSemaphoreGetWin32HandleInfoKHR sem_export = {
                .sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
                .semaphore  = dst_f->sem[i],
                .handleType = IsWindows8OrGreater()
                    ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
                    : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
            };
            CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
                .type = 10 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 */,
            };

            ret = vk->GetMemoryWin32HandleKHR(hwctx->act_dev, &export_info,
                                              &ext_desc.handle.win32.handle);
            if (ret != VK_SUCCESS) {
                av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a Win32 Handle: %s!\n",
                       ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                goto fail;
            }
            dst_int->ext_mem_handle[i] = ext_desc.handle.win32.handle;
#else
            CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
                .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
                .size = dst_f->size[i],
            };
            VkMemoryGetFdInfoKHR export_info = {
                .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
                .memory     = dst_f->mem[i],
                .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
            };
            VkSemaphoreGetFdInfoKHR sem_export = {
                .sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
                .semaphore  = dst_f->sem[i],
                .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
            };
            CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
                .type = 9 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD */,
            };

            ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info,
                                     &ext_desc.handle.fd);
            if (ret != VK_SUCCESS) {
                av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD: %s!\n",
                       ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                goto fail;
            }
#endif

            ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
            if (ret < 0) {
#ifndef _WIN32
                close(ext_desc.handle.fd);
#endif
                err = AVERROR_EXTERNAL;
                goto fail;
            }

            get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
            tex_desc.arrayDesc.Width  = p_w;
            tex_desc.arrayDesc.Height = p_h;

            ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
                                                                       dst_int->ext_mem[i],
                                                                       &tex_desc));
            if (ret < 0) {
                err = AVERROR_EXTERNAL;
                goto fail;
            }

            ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
                                                        dst_int->cu_mma[i], 0));
            if (ret < 0) {
                err = AVERROR_EXTERNAL;
                goto fail;
            }

#ifdef _WIN32
            ret = vk->GetSemaphoreWin32HandleKHR(hwctx->act_dev, &sem_export,
                                                 &ext_sem_desc.handle.win32.handle);
#else
            ret = vk->GetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
                                        &ext_sem_desc.handle.fd);
#endif
            if (ret != VK_SUCCESS) {
                av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
                       ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                goto fail;
            }
#ifdef _WIN32
            dst_int->ext_sem_handle[i] = ext_sem_desc.handle.win32.handle;
#endif

            ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
                                                         &ext_sem_desc));
            if (ret < 0) {
#ifndef _WIN32
                close(ext_sem_desc.handle.fd);
#endif
                err = AVERROR_EXTERNAL;
                goto fail;
            }
        }
    }

    return 0;

fail:
    vulkan_free_internal(dst_f);
    return err;
}
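
/* Uploads a CUDA frame into a Vulkan frame: exports the Vulkan images and
 * semaphores to CUDA, waits on the current timeline values, copies each
 * plane with cuMemcpy2DAsync on the CUDA stream, then signals the next
 * timeline values and hands ownership back to Vulkan. */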
static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
                                          AVFrame *dst, const AVFrame *src)
{
    int err;
    CUcontext dummy;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    VulkanFramesPriv *fp = hwfc->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };

    dst_f = (AVVkFrame *)dst->data[0];

    err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        return err;

    err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
    if (err < 0)
        return err;

    err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
    if (err < 0) {
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return err;
    }

    dst_int = dst_f->internal;

    for (int i = 0; i < planes; i++) {
        s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0;
        s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1;
    }

    err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
                                                     planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .srcDevice     = (CUdeviceptr)src->data[i],
            .srcPitch      = src->linesize[i],
            .srcY          = 0,

            .dstMemoryType = CU_MEMORYTYPE_ARRAY,
            .dstArray      = dst_int->cu_array[i],
        };

        int p_w, p_h;
        get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);

        cpy.WidthInBytes = p_w * desc->comp[i].step;
        cpy.Height = p_h;

        err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
        if (err < 0)
            goto fail;
    }

    err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
                                                       planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++)
        dst_f->sem_value[i]++;

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");

    return prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT);

fail:
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    vulkan_free_internal(dst_f);
    av_buffer_unref(&dst->buf[0]);
    return err;
}
#endif
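
/* map_to entry point: dispatches mapping of foreign hardware frames (VAAPI,
 * DRM PRIME) into Vulkan, gated on DRM format modifier support. */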
static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
                         const AVFrame *src, int flags)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    switch (src->format) {
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
    case AV_PIX_FMT_VAAPI:
        if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
            return vulkan_map_from_vaapi(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#endif
    case AV_PIX_FMT_DRM_PRIME:
        if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
            return vulkan_map_from_drm(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#endif
    default:
        return AVERROR(ENOSYS);
    }
}

#if CONFIG_LIBDRM
typedef struct VulkanDRMMapping {
    AVDRMFrameDescriptor drm_desc;
    AVVkFrame *source;
} VulkanDRMMapping;

static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
    AVDRMFrameDescriptor *drm_desc = hwmap->priv;

    for (int i = 0; i < drm_desc->nb_objects; i++)
        close(drm_desc->objects[i].fd);

    av_free(drm_desc);
}

static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
{
    for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
        if (vulkan_drm_format_map[i].vk_format == vkfmt)
            return vulkan_drm_format_map[i].drm_fourcc;
    return DRM_FORMAT_INVALID;
}
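
/* Exports an AVVkFrame as a DRM PRIME descriptor: waits for pending work on
 * the frame's timeline semaphores, queries the image's DRM format modifier,
 * exports each memory plane as a DMA-BUF fd, and fills in per-layer plane
 * offsets/pitches (for non-OPTIMAL tiling) for consumers to import. */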
static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
                             const AVFrame *src, int flags)
{
    int err = 0;
    VkResult ret;
    AVVkFrame *f = (AVVkFrame *)src->data[0];
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanFramesContext *hwfctx = &fp->p;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkImageDrmFormatModifierPropertiesEXT drm_mod = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
    };
    VkSemaphoreWaitInfo wait_info = {
        .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
        .flags          = 0x0,
        .semaphoreCount = planes,
    };

    AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
    if (!drm_desc)
        return AVERROR(ENOMEM);

    err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        goto end;

    /* Wait for the operation to finish so we can cleanly export it. */
    wait_info.pSemaphores = f->sem;
    wait_info.pValues     = f->sem_value;

    vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX);

    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
    if (err < 0)
        goto end;

    ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
                                                     &drm_mod);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
        err = AVERROR_EXTERNAL;
        goto end;
    }

    for (int i = 0; (i < planes) && (f->mem[i]); i++) {
        VkMemoryGetFdInfoKHR export_info = {
            .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
            .memory     = f->mem[i],
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };

        ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info,
                                 &drm_desc->objects[i].fd);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
            err = AVERROR_EXTERNAL;
            goto end;
        }

        drm_desc->nb_objects++;
        drm_desc->objects[i].size = f->size[i];
        drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
    }

    drm_desc->nb_layers = planes;
    for (int i = 0; i < drm_desc->nb_layers; i++) {
        VkSubresourceLayout layout;
        VkImageSubresource sub = {
            .aspectMask = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT,
        };
        VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];

        drm_desc->layers[i].format    = vulkan_fmt_to_drm(plane_vkfmt);
        drm_desc->layers[i].nb_planes = 1;

        if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
            av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
            err = AVERROR_PATCHWELCOME;
            goto end;
        }

        drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);

        if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
            continue;

        vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
        drm_desc->layers[i].planes[0].offset = layout.offset;
        drm_desc->layers[i].planes[0].pitch  = layout.rowPitch;

        if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY)
            drm_desc->layers[i].planes[0].offset += f->offset[i];
    }

    dst->width   = src->width;
    dst->height  = src->height;
    dst->data[0] = (uint8_t *)drm_desc;

    av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");

    return 0;

end:
    av_free(drm_desc);
    return err;
}

#if CONFIG_VAAPI
static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
                               const AVFrame *src, int flags)
{
    int err;
    AVFrame *tmp = av_frame_alloc();
    if (!tmp)
        return AVERROR(ENOMEM);

    tmp->format = AV_PIX_FMT_DRM_PRIME;

    err = vulkan_map_to_drm(hwfc, tmp, src, flags);
    if (err < 0)
        goto fail;

    err = av_hwframe_map(dst, tmp, flags);
    if (err < 0)
        goto fail;

    err = ff_hwframe_map_replace(dst, src);

fail:
    av_frame_free(&tmp);
    return err;
}
#endif
#endif

static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
                           const AVFrame *src, int flags)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    switch (dst->format) {
#if CONFIG_LIBDRM
    case AV_PIX_FMT_DRM_PRIME:
        if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
            return vulkan_map_to_drm(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#if CONFIG_VAAPI
    case AV_PIX_FMT_VAAPI:
        if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
            return vulkan_map_to_vaapi(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#endif
#endif
    default:
        break;
    }
    return AVERROR(ENOSYS);
}
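
/* Copies plane data between a mapped staging buffer and a software frame,
 * with the required cache invalidate (download) or flush (upload) around the
 * copy when the buffer's memory is not host-coherent. */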
static int copy_buffer_data(AVHWFramesContext *hwfc, AVBufferRef *buf,
                            AVFrame *swf, VkBufferImageCopy *region,
                            int planes, int upload)
{
    VkResult ret;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    AVVulkanDeviceContext *hwctx = &p->p;

    FFVkBuffer *vkbuf = (FFVkBuffer *)buf->data;

    const VkMappedMemoryRange flush_info = {
        .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
        .memory = vkbuf->mem,
        .size   = VK_WHOLE_SIZE,
    };

    if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && !upload) {
        ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, 1,
                                               &flush_info);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    /* Copy into the buffer for uploads, out of it for downloads */
    for (int i = 0; i < planes; i++)
        av_image_copy_plane(upload ? vkbuf->mapped_mem + region[i].bufferOffset :
                                     swf->data[i],
                            upload ? region[i].bufferRowLength :
                                     swf->linesize[i],
                            upload ? swf->data[i] :
                                     vkbuf->mapped_mem + region[i].bufferOffset,
                            upload ? swf->linesize[i] :
                                     region[i].bufferRowLength,
                            swf->linesize[i],
                            region[i].imageExtent.height);

    if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && upload) {
        ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, 1,
                                          &flush_info);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}
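
/* Computes per-plane staging-buffer copy regions (aligned to the device's
 * optimal buffer copy pitch and offset) and grabs a suitably sized buffer
 * from the pool. */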
static int get_plane_buf(AVHWFramesContext *hwfc, AVBufferRef **dst,
                         AVFrame *swf, VkBufferImageCopy *region, int upload)
{
    int err;
    VulkanFramesPriv *fp = hwfc->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(swf->format);

    size_t buf_offset = 0;
    for (int i = 0; i < planes; i++) {
        size_t size;
        ptrdiff_t linesize = swf->linesize[i];

        uint32_t p_w, p_h;
        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);

        linesize = FFALIGN(linesize,
                           p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
        size = p_h*linesize;

        region[i] = (VkBufferImageCopy) {
            .bufferOffset = buf_offset,
            .bufferRowLength = linesize,
            .bufferImageHeight = p_h,
            .imageSubresource.layerCount = 1,
            .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
            /* Rest of the fields adjusted/filled in later */
        };

        buf_offset = FFALIGN(buf_offset + size,
                             p->props.properties.limits.optimalBufferCopyOffsetAlignment);
    }

    err = ff_vk_get_pooled_buffer(&p->vkctx, &fp->tmp, dst,
                                  VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                                  VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                                  NULL, buf_offset,
                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
    if (err < 0)
        return err;

    return 0;
}
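
/* Wraps an existing host allocation in a VkBuffer by importing the host
 * pointer (VK_EXT_external_memory_host), then binds it so transfers can use
 * the caller's memory directly, without a staging copy. */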
static int create_mapped_buffer(AVHWFramesContext *hwfc,
                                FFVkBuffer *vkb, VkBufferUsageFlags usage,
                                size_t size,
                                VkExternalMemoryBufferCreateInfo *create_desc,
                                VkImportMemoryHostPointerInfoEXT *import_desc,
                                VkMemoryHostPointerPropertiesEXT props)
{
    int err;
    VkResult ret;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    AVVulkanDeviceContext *hwctx = &p->p;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = create_desc,
        .usage       = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size        = size,
    };
    VkMemoryRequirements req = {
        .size           = size,
        .alignment      = p->hprops.minImportedHostPointerAlignment,
        .memoryTypeBits = props.memoryTypeBits,
    };

    err = ff_vk_alloc_mem(&p->vkctx, &req,
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
                          import_desc, &vkb->flags, &vkb->mem);
    if (err < 0)
        return err;

    ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, hwctx->alloc, &vkb->buf);
    if (ret != VK_SUCCESS) {
        vk->FreeMemory(hwctx->act_dev, vkb->mem, hwctx->alloc);
        return AVERROR_EXTERNAL;
    }

    ret = vk->BindBufferMemory(hwctx->act_dev, vkb->buf, vkb->mem, 0);
    if (ret != VK_SUCCESS) {
        vk->FreeMemory(hwctx->act_dev, vkb->mem, hwctx->alloc);
        vk->DestroyBuffer(hwctx->act_dev, vkb->buf, hwctx->alloc);
        return AVERROR_EXTERNAL;
    }

    return 0;
}

static void destroy_avvkbuf(void *opaque, uint8_t *data)
{
    FFVulkanContext *s = opaque;
    FFVkBuffer *buf = (FFVkBuffer *)data;
    ff_vk_free_buf(s, buf);
    av_free(buf);
}
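
/* Attempts to host-map each software frame plane as an imported Vulkan
 * buffer: the plane pointer is rounded down to the required alignment, the
 * rounding is compensated via the copy region's bufferOffset, and one
 * refcounted buffer is created per plane. Fails (so the caller can fall back
 * to staging buffers) on negative strides or unmappable memory. */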
static int host_map_frame(AVHWFramesContext *hwfc, AVBufferRef **dst, int *nb_bufs,
                          AVFrame *swf, VkBufferImageCopy *region, int upload)
{
    int err;
    VkResult ret;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    AVVulkanDeviceContext *hwctx = &p->p;

    const int planes = av_pix_fmt_count_planes(swf->format);

    VkExternalMemoryBufferCreateInfo create_desc = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
        .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
    };
    VkImportMemoryHostPointerInfoEXT import_desc = {
        .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
    };
    VkMemoryHostPointerPropertiesEXT props;

    for (int i = 0; i < planes; i++) {
        FFVkBuffer *vkb;
        uint32_t p_w, p_h;
        size_t offs;
        size_t buffer_size;

        /* We can't host map images with negative strides */
        if (swf->linesize[i] < 0) {
            err = AVERROR(EINVAL);
            goto fail;
        }

        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);

        /* Get the previous point at which mapping was possible and use it */
        offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment;
        import_desc.pHostPointer = swf->data[i] - offs;

        props = (VkMemoryHostPointerPropertiesEXT) {
            VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
        };
        ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
                                                    import_desc.handleType,
                                                    import_desc.pHostPointer,
                                                    &props);
        if (!(ret == VK_SUCCESS && props.memoryTypeBits)) {
            err = AVERROR(EINVAL);
            goto fail;
        }

        /* Buffer region for this plane */
        region[i] = (VkBufferImageCopy) {
            .bufferOffset = offs,
            .bufferRowLength = swf->linesize[i],
            .bufferImageHeight = p_h,
            .imageSubresource.layerCount = 1,
            .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
            /* Rest of the fields adjusted/filled in later */
        };

        /* Add the offset at the start, which gets ignored */
        buffer_size = offs + swf->linesize[i]*p_h;
        buffer_size = FFALIGN(buffer_size, p->props.properties.limits.minMemoryMapAlignment);
        buffer_size = FFALIGN(buffer_size, p->hprops.minImportedHostPointerAlignment);

        /* Create a buffer */
        vkb = av_mallocz(sizeof(*vkb));
        if (!vkb) {
            err = AVERROR(ENOMEM);
            goto fail;
        }

        err = create_mapped_buffer(hwfc, vkb,
                                   upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT :
                                            VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                                   buffer_size, &create_desc, &import_desc,
                                   props);
        if (err < 0) {
            av_free(vkb);
            goto fail;
        }

        /* Create a ref */
        dst[*nb_bufs] = av_buffer_create((uint8_t *)vkb, sizeof(*vkb),
                                         destroy_avvkbuf, &p->vkctx, 0);
        if (!dst[*nb_bufs]) {
            destroy_avvkbuf(&p->vkctx, (uint8_t *)vkb);
            err = AVERROR(ENOMEM);
            goto fail;
        }

        (*nb_bufs)++;
    }

    return 0;

fail:
    for (int i = 0; i < (*nb_bufs); i++)
        av_buffer_unref(&dst[i]);
    return err;
}
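
/* Common upload/download path between software and Vulkan frames: prefers
 * host-mapping the software frame's planes, falls back to a pooled staging
 * buffer, then records a layout transition plus per-plane buffer<->image
 * copies and submits them; downloads additionally wait for completion before
 * copying out of the staging buffer. */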
  3485. static int vulkan_transfer_frame(AVHWFramesContext *hwfc,
  3486. AVFrame *swf, AVFrame *hwf,
  3487. int upload)
  3488. {
  3489. int err;
  3490. VulkanFramesPriv *fp = hwfc->hwctx;
  3491. VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
  3492. FFVulkanFunctions *vk = &p->vkctx.vkfn;
  3493. int host_mapped = 0;
  3494. AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0];
  3495. VkBufferImageCopy region[AV_NUM_DATA_POINTERS]; // always one per plane
  3496. const int planes = av_pix_fmt_count_planes(swf->format);
  3497. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(swf->format);
  3498. const int nb_images = ff_vk_count_images(hwf_vk);
  3499. static const VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
  3500. VK_IMAGE_ASPECT_PLANE_0_BIT,
  3501. VK_IMAGE_ASPECT_PLANE_1_BIT,
  3502. VK_IMAGE_ASPECT_PLANE_2_BIT, };
  3503. VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
  3504. int nb_img_bar = 0;
  3505. AVBufferRef *bufs[AV_NUM_DATA_POINTERS];
  3506. int nb_bufs = 0;
  3507. VkCommandBuffer cmd_buf;
  3508. FFVkExecContext *exec;
  3509. /* Sanity checking */
  3510. if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) {
  3511. av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
  3512. return AVERROR(EINVAL);
  3513. }
  3514. if (swf->width > hwfc->width || swf->height > hwfc->height)
  3515. return AVERROR(EINVAL);
  3516. /* Setup buffers first */
  3517. if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) {
  3518. err = host_map_frame(hwfc, bufs, &nb_bufs, swf, region, upload);
  3519. if (err >= 0)
  3520. host_mapped = 1;
  3521. }
  3522. if (!host_mapped) {
  3523. err = get_plane_buf(hwfc, &bufs[0], swf, region, upload);
  3524. if (err < 0)
  3525. goto end;
  3526. nb_bufs = 1;
  3527. if (upload) {
  3528. err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 1);
  3529. if (err < 0)
  3530. goto end;
  3531. }
  3532. }
  3533. exec = ff_vk_exec_get(&fp->upload_exec);
  3534. cmd_buf = exec->buf;
  3535. ff_vk_exec_start(&p->vkctx, exec);
  3536. /* Prep destination Vulkan frame */
  3537. err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, hwf,
  3538. VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
  3539. VK_PIPELINE_STAGE_2_TRANSFER_BIT);
  3540. if (err < 0)
  3541. goto end;
  3542. /* No need to declare buf deps for synchronous transfers */
  3543. if (upload) {
  3544. err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, nb_bufs, 1);
  3545. if (err < 0) {
  3546. ff_vk_exec_discard_deps(&p->vkctx, exec);
  3547. goto end;
  3548. }
  3549. }
    ff_vk_frame_barrier(&p->vkctx, exec, hwf, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
                        upload ? VK_ACCESS_TRANSFER_WRITE_BIT :
                                 VK_ACCESS_TRANSFER_READ_BIT,
                        upload ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL :
                                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                        VK_QUEUE_FAMILY_IGNORED);

    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
            .sType                   = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers    = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
        });
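
    /* Issue one copy per plane. The FFMIN clamps cover every packing case:
     * a single staging buffer for all planes (nb_bufs == 1), one host-mapped
     * buffer per plane, a multi-plane frame backed by a single image, or one
     * image per plane. VkBufferImageCopy expects bufferRowLength in texels,
     * so the byte stride is divided by the texel step and restored afterwards,
     * since the same region array is reused for the CPU-side copy. */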
    for (int i = 0; i < planes; i++) {
        int buf_idx = FFMIN(i, (nb_bufs - 1));
        int img_idx = FFMIN(i, (nb_images - 1));
        FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[buf_idx]->data;

        uint32_t orig_stride = region[i].bufferRowLength;
        region[i].bufferRowLength /= desc->comp[i].step;
        region[i].imageSubresource.aspectMask = plane_aspect[(planes != nb_images) +
                                                             i*(planes != nb_images)];

        if (upload)
            vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf,
                                     hwf_vk->img[img_idx],
                                     img_bar[img_idx].newLayout,
                                     1, &region[i]);
        else
            vk->CmdCopyImageToBuffer(cmd_buf, hwf_vk->img[img_idx],
                                     img_bar[img_idx].newLayout,
                                     vkbuf->buf,
                                     1, &region[i]);

        region[i].bufferRowLength = orig_stride;
    }
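
    /* Uploads may complete asynchronously, as the execution context holds
     * references to the staging buffers. Downloads must wait for the GPU,
     * and, unless the destination was host-mapped (in which case the GPU
     * wrote straight into its memory), copy the staging buffer back out. */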
    err = ff_vk_exec_submit(&p->vkctx, exec);
    if (err < 0) {
        ff_vk_exec_discard_deps(&p->vkctx, exec);
    } else if (!upload) {
        ff_vk_exec_wait(&p->vkctx, exec);
        if (!host_mapped)
            err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 0);
    }

end:
    for (int i = 0; i < nb_bufs; i++)
        av_buffer_unref(&bufs[i]);

    return err;
}
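
/* Dispatch for transfers into a Vulkan frame: CUDA frames take a direct
 * device-to-device path when the required external memory and semaphore
 * extensions are present; everything else goes through the generic
 * buffer-based path, provided the source is a software frame. */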
static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
                                   const AVFrame *src)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    switch (src->format) {
#if CONFIG_CUDA
    case AV_PIX_FMT_CUDA:
#ifdef _WIN32
        if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
            (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
#else
        if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
            (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
#endif
            return vulkan_transfer_data_from_cuda(hwfc, dst, src);
        /* Fall through to the generic path if the extensions are missing */
#endif
    default:
        if (src->hw_frames_ctx)
            return AVERROR(ENOSYS);
        else
            return vulkan_transfer_frame(hwfc, (AVFrame *)src, dst, 1);
    }
}
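
/* Direct Vulkan -> CUDA transfer: exports the Vulkan image's memory and
 * semaphores to CUDA, orders the copy against other users of the frame via
 * the exported semaphores, and copies each plane on the CUDA stream. */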
#if CONFIG_CUDA
static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
                                        const AVFrame *src)
{
    int err;
    CUcontext dummy;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    VulkanFramesPriv *fp = hwfc->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);

    AVHWFramesContext *cuda_fc = (AVHWFramesContext *)dst->hw_frames_ctx->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };

    dst_f = (AVVkFrame *)src->data[0];

    err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        return err;

    err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
    if (err < 0)
        return err;

    err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
    if (err < 0) {
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return err;
    }

    dst_int = dst_f->internal;
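
    /* Semaphore protocol: wait until each plane's semaphore reaches its
     * current value, signal value + 1 once the copies are queued, and bump
     * the CPU-side counters only after the signal is successfully enqueued. */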
    for (int i = 0; i < planes; i++) {
        s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0;
        s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1;
    }

    err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
                                                     planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++) {
        CUDA_MEMCPY2D cpy = {
            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
            .dstDevice     = (CUdeviceptr)dst->data[i],
            .dstPitch      = dst->linesize[i],
            .dstY          = 0,

            .srcMemoryType = CU_MEMORYTYPE_ARRAY,
            .srcArray      = dst_int->cu_array[i],
        };

        int w, h;
        get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, hwfc->height, i);

        cpy.WidthInBytes = w * desc->comp[i].step;
        cpy.Height = h;

        err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
        if (err < 0)
            goto fail;
    }

    err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
                                                       planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++)
        dst_f->sem_value[i]++;

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");

    return prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT);

fail:
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    vulkan_free_internal(dst_f);
    av_buffer_unref(&dst->buf[0]);
    return err;
}
#endif

static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
                                     const AVFrame *src)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    switch (dst->format) {
#if CONFIG_CUDA
    case AV_PIX_FMT_CUDA:
#ifdef _WIN32
        if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
            (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
#else
        if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
            (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
#endif
            return vulkan_transfer_data_to_cuda(hwfc, dst, src);
        /* Fall through to the generic path if the extensions are missing */
#endif
    default:
        if (dst->hw_frames_ctx)
            return AVERROR(ENOSYS);
        else
            return vulkan_transfer_frame(hwfc, dst, (AVFrame *)src, 0);
    }
}
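
/* A frames context derived from another device needs no special handling;
 * it is initialized exactly like a natively-created one. */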
static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
                                   AVHWFramesContext *src_fc, int flags)
{
    return vulkan_frames_init(dst_fc);
}

/* Allocates a zeroed AVVkFrame along with its internal state, including the
 * mutex used to serialize updates to the frame's state. */
AVVkFrame *av_vk_frame_alloc(void)
{
    int err;
    AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
    if (!f)
        return NULL;

    f->internal = av_mallocz(sizeof(*f->internal));
    if (!f->internal) {
        av_free(f);
        return NULL;
    }

    err = pthread_mutex_init(&f->internal->update_mutex, NULL);
    if (err != 0) {
        av_free(f->internal);
        av_free(f);
        return NULL;
    }

    return f;
}
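
/* Function table through which the generic hwcontext layer dispatches into
 * this backend. */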
const HWContextType ff_hwcontext_type_vulkan = {
    .type                   = AV_HWDEVICE_TYPE_VULKAN,
    .name                   = "Vulkan",

    .device_hwctx_size      = sizeof(VulkanDevicePriv),
    .frames_hwctx_size      = sizeof(VulkanFramesPriv),

    .device_init            = &vulkan_device_init,
    .device_uninit          = &vulkan_device_uninit,
    .device_create          = &vulkan_device_create,
    .device_derive          = &vulkan_device_derive,

    .frames_get_constraints = &vulkan_frames_get_constraints,
    .frames_init            = vulkan_frames_init,
    .frames_get_buffer      = vulkan_get_buffer,
    .frames_uninit          = vulkan_frames_uninit,

    .transfer_get_formats   = vulkan_transfer_get_formats,
    .transfer_data_to       = vulkan_transfer_data_to,
    .transfer_data_from     = vulkan_transfer_data_from,

    .map_to                 = vulkan_map_to,
    .map_from               = vulkan_map_from,
    .frames_derive_to       = &vulkan_frames_derive_to,

    .pix_fmts = (const enum AVPixelFormat []) {
        AV_PIX_FMT_VULKAN,
        AV_PIX_FMT_NONE
    },
};