hwcontext_vulkan.c
/*
 * Copyright (c) Lynne
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#define VK_NO_PROTOTYPES
#define VK_ENABLE_BETA_EXTENSIONS

#ifdef _WIN32
#include <windows.h> /* Included to prevent conflicts with CreateSemaphore */
#include <versionhelpers.h>
#include "compat/w32dlfcn.h"
#else
#include <dlfcn.h>
#include <unistd.h>
#endif

#include "thread.h"

#include "config.h"
#include "pixdesc.h"
#include "avstring.h"
#include "imgutils.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"
#include "mem.h"

#include "vulkan.h"
#include "vulkan_loader.h"

#if CONFIG_VAAPI
#include "hwcontext_vaapi.h"
#endif

#if CONFIG_LIBDRM
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#endif
#ifdef __linux__
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
#endif

#if HAVE_LINUX_DMA_BUF_H
#include <sys/ioctl.h>
#include <linux/dma-buf.h>
#endif

#if CONFIG_CUDA
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif
typedef struct VulkanDeviceFeatures {
    VkPhysicalDeviceFeatures2 device;

    VkPhysicalDeviceVulkan11Features vulkan_1_1;
    VkPhysicalDeviceVulkan12Features vulkan_1_2;
    VkPhysicalDeviceVulkan13Features vulkan_1_3;
    VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore;

    VkPhysicalDeviceVideoMaintenance1FeaturesKHR video_maintenance_1;

    VkPhysicalDeviceShaderObjectFeaturesEXT shader_object;
    VkPhysicalDeviceCooperativeMatrixFeaturesKHR cooperative_matrix;
    VkPhysicalDeviceDescriptorBufferFeaturesEXT descriptor_buffer;
    VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float;
    VkPhysicalDeviceOpticalFlowFeaturesNV optical_flow;

#ifdef VK_KHR_shader_relaxed_extended_instruction
    VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR relaxed_extended_instruction;
#endif
} VulkanDeviceFeatures;
typedef struct VulkanDevicePriv {
    /**
     * The public AVVulkanDeviceContext. See hwcontext_vulkan.h for it.
     */
    AVVulkanDeviceContext p;

    /* Vulkan library and loader functions */
    void *libvulkan;

    FFVulkanContext vkctx;
    AVVulkanDeviceQueueFamily *compute_qf;
    AVVulkanDeviceQueueFamily *transfer_qf;

    /* Properties */
    VkPhysicalDeviceProperties2 props;
    VkPhysicalDeviceMemoryProperties mprops;
    VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;

    /* Opaque FD external semaphore properties */
    VkExternalSemaphoreProperties ext_sem_props_opaque;

    /* Enabled features */
    VulkanDeviceFeatures feats;

    /* Queues */
    pthread_mutex_t **qf_mutex;
    uint32_t nb_tot_qfs;
    uint32_t img_qfs[5];
    uint32_t nb_img_qfs;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Settings */
    int use_linear_images;

    /* Option to allocate all image planes in a single allocation */
    int contiguous_planes;

    /* Disable multiplane images */
    int disable_multiplane;

    /* Nvidia */
    int dev_is_nvidia;
} VulkanDevicePriv;
typedef struct VulkanFramesPriv {
    /**
     * The public AVVulkanFramesContext. See hwcontext_vulkan.h for it.
     */
    AVVulkanFramesContext p;

    /* Image conversions */
    FFVkExecPool compute_exec;

    /* Image transfers */
    FFVkExecPool upload_exec;
    FFVkExecPool download_exec;

    /* Temporary buffer pools */
    AVBufferPool *tmp;

    /* Modifier info list to free at uninit */
    VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
} VulkanFramesPriv;
typedef struct AVVkFrameInternal {
    pthread_mutex_t update_mutex;

#if CONFIG_CUDA
    /* Importing external memory into CUDA is really expensive, so we keep
     * the memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
#ifdef _WIN32
    HANDLE ext_mem_handle[AV_NUM_DATA_POINTERS];
    HANDLE ext_sem_handle[AV_NUM_DATA_POINTERS];
#endif
#endif
} AVVkFrameInternal;
/* Initialize all structs in VulkanDeviceFeatures */
static void device_features_init(AVHWDeviceContext *ctx, VulkanDeviceFeatures *feats)
{
    VulkanDevicePriv *p = ctx->hwctx;

#define OPT_CHAIN(STRUCT_P, EXT_FLAG, TYPE)                  \
    do {                                                     \
        if ((EXT_FLAG == FF_VK_EXT_NO_FLAG) ||               \
            (p->vkctx.extensions & EXT_FLAG)) {              \
            (STRUCT_P)->sType = TYPE;                        \
            ff_vk_link_struct(&feats->device, STRUCT_P);     \
        }                                                    \
    } while (0)

    feats->device = (VkPhysicalDeviceFeatures2) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
    };

    OPT_CHAIN(&feats->vulkan_1_1, FF_VK_EXT_NO_FLAG,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
    OPT_CHAIN(&feats->vulkan_1_2, FF_VK_EXT_NO_FLAG,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
    OPT_CHAIN(&feats->vulkan_1_3, FF_VK_EXT_NO_FLAG,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES);

    OPT_CHAIN(&feats->timeline_semaphore, FF_VK_EXT_PORTABILITY_SUBSET,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES);

    OPT_CHAIN(&feats->video_maintenance_1, FF_VK_EXT_VIDEO_MAINTENANCE_1,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR);

    OPT_CHAIN(&feats->shader_object, FF_VK_EXT_SHADER_OBJECT,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT);
    OPT_CHAIN(&feats->cooperative_matrix, FF_VK_EXT_COOP_MATRIX,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR);
    OPT_CHAIN(&feats->descriptor_buffer, FF_VK_EXT_DESCRIPTOR_BUFFER,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT);
    OPT_CHAIN(&feats->atomic_float, FF_VK_EXT_ATOMIC_FLOAT,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT);

#ifdef VK_KHR_shader_relaxed_extended_instruction
    OPT_CHAIN(&feats->relaxed_extended_instruction, FF_VK_EXT_RELAXED_EXTENDED_INSTR,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR);
#endif

    OPT_CHAIN(&feats->optical_flow, FF_VK_EXT_OPTICAL_FLOW,
              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV);
#undef OPT_CHAIN
}
/* Copy all needed device features */
static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceFeatures *src)
{
#define COPY_VAL(VAL)                                        \
    do {                                                     \
        dst->VAL = src->VAL;                                 \
    } while (0)

    COPY_VAL(device.features.shaderImageGatherExtended);
    COPY_VAL(device.features.shaderStorageImageReadWithoutFormat);
    COPY_VAL(device.features.shaderStorageImageWriteWithoutFormat);
    COPY_VAL(device.features.fragmentStoresAndAtomics);
    COPY_VAL(device.features.vertexPipelineStoresAndAtomics);
    COPY_VAL(device.features.shaderInt64);
    COPY_VAL(device.features.shaderInt16);
    COPY_VAL(device.features.shaderFloat64);

    COPY_VAL(vulkan_1_1.samplerYcbcrConversion);
    COPY_VAL(vulkan_1_1.storagePushConstant16);
    COPY_VAL(vulkan_1_1.storageBuffer16BitAccess);
    COPY_VAL(vulkan_1_1.uniformAndStorageBuffer16BitAccess);

    COPY_VAL(vulkan_1_2.timelineSemaphore);
    COPY_VAL(vulkan_1_2.scalarBlockLayout);
    COPY_VAL(vulkan_1_2.bufferDeviceAddress);
    COPY_VAL(vulkan_1_2.hostQueryReset);
    COPY_VAL(vulkan_1_2.storagePushConstant8);
    COPY_VAL(vulkan_1_2.shaderInt8);
    COPY_VAL(vulkan_1_2.storageBuffer8BitAccess);
    COPY_VAL(vulkan_1_2.uniformAndStorageBuffer8BitAccess);
    COPY_VAL(vulkan_1_2.shaderFloat16);
    COPY_VAL(vulkan_1_2.shaderBufferInt64Atomics);
    COPY_VAL(vulkan_1_2.shaderSharedInt64Atomics);
    COPY_VAL(vulkan_1_2.vulkanMemoryModel);
    COPY_VAL(vulkan_1_2.vulkanMemoryModelDeviceScope);

    COPY_VAL(vulkan_1_3.dynamicRendering);
    COPY_VAL(vulkan_1_3.maintenance4);
    COPY_VAL(vulkan_1_3.synchronization2);
    COPY_VAL(vulkan_1_3.computeFullSubgroups);
    COPY_VAL(vulkan_1_3.shaderZeroInitializeWorkgroupMemory);

    COPY_VAL(timeline_semaphore.timelineSemaphore);

    COPY_VAL(video_maintenance_1.videoMaintenance1);

    COPY_VAL(shader_object.shaderObject);

    COPY_VAL(cooperative_matrix.cooperativeMatrix);

    COPY_VAL(descriptor_buffer.descriptorBuffer);
    COPY_VAL(descriptor_buffer.descriptorBufferPushDescriptors);

    COPY_VAL(atomic_float.shaderBufferFloat32Atomics);
    COPY_VAL(atomic_float.shaderBufferFloat32AtomicAdd);

#ifdef VK_KHR_shader_relaxed_extended_instruction
    COPY_VAL(relaxed_extended_instruction.shaderRelaxedExtendedInstruction);
#endif

    COPY_VAL(optical_flow.opticalFlow);
#undef COPY_VAL
}
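
/* Note on the flow (descriptive comment, inferred from the code above):
 * device_features_init() chains only the feature structs whose extensions
 * are actually available, and device_features_copy_needed() then copies
 * just the feature bits FFmpeg uses from the queried set into the set kept
 * in VulkanDevicePriv.feats ("Enabled features"), so unsupported or
 * unneeded features are never requested at device creation. */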
#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT)
#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)

static const struct FFVkFormatEntry {
    VkFormat vkf;
    enum AVPixelFormat pixfmt;
    VkImageAspectFlags aspect;
    int vk_planes;
    int nb_images;
    int nb_images_fallback;
    const VkFormat fallback[5];
} vk_formats_list[] = {
    /* Gray formats */
    { VK_FORMAT_R8_UNORM,   AV_PIX_FMT_GRAY8,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM   } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GRAY10,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM  } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GRAY12,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM  } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GRAY14,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM  } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GRAY16,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM  } },
    { VK_FORMAT_R32_UINT,   AV_PIX_FMT_GRAY32,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_UINT   } },
    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } },

    /* RGB formats */
    { VK_FORMAT_B8G8R8A8_UNORM,           AV_PIX_FMT_BGRA,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
    { VK_FORMAT_R8G8B8A8_UNORM,           AV_PIX_FMT_RGBA,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
    { VK_FORMAT_R8G8B8_UNORM,             AV_PIX_FMT_RGB24,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM } },
    { VK_FORMAT_B8G8R8_UNORM,             AV_PIX_FMT_BGR24,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM } },
    { VK_FORMAT_R16G16B16_UNORM,          AV_PIX_FMT_RGB48,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM } },
    { VK_FORMAT_R16G16B16A16_UNORM,       AV_PIX_FMT_RGBA64,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_R5G6B5_UNORM_PACK16,      AV_PIX_FMT_RGB565,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
    { VK_FORMAT_B5G6R5_UNORM_PACK16,      AV_PIX_FMT_BGR565,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
    { VK_FORMAT_B8G8R8A8_UNORM,           AV_PIX_FMT_BGR0,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
    { VK_FORMAT_R8G8B8A8_UNORM,           AV_PIX_FMT_RGB0,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
    { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
    { VK_FORMAT_A2B10G10R10_UNORM_PACK32, AV_PIX_FMT_X2BGR10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2B10G10R10_UNORM_PACK32 } },
    { VK_FORMAT_R32G32B32_SFLOAT,         AV_PIX_FMT_RGBF32,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32_SFLOAT } },
    { VK_FORMAT_R32G32B32A32_SFLOAT,      AV_PIX_FMT_RGBAF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32A32_SFLOAT } },
    { VK_FORMAT_R32G32B32_UINT,           AV_PIX_FMT_RGB96,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32_UINT } },
    { VK_FORMAT_R32G32B32A32_UINT,        AV_PIX_FMT_RGBA128, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32A32_UINT } },

    /* Planar RGB */
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRP10,  VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRP12,  VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRP14,  VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRP16,  VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },

    /* Planar RGB + Alpha */
    { VK_FORMAT_R8_UNORM,   AV_PIX_FMT_GBRAP,   VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP14, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },

    /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,                  AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM } },
    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,               AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    /* Two-plane 422 YUV at 8, 10 and 16 bits */
    { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM,                  AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM } },
    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM,               AV_PIX_FMT_P216, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    /* Two-plane 444 YUV at 8, 10 and 16 bits */
    { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM,                  AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM } },
    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM,               AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,    AV_PIX_FMT_YUV420P,   ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,    AV_PIX_FMT_YUV422P,   ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,    AV_PIX_FMT_YUV444P,   ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    /* Single plane 422 at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8B8G8R8_422_UNORM,                     AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
    { VK_FORMAT_B8G8R8G8_422_UNORM,                     AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
    { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_G16B16G16R16_422_UNORM,                 AV_PIX_FMT_Y216,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },

    /* Single plane 444 at 8, 10, 12 and 16 bits */
    { VK_FORMAT_B8G8R8A8_UNORM,                    AV_PIX_FMT_UYVA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
    { VK_FORMAT_A2R10G10B10_UNORM_PACK32,          AV_PIX_FMT_XV30, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_R16G16B16A16_UNORM,                AV_PIX_FMT_XV48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
};
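
/* Table invariants (descriptive comment, inferred from the fields and their
 * use in vkfmt_from_pixfmt2() below): nb_images is the number of VkImages
 * allocated when the native, possibly multiplane, vkf format is usable
 * (1 for multiplane formats), while nb_images_fallback and fallback[]
 * describe the per-plane, single-plane representation used otherwise. */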
static const int nb_vk_formats_list = FF_ARRAY_ELEMS(vk_formats_list);

const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
    for (int i = 0; i < nb_vk_formats_list; i++)
        if (vk_formats_list[i].pixfmt == p)
            return vk_formats_list[i].fallback;
    return NULL;
}
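
/* Illustrative usage (hypothetical caller, not part of this file): the
 * returned array holds one representative VkFormat per plane, per the
 * fallback column of the table above, e.g.:
 *
 *     const VkFormat *fmts = av_vkfmt_from_pixfmt(AV_PIX_FMT_NV12);
 *     // fmts[0] == VK_FORMAT_R8_UNORM   (luma plane)
 *     // fmts[1] == VK_FORMAT_R8G8_UNORM (interleaved chroma plane)
 */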
static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p)
{
    for (int i = 0; i < nb_vk_formats_list; i++)
        if (vk_formats_list[i].pixfmt == p)
            return &vk_formats_list[i];
    return NULL;
}
/* Pure malice, Khronos ("Malitia pura"): the format-feature and image-usage
 * flags mirror each other but use different enum types, so generate both
 * mapping directions from a single table by redefining MAP_TO */
#define FN_MAP_TO(dst_t, dst_name, src_t, src_name)                 \
static av_unused dst_t map_ ##src_name## _to_ ##dst_name(src_t src) \
{                                                                   \
    dst_t dst = 0x0;                                                \
    MAP_TO(VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT,                   \
           VK_IMAGE_USAGE_SAMPLED_BIT);                             \
    MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT,                    \
           VK_IMAGE_USAGE_TRANSFER_SRC_BIT);                        \
    MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT,                    \
           VK_IMAGE_USAGE_TRANSFER_DST_BIT);                        \
    MAP_TO(VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT,                   \
           VK_IMAGE_USAGE_STORAGE_BIT);                             \
    MAP_TO(VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT,                \
           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);                    \
    MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR,         \
           VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR);                \
    MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR,            \
           VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR);                \
    MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR,            \
           VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR);                \
    MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR,          \
           VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR);                \
    return dst;                                                     \
}

#define MAP_TO(flag1, flag2) if (src & flag2) dst |= flag1;
FN_MAP_TO(VkFormatFeatureFlagBits2, feats, VkImageUsageFlags, usage)
#undef MAP_TO
#define MAP_TO(flag1, flag2) if (src & flag1) dst |= flag2;
FN_MAP_TO(VkImageUsageFlags, usage, VkFormatFeatureFlagBits2, feats)
#undef MAP_TO
#undef FN_MAP_TO
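
/* Resolve an AVPixelFormat to the VkFormat(s) used to represent it: either
 * the single (possibly multiplane) native format, or the per-plane fallback
 * list, depending on what the physical device supports for the requested
 * tiling, storage and multiplane constraints. */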
static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
                              VkImageTiling tiling,
                              VkFormat fmts[AV_NUM_DATA_POINTERS], /* Output format list */
                              int *nb_images,                      /* Output number of images */
                              VkImageAspectFlags *aspect,          /* Output aspect */
                              VkImageUsageFlags *supported_usage,  /* Output supported usage */
                              int disable_multiplane, int need_storage)
{
    VulkanDevicePriv *priv = dev_ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &priv->p;
    FFVulkanFunctions *vk = &priv->vkctx.vkfn;

    const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
                                                 VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT  |
                                                 VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;

    for (int i = 0; i < nb_vk_formats_list; i++) {
        if (vk_formats_list[i].pixfmt == p) {
            VkFormatProperties3 fprops = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3,
            };
            VkFormatProperties2 prop = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
                .pNext = &fprops,
            };
            VkFormatFeatureFlagBits2 feats_primary, feats_secondary;
            int basics_primary = 0, basics_secondary = 0;
            int storage_primary = 0, storage_secondary = 0;

            vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
                                                   vk_formats_list[i].vkf,
                                                   &prop);

            feats_primary = tiling == VK_IMAGE_TILING_LINEAR ?
                            fprops.linearTilingFeatures : fprops.optimalTilingFeatures;
            basics_primary = (feats_primary & basic_flags) == basic_flags;
            storage_primary = !!(feats_primary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);

            if (vk_formats_list[i].vkf != vk_formats_list[i].fallback[0]) {
                vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
                                                       vk_formats_list[i].fallback[0],
                                                       &prop);
                feats_secondary = tiling == VK_IMAGE_TILING_LINEAR ?
                                  fprops.linearTilingFeatures : fprops.optimalTilingFeatures;
                basics_secondary = (feats_secondary & basic_flags) == basic_flags;
                storage_secondary = !!(feats_secondary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
            } else {
                basics_secondary = basics_primary;
                storage_secondary = storage_primary;
            }

            if (basics_primary &&
                !(disable_multiplane && vk_formats_list[i].vk_planes > 1) &&
                (!need_storage || (need_storage && (storage_primary | storage_secondary)))) {
                if (fmts) {
                    if (vk_formats_list[i].nb_images > 1) {
                        for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++)
                            fmts[j] = vk_formats_list[i].fallback[j];
                    } else {
                        fmts[0] = vk_formats_list[i].vkf;
                    }
                }
                if (nb_images)
                    *nb_images = 1;
                if (aspect)
                    *aspect = vk_formats_list[i].aspect;
                if (supported_usage)
                    *supported_usage = map_feats_to_usage(feats_primary) |
                                       ((need_storage && (storage_primary | storage_secondary)) ?
                                        VK_IMAGE_USAGE_STORAGE_BIT : 0);
                return 0;
            } else if (basics_secondary &&
                       (!need_storage || (need_storage && storage_secondary))) {
                if (fmts) {
                    for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++)
                        fmts[j] = vk_formats_list[i].fallback[j];
                }
                if (nb_images)
                    *nb_images = vk_formats_list[i].nb_images_fallback;
                if (aspect)
                    *aspect = vk_formats_list[i].aspect;
                if (supported_usage)
                    *supported_usage = map_feats_to_usage(feats_secondary);
                return 0;
            } else {
                return AVERROR(ENOTSUP);
            }
        }
    }

    return AVERROR(EINVAL);
}
#if CONFIG_VULKAN_STATIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance,
                                                               const char *pName);
#endif
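
/* Load the Vulkan loader library at runtime (or use the statically linked
 * entrypoint) and resolve vkGetInstanceProcAddr, from which all other
 * Vulkan functions are subsequently loaded. */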
static int load_libvulkan(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;

#if CONFIG_VULKAN_STATIC
    hwctx->get_proc_addr = vkGetInstanceProcAddr;
#else
    static const char *lib_names[] = {
#if defined(_WIN32)
        "vulkan-1.dll",
#elif defined(__APPLE__)
        "libvulkan.dylib",
        "libvulkan.1.dylib",
        "libMoltenVK.dylib",
#else
        "libvulkan.so.1",
        "libvulkan.so",
#endif
    };

    for (int i = 0; i < FF_ARRAY_ELEMS(lib_names); i++) {
        p->libvulkan = dlopen(lib_names[i], RTLD_NOW | RTLD_LOCAL);
        if (p->libvulkan)
            break;
    }

    if (!p->libvulkan) {
        av_log(ctx, AV_LOG_ERROR, "Unable to open the libvulkan library!\n");
        return AVERROR_UNKNOWN;
    }

    hwctx->get_proc_addr = (PFN_vkGetInstanceProcAddr)dlsym(p->libvulkan, "vkGetInstanceProcAddr");
#endif /* CONFIG_VULKAN_STATIC */

    return 0;
}
typedef struct VulkanOptExtension {
    const char *name;
    FFVulkanExtensions flag;
} VulkanOptExtension;
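
/* Optional extensions: each entry pairs an extension name with the
 * FFVulkanExtensions flag that check_extensions() below ORs into
 * p->vkctx.extensions when the extension is found and enabled. */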
static const VulkanOptExtension optional_instance_exts[] = {
    { VK_EXT_LAYER_SETTINGS_EXTENSION_NAME,          FF_VK_EXT_NO_FLAG },
#ifdef __APPLE__
    { VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
#endif
};

static const VulkanOptExtension optional_device_exts[] = {
    /* Misc or required by other extensions */
    { VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME,       FF_VK_EXT_PORTABILITY_SUBSET },
    { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,          FF_VK_EXT_PUSH_DESCRIPTOR },
    { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,        FF_VK_EXT_DESCRIPTOR_BUFFER },
    { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME,      FF_VK_EXT_DEVICE_DRM },
    { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME,      FF_VK_EXT_ATOMIC_FLOAT },
    { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME,       FF_VK_EXT_COOP_MATRIX },
    { VK_NV_OPTICAL_FLOW_EXTENSION_NAME,              FF_VK_EXT_OPTICAL_FLOW },
    { VK_EXT_SHADER_OBJECT_EXTENSION_NAME,            FF_VK_EXT_SHADER_OBJECT },
    { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME,      FF_VK_EXT_VIDEO_MAINTENANCE_1 },

    /* Imports/exports */
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,       FF_VK_EXT_EXTERNAL_FD_MEMORY },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,  FF_VK_EXT_EXTERNAL_DMABUF_MEMORY },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, FF_VK_EXT_DRM_MODIFIER_FLAGS },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,    FF_VK_EXT_EXTERNAL_FD_SEM },
    { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,     FF_VK_EXT_EXTERNAL_HOST_MEMORY },
#ifdef _WIN32
    { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,    FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
    { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
#endif

    /* Video encoding/decoding */
    { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,              FF_VK_EXT_VIDEO_QUEUE },
    { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME,       FF_VK_EXT_VIDEO_ENCODE_QUEUE },
    { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME,       FF_VK_EXT_VIDEO_DECODE_QUEUE },
    { VK_KHR_VIDEO_ENCODE_H264_EXTENSION_NAME,        FF_VK_EXT_VIDEO_ENCODE_H264 },
    { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME,        FF_VK_EXT_VIDEO_DECODE_H264 },
    { VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME,        FF_VK_EXT_VIDEO_ENCODE_H265 },
    { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME,        FF_VK_EXT_VIDEO_DECODE_H265 },
    { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME,         FF_VK_EXT_VIDEO_DECODE_AV1 },
};
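
/* Debug messenger callback: filters out known false-positive validation
 * messages and forwards the rest to av_log() at a level matching the
 * Vulkan severity. */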
static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                                           VkDebugUtilsMessageTypeFlagsEXT messageType,
                                           const VkDebugUtilsMessengerCallbackDataEXT *data,
                                           void *priv)
{
    int l;
    AVHWDeviceContext *ctx = priv;

    /* Ignore false positives */
    switch (data->messageIdNumber) {
    case 0x086974c1: /* BestPractices-vkCreateCommandPool-command-buffer-reset */
    case 0xfd92477a: /* BestPractices-vkAllocateMemory-small-allocation */
    case 0x618ab1e7: /* VUID-VkImageViewCreateInfo-usage-02275 */
    case 0x30f4ac70: /* VUID-VkImageCreateInfo-pNext-06811 */
    case 0xa05b236e: /* UNASSIGNED-Threading-MultipleThreads-Write */
        return VK_FALSE;
    default:
        break;
    }

    switch (severity) {
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
    default:                                              l = AV_LOG_DEBUG;   break;
    }

    av_log(ctx, l, "%s\n", data->pMessage);
    for (int i = 0; i < data->cmdBufLabelCount; i++)
        av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);

    return VK_FALSE;
}
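
/* ADD_VAL_TO_LIST grows a heap-allocated string array by one entry and
 * appends a copy of val, jumping to the local fail: label on allocation
 * failure; RELEASE_PROPS frees such an array together with its entries. */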
#define ADD_VAL_TO_LIST(list, count, val)                          \
    do {                                                           \
        list = av_realloc_array(list, sizeof(*list), ++count);     \
        if (!list) {                                               \
            err = AVERROR(ENOMEM);                                 \
            goto fail;                                             \
        }                                                          \
        list[count - 1] = av_strdup(val);                          \
        if (!list[count - 1]) {                                    \
            err = AVERROR(ENOMEM);                                 \
            goto fail;                                             \
        }                                                          \
    } while(0)

#define RELEASE_PROPS(props, count)                                \
    if (props) {                                                   \
        for (int i = 0; i < count; i++)                            \
            av_free((void *)((props)[i]));                         \
        av_free((void *)props);                                    \
    }
enum FFVulkanDebugMode {
    FF_VULKAN_DEBUG_NONE = 0,
    /* Standard GPU-assisted validation */
    FF_VULKAN_DEBUG_VALIDATE = 1,
    /* Passes printfs in shaders to the debug callback */
    FF_VULKAN_DEBUG_PRINTF = 2,
    /* Enables extra printouts */
    FF_VULKAN_DEBUG_PRACTICES = 3,
    /* Disables validation but keeps shader debug info and optimizations */
    FF_VULKAN_DEBUG_PROFILE = 4,

    FF_VULKAN_DEBUG_NB,
};
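
/* Build the list of enabled instance (dev == 0) or device (dev == 1)
 * extensions: every supported optional extension from the tables above,
 * any extensions the active debug mode requires, plus user-specified
 * extensions from the '+'-separated "instance_extensions" or
 * "device_extensions" options. */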
static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                            const char * const **dst, uint32_t *num,
                            enum FFVulkanDebugMode debug_mode)
{
    const char *tstr;
    const char **extension_names = NULL;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    int err = 0, found, extensions_found = 0;

    const char *mod;
    int optional_exts_num;
    uint32_t sup_ext_count;
    char *user_exts_str = NULL;
    AVDictionaryEntry *user_exts;
    VkExtensionProperties *sup_ext;
    const VulkanOptExtension *optional_exts;

    if (!dev) {
        mod = "instance";
        optional_exts = optional_instance_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
        user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
    } else {
        mod = "device";
        optional_exts = optional_device_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
        user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                               &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                               &sup_ext_count, sup_ext);
    }

    for (int i = 0; i < optional_exts_num; i++) {
        tstr = optional_exts[i].name;
        found = 0;

        if (dev &&
            ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) ||
             (debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
             (debug_mode == FF_VULKAN_DEBUG_PRACTICES)) &&
            !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME)) {
            continue;
        }

        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (!found)
            continue;

        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
        p->vkctx.extensions |= optional_exts[i].flag;
        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
    }

    if (!dev &&
        ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) ||
         (debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
         (debug_mode == FF_VULKAN_DEBUG_PRACTICES))) {
        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }

#ifdef VK_KHR_shader_relaxed_extended_instruction
    if (((debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
         (debug_mode == FF_VULKAN_DEBUG_PROFILE)) && dev) {
        tstr = VK_KHR_SHADER_RELAXED_EXTENDED_INSTRUCTION_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug_printf/profile enabled, but extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }
#endif

    if (user_exts_str) {
        char *save, *token = av_strtok(user_exts_str, "+", &save);
        while (token) {
            found = 0;
            for (int j = 0; j < sup_ext_count; j++) {
                if (!strcmp(token, sup_ext[j].extensionName)) {
                    found = 1;
                    break;
                }
            }
            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
                ADD_VAL_TO_LIST(extension_names, extensions_found, token);
            } else {
                av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n",
                       mod, token);
            }
            token = av_strtok(NULL, "+", &save);
        }
    }

    *dst = extension_names;
    *num = extensions_found;

    av_free(user_exts_str);
    av_free(sup_ext);
    return 0;

fail:
    RELEASE_PROPS(extension_names, extensions_found);
    av_free(user_exts_str);
    av_free(sup_ext);
    return err;
}
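
/* Select the instance layers to enable and parse the "debug" option:
 * the validate, printf and practices modes require the standard Khronos
 * validation layer, while profile mode enables no extra layers. Custom
 * layers come from the '+'-separated "layers" option. */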
static int check_layers(AVHWDeviceContext *ctx, AVDictionary *opts,
                        const char * const **dst, uint32_t *num,
                        enum FFVulkanDebugMode *debug_mode)
{
    int err = 0;
    VulkanDevicePriv *priv = ctx->hwctx;
    FFVulkanFunctions *vk = &priv->vkctx.vkfn;

    static const char layer_standard_validation[] = { "VK_LAYER_KHRONOS_validation" };
    int layer_standard_validation_found = 0;

    uint32_t sup_layer_count;
    VkLayerProperties *sup_layers;

    AVDictionaryEntry *user_layers = av_dict_get(opts, "layers", NULL, 0);
    char *user_layers_str = NULL;
    char *save, *token;

    const char **enabled_layers = NULL;
    uint32_t enabled_layers_count = 0;

    AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
    enum FFVulkanDebugMode mode;

    *debug_mode = mode = FF_VULKAN_DEBUG_NONE;

    /* Get a list of all layers */
    vk->EnumerateInstanceLayerProperties(&sup_layer_count, NULL);
    sup_layers = av_malloc_array(sup_layer_count, sizeof(VkLayerProperties));
    if (!sup_layers)
        return AVERROR(ENOMEM);
    vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers);

    av_log(ctx, AV_LOG_VERBOSE, "Supported layers:\n");
    for (int i = 0; i < sup_layer_count; i++)
        av_log(ctx, AV_LOG_VERBOSE, "\t%s\n", sup_layers[i].layerName);

    /* If no user layers or debug layers are given, return */
    if (!debug_opt && !user_layers)
        goto end;

    /* Check for any properly supported validation layer */
    if (debug_opt) {
        if (!strcmp(debug_opt->value, "profile")) {
            mode = FF_VULKAN_DEBUG_PROFILE;
        } else if (!strcmp(debug_opt->value, "printf")) {
            mode = FF_VULKAN_DEBUG_PRINTF;
        } else if (!strcmp(debug_opt->value, "validate")) {
            mode = FF_VULKAN_DEBUG_VALIDATE;
        } else if (!strcmp(debug_opt->value, "practices")) {
            mode = FF_VULKAN_DEBUG_PRACTICES;
        } else {
            char *end_ptr = NULL;
            int idx = strtol(debug_opt->value, &end_ptr, 10);
            if (end_ptr == debug_opt->value || end_ptr[0] != '\0' ||
                idx < 0 || idx >= FF_VULKAN_DEBUG_NB) {
                av_log(ctx, AV_LOG_ERROR, "Invalid debugging mode \"%s\"\n",
                       debug_opt->value);
                err = AVERROR(EINVAL);
                goto end;
            }
            mode = idx;
        }
    }

    /* If mode is VALIDATE, PRINTF or PRACTICES, try to find the standard validation layer */
    if ((mode == FF_VULKAN_DEBUG_VALIDATE) ||
        (mode == FF_VULKAN_DEBUG_PRINTF) ||
        (mode == FF_VULKAN_DEBUG_PRACTICES)) {
        for (int i = 0; i < sup_layer_count; i++) {
            if (!strcmp(layer_standard_validation, sup_layers[i].layerName)) {
                av_log(ctx, AV_LOG_VERBOSE, "Standard validation layer %s is enabled\n",
                       layer_standard_validation);
                ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, layer_standard_validation);
                *debug_mode = mode;
                layer_standard_validation_found = 1;
                break;
            }
        }
        if (!layer_standard_validation_found) {
            av_log(ctx, AV_LOG_ERROR,
                   "Validation Layer \"%s\" not supported\n", layer_standard_validation);
            err = AVERROR(ENOTSUP);
            goto end;
        }
    } else if (mode == FF_VULKAN_DEBUG_PROFILE) {
        *debug_mode = mode;
    }
  828. /* Process any custom layers enabled */
  829. if (user_layers) {
  830. int found;
  831. user_layers_str = av_strdup(user_layers->value);
  832. if (!user_layers_str) {
  833. err = AVERROR(ENOMEM);
  834. goto fail;
  835. }
  836. token = av_strtok(user_layers_str, "+", &save);
  837. while (token) {
  838. found = 0;
  839. /* If debug=1/2 was specified as an option, skip this layer */
  840. if (!strcmp(layer_standard_validation, token) && layer_standard_validation_found) {
  841. token = av_strtok(NULL, "+", &save);
  842. break;
  843. }
  844. /* Try to find the layer in the list of supported layers */
  845. for (int j = 0; j < sup_layer_count; j++) {
  846. if (!strcmp(token, sup_layers[j].layerName)) {
  847. found = 1;
  848. break;
  849. }
  850. }
  851. if (found) {
  852. av_log(ctx, AV_LOG_VERBOSE, "Using layer: %s\n", token);
  853. ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, token);
  854. /* If debug was not set as an option, force it */
  855. if (!strcmp(layer_standard_validation, token))
  856. *debug_mode = FF_VULKAN_DEBUG_VALIDATE;
  857. } else {
  858. av_log(ctx, AV_LOG_ERROR,
  859. "Layer \"%s\" not supported\n", token);
  860. err = AVERROR(EINVAL);
  861. goto end;
  862. }
  863. token = av_strtok(NULL, "+", &save);
  864. }
  865. }
  866. fail:
  867. end:
  868. av_free(sup_layers);
  869. av_free(user_layers_str);
  870. if (err < 0) {
  871. RELEASE_PROPS(enabled_layers, enabled_layers_count);
  872. } else {
  873. *dst = enabled_layers;
  874. *num = enabled_layers_count;
  875. }
  876. return err;
  877. }
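
/* For reference: callers typically reach the code below through the public
 * API, e.g. (illustrative sketch, not part of this file):
 *
 *     AVBufferRef *dev_ref = NULL;
 *     AVDictionary *opts = NULL;
 *     av_dict_set(&opts, "debug", "validate", 0);
 *     int ret = av_hwdevice_ctx_create(&dev_ref, AV_HWDEVICE_TYPE_VULKAN,
 *                                      NULL, opts, 0);
 *     av_dict_free(&opts);
 */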
/* Creates a VkInstance */
static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts,
                           enum FFVulkanDebugMode *debug_mode)
{
    int err = 0;
    VkResult ret;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkApplicationInfo application_info = {
        .sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pApplicationName   = "ffmpeg",
        .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                              LIBAVUTIL_VERSION_MINOR,
                                              LIBAVUTIL_VERSION_MICRO),
        .pEngineName        = "libavutil",
        .apiVersion         = VK_API_VERSION_1_3,
        .engineVersion      = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                              LIBAVUTIL_VERSION_MINOR,
                                              LIBAVUTIL_VERSION_MICRO),
    };
    VkValidationFeaturesEXT validation_features = {
        .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
    };
    VkInstanceCreateInfo inst_props = {
        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    if (!hwctx->get_proc_addr) {
        err = load_libvulkan(ctx);
        if (err < 0)
            return err;
    }

    err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 0, 0);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n");
        return err;
    }

    err = check_layers(ctx, opts, &inst_props.ppEnabledLayerNames,
                       &inst_props.enabledLayerCount, debug_mode);
    if (err)
        goto fail;

    /* Check for present/missing extensions */
    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, *debug_mode);
    hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
    hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;
    if (err < 0)
        goto fail;

    /* Enable debug features if needed */
    if (*debug_mode == FF_VULKAN_DEBUG_VALIDATE) {
        static const VkValidationFeatureEnableEXT feat_list_validate[] = {
            VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
            VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
            VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,
        };
        validation_features.pEnabledValidationFeatures = feat_list_validate;
        validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_validate);
        inst_props.pNext = &validation_features;
    } else if (*debug_mode == FF_VULKAN_DEBUG_PRINTF) {
        static const VkValidationFeatureEnableEXT feat_list_debug[] = {
            VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
            VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
            VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT,
        };
        validation_features.pEnabledValidationFeatures = feat_list_debug;
        validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_debug);
        inst_props.pNext = &validation_features;
    } else if (*debug_mode == FF_VULKAN_DEBUG_PRACTICES) {
        static const VkValidationFeatureEnableEXT feat_list_practices[] = {
            VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
            VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT,
        };
        validation_features.pEnabledValidationFeatures = feat_list_practices;
        validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_practices);
        inst_props.pNext = &validation_features;
    }

#ifdef __APPLE__
    for (int i = 0; i < inst_props.enabledExtensionCount; i++) {
        if (!strcmp(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME,
                    inst_props.ppEnabledExtensionNames[i])) {
            inst_props.flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
            break;
        }
    }
#endif

    /* Try to create the instance */
    ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    /* Check for errors */
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 0);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n");
        goto fail;
    }

    /* Setup debugging callback if needed */
    if ((*debug_mode == FF_VULKAN_DEBUG_VALIDATE) ||
        (*debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
        (*debug_mode == FF_VULKAN_DEBUG_PRACTICES)) {
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType           = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT    |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType     = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT    |
                               VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = vk_dbg_callback,
            .pUserData       = ctx,
        };

        vk->CreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
                                         hwctx->alloc, &p->debug_ctx);
    }

    err = 0;

fail:
    RELEASE_PROPS(inst_props.ppEnabledLayerNames, inst_props.enabledLayerCount);
    return err;
}

typedef struct VulkanDeviceSelection {
    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
    int has_uuid;
    uint32_t drm_major;         /* Will use this second unless !has_drm */
    uint32_t drm_minor;         /* Will use this second unless !has_drm */
    uint32_t has_drm;           /* has drm node info */
    const char *name;           /* Will use this third unless NULL */
    uint32_t pci_device;        /* Will use this fourth unless 0x0 */
    uint32_t vendor_id;         /* Last resort to find something deterministic */
    int index;                  /* Finally fall back to index */
} VulkanDeviceSelection;

static const char *vk_dev_type(enum VkPhysicalDeviceType type)
{
    switch (type) {
    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
    default:                                     return "unknown";
    }
}
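
/* Device selection precedence, as implemented below: UUID, then DRM node
 * major/minor, then name substring, then PCI device ID, then vendor ID, and
 * finally a plain index. On the command line this is usually driven by e.g.
 * "-init_hw_device vulkan:1" (index) or a name string, which
 * vulkan_device_create() further down parses into a VulkanDeviceSelection. */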
/* Finds a device */
static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
{
    int err = 0, choice = -1;
    uint32_t num;
    VkResult ret;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkPhysicalDevice *devices = NULL;
    VkPhysicalDeviceIDProperties *idp = NULL;
    VkPhysicalDeviceProperties2 *prop = NULL;
    VkPhysicalDeviceDrmPropertiesEXT *drm_prop = NULL;

    ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL);
    if (ret != VK_SUCCESS || !num) {
        av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", ff_vk_ret2str(ret));
        return AVERROR(ENODEV);
    }

    devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
    if (!devices)
        return AVERROR(ENOMEM);

    ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR(ENODEV);
        goto end;
    }

    prop = av_calloc(num, sizeof(*prop));
    if (!prop) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    idp = av_calloc(num, sizeof(*idp));
    if (!idp) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) {
        drm_prop = av_calloc(num, sizeof(*drm_prop));
        if (!drm_prop) {
            err = AVERROR(ENOMEM);
            goto end;
        }
    }

    av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
    for (int i = 0; i < num; i++) {
        if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) {
            drm_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT;
            idp[i].pNext      = &drm_prop[i];
        }
        idp[i].sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
        prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
        prop[i].pNext = &idp[i];

        vk->GetPhysicalDeviceProperties2(devices[i], &prop[i]);
        av_log(ctx, AV_LOG_VERBOSE, "    %d: %s (%s) (0x%x)\n", i,
               prop[i].properties.deviceName,
               vk_dev_type(prop[i].properties.deviceType),
               prop[i].properties.deviceID);
    }

    if (select->has_uuid) {
        for (int i = 0; i < num; i++) {
            if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
        err = AVERROR(ENODEV);
        goto end;
    } else if ((p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) && select->has_drm) {
        for (int i = 0; i < num; i++) {
            if ((select->drm_major == drm_prop[i].primaryMajor &&
                 select->drm_minor == drm_prop[i].primaryMinor) ||
                (select->drm_major == drm_prop[i].renderMajor &&
                 select->drm_minor == drm_prop[i].renderMinor)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given DRM node numbers %i:%i!\n",
               select->drm_major, select->drm_minor);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->name) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
        for (int i = 0; i < num; i++) {
            if (strstr(prop[i].properties.deviceName, select->name)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
               select->name);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->pci_device) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
        for (int i = 0; i < num; i++) {
            if (select->pci_device == prop[i].properties.deviceID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
               select->pci_device);
        err = AVERROR(EINVAL);
        goto end;
    } else if (select->vendor_id) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
        for (int i = 0; i < num; i++) {
            if (select->vendor_id == prop[i].properties.vendorID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
               select->vendor_id);
        err = AVERROR(ENODEV);
        goto end;
    } else {
        if (select->index < num) {
            choice = select->index;
            goto end;
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
               select->index);
        err = AVERROR(ENODEV);
        goto end;
    }

end:
    if (choice > -1) {
        av_log(ctx, AV_LOG_VERBOSE, "Device %d selected: %s (%s) (0x%x)\n",
               choice, prop[choice].properties.deviceName,
               vk_dev_type(prop[choice].properties.deviceType),
               prop[choice].properties.deviceID);
        hwctx->phys_dev = devices[choice];
    }

    av_free(devices);
    av_free(prop);
    av_free(idp);
    av_free(drm_prop);
    return err;
}
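
/* Scoring example: when asked for a pure transfer queue on a device exposing
 * a GRAPHICS|COMPUTE|TRANSFER family and a TRANSFER-only family, the latter
 * wins, since av_popcount() of its flags is smaller. The timestampValidBits
 * field is repurposed as a use counter (zeroed in setup_queue_families()),
 * so each pick also nudges later picks towards less-used families. */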
/* Picks the least used qf with the fewest unneeded flags, or -1 if none found */
static inline int pick_queue_family(VkQueueFamilyProperties2 *qf, uint32_t num_qf,
                                    VkQueueFlagBits flags)
{
    int index = -1;
    uint32_t min_score = UINT32_MAX;

    for (int i = 0; i < num_qf; i++) {
        VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;

        /* Per the spec, reporting transfer caps is optional for these 2 types */
        if ((flags & VK_QUEUE_TRANSFER_BIT) &&
            (qflags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)))
            qflags |= VK_QUEUE_TRANSFER_BIT;

        if (qflags & flags) {
            uint32_t score = av_popcount(qflags) + qf[i].queueFamilyProperties.timestampValidBits;
            if (score < min_score) {
                index = i;
                min_score = score;
            }
        }
    }

    if (index > -1)
        qf[index].queueFamilyProperties.timestampValidBits++;

    return index;
}

static inline int pick_video_queue_family(VkQueueFamilyProperties2 *qf,
                                          VkQueueFamilyVideoPropertiesKHR *qf_vid, uint32_t num_qf,
                                          VkVideoCodecOperationFlagBitsKHR flags)
{
    int index = -1;
    uint32_t min_score = UINT32_MAX;

    for (int i = 0; i < num_qf; i++) {
        const VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;
        const VkVideoCodecOperationFlagBitsKHR vflags = qf_vid[i].videoCodecOperations;

        if (!(qflags & (VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_VIDEO_DECODE_BIT_KHR)))
            continue;

        if (vflags & flags) {
            uint32_t score = av_popcount(vflags) + qf[i].queueFamilyProperties.timestampValidBits;
            if (score < min_score) {
                index = i;
                min_score = score;
            }
        }
    }

    if (index > -1)
        qf[index].queueFamilyProperties.timestampValidBits++;

    return index;
}
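
/* Builds the VkDeviceQueueCreateInfo array for device creation: one entry
 * per distinct queue family picked above, with all queues of that family
 * requested at equal priority (1.0). */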
static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
    uint32_t num;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkQueueFamilyProperties2 *qf = NULL;
    VkQueueFamilyVideoPropertiesKHR *qf_vid = NULL;

    /* First get the number of queue families */
    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
    if (!num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    /* Then allocate memory */
    qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties2));
    if (!qf)
        return AVERROR(ENOMEM);

    qf_vid = av_malloc_array(num, sizeof(VkQueueFamilyVideoPropertiesKHR));
    if (!qf_vid) {
        av_free(qf);
        return AVERROR(ENOMEM);
    }

    for (uint32_t i = 0; i < num; i++) {
        qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
        };
        qf[i] = (VkQueueFamilyProperties2) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
            .pNext = p->vkctx.extensions & FF_VK_EXT_VIDEO_QUEUE ? &qf_vid[i] : NULL,
        };
    }

    /* Finally retrieve the queue families */
    vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &num, qf);

    av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n");
    for (int i = 0; i < num; i++) {
        av_log(ctx, AV_LOG_VERBOSE, "    %i:%s%s%s%s%s%s%s%s (queues: %i)\n", i,
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_OPTICAL_FLOW_BIT_NV) ? " optical_flow" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "",
               qf[i].queueFamilyProperties.queueCount);

        /* We use this field to keep a score of how many times we've used that
         * queue family in order to make better choices. */
        qf[i].queueFamilyProperties.timestampValidBits = 0;
    }

    hwctx->nb_qf = 0;

    /* Pick each queue family to use */
#define PICK_QF(type, vid_op)                                            \
    do {                                                                 \
        uint32_t i;                                                      \
        uint32_t idx;                                                    \
                                                                         \
        if (vid_op)                                                      \
            idx = pick_video_queue_family(qf, qf_vid, num, vid_op);      \
        else                                                             \
            idx = pick_queue_family(qf, num, type);                      \
                                                                         \
        if (idx == -1)                                                   \
            continue;                                                    \
                                                                         \
        for (i = 0; i < hwctx->nb_qf; i++) {                             \
            if (hwctx->qf[i].idx == idx) {                               \
                hwctx->qf[i].flags |= type;                              \
                hwctx->qf[i].video_caps |= vid_op;                       \
                break;                                                   \
            }                                                            \
        }                                                                \
        if (i == hwctx->nb_qf) {                                         \
            hwctx->qf[i].idx = idx;                                      \
            hwctx->qf[i].num = qf[idx].queueFamilyProperties.queueCount; \
            hwctx->qf[i].flags = type;                                   \
            hwctx->qf[i].video_caps = vid_op;                            \
            hwctx->nb_qf++;                                              \
        }                                                                \
    } while (0)

    PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
    PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
    PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
    PICK_QF(VK_QUEUE_OPTICAL_FLOW_BIT_NV, VK_VIDEO_CODEC_OPERATION_NONE_KHR);

    PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR);
    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR);

    PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR);
    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR);

    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);

    av_free(qf);
    av_free(qf_vid);

#undef PICK_QF

    cd->pQueueCreateInfos = av_malloc_array(hwctx->nb_qf,
                                            sizeof(VkDeviceQueueCreateInfo));
    if (!cd->pQueueCreateInfos)
        return AVERROR(ENOMEM);

    for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
        int dup = 0;
        float *weights = NULL;
        VkDeviceQueueCreateInfo *pc;
        for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) {
            if (hwctx->qf[i].idx == cd->pQueueCreateInfos[j].queueFamilyIndex) {
                dup = 1;
                break;
            }
        }
        if (dup)
            continue;

        weights = av_malloc_array(hwctx->qf[i].num, sizeof(float));
        if (!weights) {
            for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++)
                av_free((void *)cd->pQueueCreateInfos[j].pQueuePriorities);
            av_free((void *)cd->pQueueCreateInfos);
            return AVERROR(ENOMEM);
        }

        for (uint32_t j = 0; j < hwctx->qf[i].num; j++)
            weights[j] = 1.0;

        pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
        pc[cd->queueCreateInfoCount++] = (VkDeviceQueueCreateInfo) {
            .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
            .queueFamilyIndex = hwctx->qf[i].idx,
            .queueCount       = hwctx->qf[i].num,
            .pQueuePriorities = weights,
        };
    }

#if FF_API_VULKAN_FIXED_QUEUES
FF_DISABLE_DEPRECATION_WARNINGS
    /* Setup deprecated fields */
    hwctx->queue_family_index        = -1;
    hwctx->queue_family_comp_index   = -1;
    hwctx->queue_family_tx_index     = -1;
    hwctx->queue_family_encode_index = -1;
    hwctx->queue_family_decode_index = -1;

#define SET_OLD_QF(field, nb_field, type)             \
    do {                                              \
        if (field < 0 && hwctx->qf[i].flags & type) { \
            field = hwctx->qf[i].idx;                 \
            nb_field = hwctx->qf[i].num;              \
        }                                             \
    } while (0)

    for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
        SET_OLD_QF(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
        SET_OLD_QF(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
        SET_OLD_QF(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
        SET_OLD_QF(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
        SET_OLD_QF(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
    }

#undef SET_OLD_QF
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    return 0;
}

/* Only resources created by vulkan_device_create should be released here,
 * resources created by vulkan_device_init should be released by
 * vulkan_device_uninit, to make sure we don't free user-provided resources,
 * and there is no leak.
 */
static void vulkan_device_free(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    if (hwctx->act_dev)
        vk->DestroyDevice(hwctx->act_dev, hwctx->alloc);

    if (p->debug_ctx)
        vk->DestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
                                          hwctx->alloc);

    if (hwctx->inst)
        vk->DestroyInstance(hwctx->inst, hwctx->alloc);

    if (p->libvulkan)
        dlclose(p->libvulkan);

    RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
    RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
}

static void vulkan_device_uninit(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->hwctx;

    for (uint32_t i = 0; i < p->nb_tot_qfs; i++) {
        pthread_mutex_destroy(p->qf_mutex[i]);
        av_freep(&p->qf_mutex[i]);
    }
    av_freep(&p->qf_mutex);

    ff_vk_uninit(&p->vkctx);
}
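
/* Overall device creation sequence: create a VkInstance, pick a physical
 * device, enable extensions and features, set up queue families, then
 * create the VkDevice. The ctx->free callback installed at the end releases
 * everything created here. */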
static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
                                         VulkanDeviceSelection *dev_select,
                                         int disable_multiplane,
                                         AVDictionary *opts, int flags)
{
    int err = 0;
    VkResult ret;
    AVDictionaryEntry *opt_d;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    enum FFVulkanDebugMode debug_mode = FF_VULKAN_DEBUG_NONE;
    VulkanDeviceFeatures supported_feats = { 0 };
    VkDeviceCreateInfo dev_info = {
        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
    };

    /* Create an instance if not given one */
    if ((err = create_instance(ctx, opts, &debug_mode)))
        goto end;

    /* Find a physical device (if not given one) */
    if ((err = find_device(ctx, dev_select)))
        goto end;

    /* Find and enable extensions for the physical device */
    if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
                                &dev_info.enabledExtensionCount, debug_mode))) {
        for (int i = 0; i < dev_info.queueCreateInfoCount; i++)
            av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities);
        av_free((void *)dev_info.pQueueCreateInfos);
        goto end;
    }

    /* Get all supported features for the physical device */
    device_features_init(ctx, &supported_feats);
    vk->GetPhysicalDeviceFeatures2(hwctx->phys_dev, &supported_feats.device);

    /* Copy all needed features from those supported and activate them */
    device_features_init(ctx, &p->feats);
    device_features_copy_needed(&p->feats, &supported_feats);
    dev_info.pNext = p->feats.device.pNext;
    dev_info.pEnabledFeatures = &p->feats.device.features;

    /* Setup enabled queue families */
    if ((err = setup_queue_families(ctx, &dev_info)))
        goto end;

    /* Finally create the device */
    ret = vk->CreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
                           &hwctx->act_dev);

    for (int i = 0; i < dev_info.queueCreateInfoCount; i++)
        av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities);
    av_free((void *)dev_info.pQueueCreateInfos);

    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
               ff_vk_ret2str(ret));
        for (int i = 0; i < dev_info.enabledExtensionCount; i++)
            av_free((void *)dev_info.ppEnabledExtensionNames[i]);
        av_free((void *)dev_info.ppEnabledExtensionNames);
        err = AVERROR_EXTERNAL;
        goto end;
    }

    /* Tiled images setting, use them by default */
    opt_d = av_dict_get(opts, "linear_images", NULL, 0);
    if (opt_d)
        p->use_linear_images = strtol(opt_d->value, NULL, 10);

    /* The disable_multiplane argument takes precedence over the option */
    p->disable_multiplane = disable_multiplane;
    if (!p->disable_multiplane) {
        opt_d = av_dict_get(opts, "disable_multiplane", NULL, 0);
        if (opt_d)
            p->disable_multiplane = strtol(opt_d->value, NULL, 10);
    }

    /* Set the public device feature struct and its pNext chain */
    hwctx->device_features = p->feats.device;

    /* Set the list of all active extensions */
    hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
    hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;

    /* The extension lists need to be freed */
    ctx->free = vulkan_device_free;

end:
    return err;
}
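
/* Default queue locking callbacks. Every (family, index) pair gets its own
 * mutex, allocated in vulkan_device_init() below; API users may install
 * their own lock_queue/unlock_queue callbacks instead, in which case these
 * defaults are not used. */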
static void lock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
{
    VulkanDevicePriv *p = ctx->hwctx;
    pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
}

static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
{
    VulkanDevicePriv *p = ctx->hwctx;
    pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
}

static int vulkan_device_init(AVHWDeviceContext *ctx)
{
    int err = 0;
    uint32_t qf_num;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkQueueFamilyProperties2 *qf;
    VkQueueFamilyVideoPropertiesKHR *qf_vid;
    VkPhysicalDeviceExternalSemaphoreInfo ext_sem_props_info;
    int graph_index, comp_index, tx_index, enc_index, dec_index;

    /* Set device extension flags */
    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
        for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
            if (!strcmp(hwctx->enabled_dev_extensions[i],
                        optional_device_exts[j].name)) {
                p->vkctx.extensions |= optional_device_exts[j].flag;
                break;
            }
        }
    }

    err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 1);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n");
        return err;
    }

    p->props.sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    p->props.pNext  = &p->hprops;
    p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;

    vk->GetPhysicalDeviceProperties2(hwctx->phys_dev, &p->props);
    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n",
           p->props.properties.deviceName);
    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %"PRIu64"\n",
           p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment:              %"SIZE_SPECIFIER"\n",
           p->props.properties.limits.minMemoryMapAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    nonCoherentAtomSize:                %"PRIu64"\n",
           p->props.properties.limits.nonCoherentAtomSize);
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
        av_log(ctx, AV_LOG_VERBOSE, "    minImportedHostPointerAlignment:    %"PRIu64"\n",
               p->hprops.minImportedHostPointerAlignment);

    p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);

    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
    if (!qf_num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    ext_sem_props_info = (VkPhysicalDeviceExternalSemaphoreInfo) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO,
    };

    /* Opaque FD semaphore properties */
    ext_sem_props_info.handleType =
#ifdef _WIN32
        IsWindows8OrGreater()
            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT;
#else
        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
    p->ext_sem_props_opaque.sType = VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES;
    vk->GetPhysicalDeviceExternalSemaphoreProperties(hwctx->phys_dev,
                                                     &ext_sem_props_info,
                                                     &p->ext_sem_props_opaque);

    qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties2));
    if (!qf)
        return AVERROR(ENOMEM);

    qf_vid = av_malloc_array(qf_num, sizeof(VkQueueFamilyVideoPropertiesKHR));
    if (!qf_vid) {
        av_free(qf);
        return AVERROR(ENOMEM);
    }

    for (uint32_t i = 0; i < qf_num; i++) {
        qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
        };
        qf[i] = (VkQueueFamilyProperties2) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
            .pNext = p->vkctx.extensions & FF_VK_EXT_VIDEO_QUEUE ? &qf_vid[i] : NULL,
        };
    }

    vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &qf_num, qf);

    p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex));
    if (!p->qf_mutex) {
        err = AVERROR(ENOMEM);
        goto end;
    }
    p->nb_tot_qfs = qf_num;

    for (uint32_t i = 0; i < qf_num; i++) {
        p->qf_mutex[i] = av_calloc(qf[i].queueFamilyProperties.queueCount,
                                   sizeof(**p->qf_mutex));
        if (!p->qf_mutex[i]) {
            err = AVERROR(ENOMEM);
            goto end;
        }
        for (uint32_t j = 0; j < qf[i].queueFamilyProperties.queueCount; j++) {
            err = pthread_mutex_init(&p->qf_mutex[i][j], NULL);
            if (err != 0) {
                av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed: %s\n",
                       av_err2str(err));
                err = AVERROR(err);
                goto end;
            }
        }
    }

#if FF_API_VULKAN_FIXED_QUEUES
FF_DISABLE_DEPRECATION_WARNINGS
    graph_index = hwctx->nb_graphics_queues ? hwctx->queue_family_index : -1;
    comp_index  = hwctx->nb_comp_queues ? hwctx->queue_family_comp_index : -1;
    tx_index    = hwctx->nb_tx_queues ? hwctx->queue_family_tx_index : -1;
    dec_index   = hwctx->nb_decode_queues ? hwctx->queue_family_decode_index : -1;
    enc_index   = hwctx->nb_encode_queues ? hwctx->queue_family_encode_index : -1;

#define CHECK_QUEUE(type, required, fidx, ctx_qf, qc)                                           \
    do {                                                                                        \
        if (ctx_qf < 0 && required) {                                                           \
            av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing"      \
                   " in the context!\n", type);                                                 \
            err = AVERROR(EINVAL);                                                              \
            goto end;                                                                           \
        } else if (fidx < 0 || ctx_qf < 0) {                                                    \
            break;                                                                              \
        } else if (ctx_qf >= qf_num) {                                                          \
            av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
                   type, ctx_qf, qf_num);                                                       \
            err = AVERROR(EINVAL);                                                              \
            goto end;                                                                           \
        }                                                                                       \
                                                                                                \
        av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)"                        \
               " for%s%s%s%s%s\n",                                                              \
               ctx_qf, qc,                                                                      \
               ctx_qf == graph_index ? " graphics" : "",                                        \
               ctx_qf == comp_index  ? " compute" : "",                                         \
               ctx_qf == tx_index    ? " transfers" : "",                                       \
               ctx_qf == enc_index   ? " encode" : "",                                          \
               ctx_qf == dec_index   ? " decode" : "");                                         \
        graph_index = (ctx_qf == graph_index) ? -1 : graph_index;                               \
        comp_index  = (ctx_qf == comp_index)  ? -1 : comp_index;                                \
        tx_index    = (ctx_qf == tx_index)    ? -1 : tx_index;                                  \
        enc_index   = (ctx_qf == enc_index)   ? -1 : enc_index;                                 \
        dec_index   = (ctx_qf == dec_index)   ? -1 : dec_index;                                 \
    } while (0)

    CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index,        hwctx->nb_graphics_queues);
    CHECK_QUEUE("compute",  1, comp_index,  hwctx->queue_family_comp_index,   hwctx->nb_comp_queues);
    CHECK_QUEUE("upload",   1, tx_index,    hwctx->queue_family_tx_index,     hwctx->nb_tx_queues);
    CHECK_QUEUE("decode",   0, dec_index,   hwctx->queue_family_decode_index, hwctx->nb_decode_queues);
    CHECK_QUEUE("encode",   0, enc_index,   hwctx->queue_family_encode_index, hwctx->nb_encode_queues);

#undef CHECK_QUEUE

    /* Update the new queue family fields. If non-zero already,
     * it means API users have set it. */
    if (!hwctx->nb_qf) {
#define ADD_QUEUE(ctx_qf, qc, flag)                                   \
    do {                                                              \
        if (ctx_qf != -1) {                                           \
            hwctx->qf[hwctx->nb_qf++] = (AVVulkanDeviceQueueFamily) { \
                .idx = ctx_qf,                                        \
                .num = qc,                                            \
                .flags = flag,                                        \
            };                                                        \
        }                                                             \
    } while (0)

        ADD_QUEUE(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
        ADD_QUEUE(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
        ADD_QUEUE(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
        ADD_QUEUE(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
        ADD_QUEUE(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
#undef ADD_QUEUE
    }
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    for (int i = 0; i < hwctx->nb_qf; i++) {
        if (!hwctx->qf[i].video_caps &&
            hwctx->qf[i].flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR |
                                  VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) {
            hwctx->qf[i].video_caps = qf_vid[hwctx->qf[i].idx].videoCodecOperations;
        }
    }

    /* Setup array for pQueueFamilyIndices with used queue families */
    p->nb_img_qfs = 0;
    for (int i = 0; i < hwctx->nb_qf; i++) {
        int seen = 0;
        /* Make sure each entry is unique
         * (VUID-VkBufferCreateInfo-sharingMode-01419) */
        for (int j = (i - 1); j >= 0; j--) {
            if (hwctx->qf[i].idx == hwctx->qf[j].idx) {
                seen = 1;
                break;
            }
        }
        if (!seen)
            p->img_qfs[p->nb_img_qfs++] = hwctx->qf[i].idx;
    }

    if (!hwctx->lock_queue)
        hwctx->lock_queue = lock_queue;
    if (!hwctx->unlock_queue)
        hwctx->unlock_queue = unlock_queue;

    /* Get device capabilities */
    vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);

    p->vkctx.device = ctx;
    p->vkctx.hwctx  = hwctx;

    ff_vk_load_props(&p->vkctx);
    p->compute_qf  = ff_vk_qf_find(&p->vkctx, VK_QUEUE_COMPUTE_BIT, 0);
    p->transfer_qf = ff_vk_qf_find(&p->vkctx, VK_QUEUE_TRANSFER_BIT, 0);

end:
    av_free(qf_vid);
    av_free(qf);
    return err;
}
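
/* The device string is first parsed as a decimal index; if it does not
 * parse as a number, it is treated as a device name substring. E.g.
 * (hypothetical values) "1" selects the second enumerated GPU, while
 * "NVIDIA" matches the first device whose name contains that string. */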
static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
                                AVDictionary *opts, int flags)
{
    VulkanDeviceSelection dev_select = { 0 };
    if (device && device[0]) {
        char *end = NULL;
        dev_select.index = strtol(device, &end, 10);
        if (end == device) {
            dev_select.index = 0;
            dev_select.name  = device;
        }
    }

    return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags);
}
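
/* Illustrative sketch (not part of this file): deriving a Vulkan device
 * from an existing DRM/VAAPI/CUDA device goes through the public API:
 *
 *     AVBufferRef *vk_ref = NULL;
 *     int ret = av_hwdevice_ctx_create_derived(&vk_ref, AV_HWDEVICE_TYPE_VULKAN,
 *                                              src_ref, 0);
 *
 * where src_ref is an existing AVHWDeviceContext reference; that call ends
 * up in vulkan_device_derive() below. */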
static int vulkan_device_derive(AVHWDeviceContext *ctx,
                                AVHWDeviceContext *src_ctx,
                                AVDictionary *opts, int flags)
{
    av_unused VulkanDeviceSelection dev_select = { 0 };

    /* If there's only one device on the system, then even if it's not covered
     * by the following checks (e.g. non-PCIe ARM GPU), having an empty
     * dev_select will mean it'll get picked. */
    switch (src_ctx->type) {
#if CONFIG_VAAPI
    case AV_HWDEVICE_TYPE_VAAPI: {
        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
        VADisplay dpy = src_hwctx->display;
#if VA_CHECK_VERSION(1, 15, 0)
        VAStatus vas;
        VADisplayAttribute attr = {
            .type = VADisplayPCIID,
        };
#endif
        const char *vendor;

#if VA_CHECK_VERSION(1, 15, 0)
        vas = vaGetDisplayAttributes(dpy, &attr, 1);
        if (vas == VA_STATUS_SUCCESS && attr.flags != VA_DISPLAY_ATTRIB_NOT_SUPPORTED)
            dev_select.pci_device = (attr.value & 0xFFFF);
#endif

        if (!dev_select.pci_device) {
            vendor = vaQueryVendorString(dpy);
            if (!vendor) {
                av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
                return AVERROR_EXTERNAL;
            }

            if (strstr(vendor, "AMD"))
                dev_select.vendor_id = 0x1002;
        }

        return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags);
    }
#endif
#if CONFIG_LIBDRM
    case AV_HWDEVICE_TYPE_DRM: {
        int err;
        struct stat drm_node_info;
        drmDevice *drm_dev_info;
        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;

        err = fstat(src_hwctx->fd, &drm_node_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get node info from DRM fd: %s!\n",
                   av_err2str(AVERROR(errno)));
            return AVERROR_EXTERNAL;
        }

        dev_select.drm_major = major(drm_node_info.st_dev);
        dev_select.drm_minor = minor(drm_node_info.st_dev);
        dev_select.has_drm   = 1;

        err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd: %s!\n",
                   av_err2str(AVERROR(errno)));
            return AVERROR_EXTERNAL;
        }

        if (drm_dev_info->bustype == DRM_BUS_PCI)
            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;

        drmFreeDevice(&drm_dev_info);

        return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags);
    }
#endif
#if CONFIG_CUDA
    case AV_HWDEVICE_TYPE_CUDA: {
        AVHWDeviceContext *cuda_cu = src_ctx;
        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
                                               cu_internal->cuda_device));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
            return AVERROR_EXTERNAL;
        }

        dev_select.has_uuid = 1;

        /*
         * CUDA is not able to import multiplane images, so always derive a
         * Vulkan device with multiplane disabled.
         */
        return vulkan_device_create_internal(ctx, &dev_select, 1, opts, flags);
    }
#endif
    default:
        return AVERROR(ENOSYS);
    }
}

static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
                                         const void *hwconfig,
                                         AVHWFramesConstraints *constraints)
{
    int count = 0;
    VulkanDevicePriv *p = ctx->hwctx;

    for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
        count += vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
                                    p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
                                                           VK_IMAGE_TILING_OPTIMAL,
                                    NULL, NULL, NULL, NULL, p->disable_multiplane, 1) >= 0;
    }

    constraints->valid_sw_formats = av_malloc_array(count + 1,
                                                    sizeof(enum AVPixelFormat));
    if (!constraints->valid_sw_formats)
        return AVERROR(ENOMEM);

    count = 0;
    for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
        if (vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
                               p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
                                                      VK_IMAGE_TILING_OPTIMAL,
                               NULL, NULL, NULL, NULL, p->disable_multiplane, 1) >= 0) {
            constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt;
        }
    }
    constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;

    constraints->min_width  = 1;
    constraints->min_height = 1;
    constraints->max_width  = p->props.properties.limits.maxImageDimension2D;
    constraints->max_height = p->props.properties.limits.maxImageDimension2D;

    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
    if (!constraints->valid_hw_formats)
        return AVERROR(ENOMEM);

    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}
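
/* Memory type selection example: req->memoryTypeBits is a bitmask of
 * acceptable memory type indices reported by the driver for a given
 * resource. If it is 0b0110 and req_flags is DEVICE_LOCAL, the loop below
 * considers only types 1 and 2, and picks the first whose propertyFlags
 * include DEVICE_LOCAL and whose heap is large enough. */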
static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
                     VkMemoryPropertyFlagBits req_flags, const void *alloc_extension,
                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VulkanDevicePriv *p = ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    AVVulkanDeviceContext *dev_hwctx = &p->p;
    VkMemoryAllocateInfo alloc_info = {
        .sType          = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext          = alloc_extension,
        .allocationSize = req->size,
    };

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
        const VkMemoryType *type = &p->mprops.memoryTypes[i];

        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((type->propertyFlags & req_flags) != req_flags)
            continue;

        /* The memory type must be large enough */
        if (req->size > p->mprops.memoryHeaps[type->heapIndex].size)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vk->AllocateMemory(dev_hwctx->act_dev, &alloc_info,
                             dev_hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;

    return 0;
}

static void vulkan_free_internal(AVVkFrame *f)
{
    av_unused AVVkFrameInternal *internal = f->internal;

#if CONFIG_CUDA
    if (internal->cuda_fc_ref) {
        AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
        int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
        AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
        AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
        AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        for (int i = 0; i < planes; i++) {
            if (internal->cu_sem[i])
                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
            if (internal->cu_mma[i])
                CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
            if (internal->ext_mem[i])
                CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
#ifdef _WIN32
            if (internal->ext_sem_handle[i])
                CloseHandle(internal->ext_sem_handle[i]);
            if (internal->ext_mem_handle[i])
                CloseHandle(internal->ext_mem_handle[i]);
#endif
        }

        av_buffer_unref(&internal->cuda_fc_ref);
    }
#endif

    pthread_mutex_destroy(&internal->update_mutex);
    av_freep(&f->internal);
}

static void vulkan_frame_free(AVHWFramesContext *hwfc, AVVkFrame *f)
{
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    int nb_images = ff_vk_count_images(f);
    int nb_sems = 0;

    while (nb_sems < FF_ARRAY_ELEMS(f->sem) && f->sem[nb_sems])
        nb_sems++;

    if (nb_sems) {
        VkSemaphoreWaitInfo sem_wait = {
            .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
            .flags          = 0x0,
            .pSemaphores    = f->sem,
            .pValues        = f->sem_value,
            .semaphoreCount = nb_sems,
        };

        vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX);
    }

    vulkan_free_internal(f);

    for (int i = 0; i < nb_images; i++) {
        vk->DestroyImage(hwctx->act_dev,     f->img[i], hwctx->alloc);
        vk->FreeMemory(hwctx->act_dev,       f->mem[i], hwctx->alloc);
        vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
    }

    av_free(f);
}

static void vulkan_frame_free_cb(void *opaque, uint8_t *data)
{
    vulkan_frame_free(opaque, (AVVkFrame *)data);
}

static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
                          void *alloc_pnext, size_t alloc_pnext_stride)
{
    int img_cnt = 0, err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };

    while (f->img[img_cnt]) {
        int use_ded_mem;
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[img_cnt],
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = (void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride),
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

        if (f->tiling == VK_IMAGE_TILING_LINEAR)
            req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
                                                  p->props.properties.limits.minMemoryMapAlignment);

        /* In case the implementation prefers/requires dedicated allocation */
        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        if (use_ded_mem)
            ded_alloc.image = f->img[img_cnt];

        /* Allocate memory */
        if ((err = alloc_mem(ctx, &req.memoryRequirements,
                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                             use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                             &f->flags, &f->mem[img_cnt])))
            return err;

        f->size[img_cnt] = req.memoryRequirements.size;
        bind_info[img_cnt].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
        bind_info[img_cnt].image  = f->img[img_cnt];
        bind_info[img_cnt].memory = f->mem[img_cnt];

        img_cnt++;
    }

    /* Bind the allocated memory to the images */
    ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
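
/* Initial layout/access transitions a newly created frame may need before
 * use: GENERAL and WRITE for plain targets, EXTERNAL_IMPORT/EXPORT for
 * memory shared with other APIs, and the DECODING/ENCODING modes for the
 * Vulkan video DST/DPB layouts. See prepare_frame() below. */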
enum PrepMode {
    PREP_MODE_GENERAL,
    PREP_MODE_WRITE,
    PREP_MODE_EXTERNAL_EXPORT,
    PREP_MODE_EXTERNAL_IMPORT,
    PREP_MODE_DECODING_DST,
    PREP_MODE_DECODING_DPB,
    PREP_MODE_ENCODING_DPB,
};
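
/* Records and submits a one-off pipeline barrier that moves every image of
 * the frame into the layout/access mask required by pmode, using a
 * temporary, non-refcounted AVFrame wrapper so the regular execution
 * dependency machinery can be reused. */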
static int prepare_frame(AVHWFramesContext *hwfc, FFVkExecPool *ectx,
                         AVVkFrame *frame, enum PrepMode pmode)
{
    int err;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
    int nb_img_bar = 0;

    uint32_t dst_qf = VK_QUEUE_FAMILY_IGNORED;
    VkImageLayout new_layout;
    VkAccessFlags2 new_access;
    VkPipelineStageFlagBits2 src_stage = VK_PIPELINE_STAGE_2_NONE;

    /* This is dirty - but it works. The vulkan.c dependency system doesn't
     * free non-refcounted frames, and non-refcounted hardware frames cannot
     * happen anywhere outside of here. */
    AVBufferRef tmp_ref = {
        .data = (uint8_t *)hwfc,
    };
    AVFrame tmp_frame = {
        .data[0] = (uint8_t *)frame,
        .hw_frames_ctx = &tmp_ref,
    };

    VkCommandBuffer cmd_buf;
    FFVkExecContext *exec = ff_vk_exec_get(&p->vkctx, ectx);
    cmd_buf = exec->buf;
    ff_vk_exec_start(&p->vkctx, exec);

    err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, &tmp_frame,
                                   VK_PIPELINE_STAGE_2_NONE,
                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
    if (err < 0)
        return err;

    switch (pmode) {
    case PREP_MODE_GENERAL:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_WRITE:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_EXTERNAL_IMPORT:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        break;
    case PREP_MODE_EXTERNAL_EXPORT:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
        src_stage  = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
        break;
    case PREP_MODE_DECODING_DST:
        new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_DECODING_DPB:
        new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
        new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_ENCODING_DPB:
        new_layout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR;
        new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    }

    ff_vk_frame_barrier(&p->vkctx, exec, &tmp_frame, img_bar, &nb_img_bar,
                        src_stage,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        new_access, new_layout, dst_qf);

    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = img_bar,
        .imageMemoryBarrierCount = nb_img_bar,
    });

    err = ff_vk_exec_submit(&p->vkctx, exec);
    if (err < 0)
        return err;

    /* We can do this because there are no real dependencies */
    ff_vk_exec_discard_deps(&p->vkctx, exec);

    return 0;
}
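
/* Worked example: for NV12 (log2_chroma_w/h == 1) at 1920x1080, plane 0 is
 * 1920x1080 and plane 1 is 960x540; AV_CEIL_RSHIFT rounds odd dimensions
 * up, so 1919x1079 also yields a 960x540 chroma plane. */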
static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format,
                                int frame_w, int frame_h, int plane)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);

    /* Currently always true unless gray + alpha support is added */
    if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB ||
        !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) {
        *w = frame_w;
        *h = frame_h;
        return;
    }

    *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w);
    *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h);
}
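
/* Creates the VkImages (one per plane for multi-image formats), a timeline
 * semaphore per image, and fills in the frame's bookkeeping fields; memory
 * allocation and binding happen separately in alloc_bind_mem(). */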
static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
                        VkImageTiling tiling, VkImageUsageFlagBits usage,
                        VkImageCreateFlags flags, int nb_layers,
                        void *create_pnext)
{
    int err;
    VkResult ret;
    AVVulkanFramesContext *hwfc_vk = hwfc->hwctx;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    AVVkFrame *f;

    VkSemaphoreTypeCreateInfo sem_type_info = {
        .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
        .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
        .initialValue  = 0,
    };
    VkSemaphoreCreateInfo sem_spawn = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = &sem_type_info,
    };
    VkExportSemaphoreCreateInfo ext_sem_info_opaque = {
        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
#ifdef _WIN32
        .handleTypes = IsWindows8OrGreater()
            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
#else
        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
#endif
    };

    /* Check if exporting is supported before chaining any structs */
    if (p->ext_sem_props_opaque.externalSemaphoreFeatures & VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT) {
        if (p->vkctx.extensions & (FF_VK_EXT_EXTERNAL_WIN32_SEM | FF_VK_EXT_EXTERNAL_FD_SEM))
            ff_vk_link_struct(&sem_type_info, &ext_sem_info_opaque);
    }

    f = av_vk_frame_alloc();
    if (!f) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        return AVERROR(ENOMEM);
    }
    // TODO: check width and height for alignment in case of multiplanar (must be mod-2 if subsampled)
    /* Create the images */
    for (int i = 0; (hwfc_vk->format[i] != VK_FORMAT_UNDEFINED); i++) {
        VkImageCreateInfo create_info = {
            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext                 = create_pnext,
            .imageType             = VK_IMAGE_TYPE_2D,
            .format                = hwfc_vk->format[i],
            .extent.depth          = 1,
            .mipLevels             = 1,
            .arrayLayers           = nb_layers,
            .flags                 = flags,
            .tiling                = tiling,
            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
            .usage                 = usage,
            .samples               = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->img_qfs,
            .queueFamilyIndexCount = p->nb_img_qfs,
            .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                         VK_SHARING_MODE_EXCLUSIVE,
        };

        get_plane_wh(&create_info.extent.width, &create_info.extent.height,
                     hwfc->sw_format, hwfc->width, hwfc->height, i);

        ret = vk->CreateImage(hwctx->act_dev, &create_info,
                              hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        /* Create semaphore */
        ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
                                  hwctx->alloc, &f->sem[i]);
        if (ret != VK_SUCCESS) {
            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
        f->layout[i] = create_info.initialLayout;
        f->access[i] = 0x0;
        f->sem_value[i] = 0;
    }

    f->flags  = 0x0;
    f->tiling = tiling;

    *frame = f;
    return 0;

fail:
    vulkan_frame_free(hwfc, f);
    return err;
}
/* Checks if an export flag is supported, and if it is, ORs it with *iexp */
static void try_export_flags(AVHWFramesContext *hwfc,
                             VkExternalMemoryHandleTypeFlags *comp_handle_types,
                             VkExternalMemoryHandleTypeFlagBits *iexp,
                             VkExternalMemoryHandleTypeFlagBits exp)
{
    VkResult ret;
    AVVulkanFramesContext *hwctx = hwfc->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    AVVulkanDeviceContext *dev_hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info =
        ff_vk_find_struct(hwctx->create_pnext,
                          VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
    int has_mods = hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info;
    int nb_mods;

    VkExternalImageFormatProperties eprops = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
    };
    VkImageFormatProperties2 props = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
        .pNext = &eprops,
    };
    VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
        .sType                 = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
        .pNext                 = NULL,
        .pQueueFamilyIndices   = p->img_qfs,
        .queueFamilyIndexCount = p->nb_img_qfs,
        .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                     VK_SHARING_MODE_EXCLUSIVE,
    };
    VkPhysicalDeviceExternalImageFormatInfo enext = {
        .sType      = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
        .handleType = exp,
        .pNext      = has_mods ? &phy_dev_mod_info : NULL,
    };
    VkPhysicalDeviceImageFormatInfo2 pinfo = {
        .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
        .pNext  = !exp ? NULL : &enext,
        .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
        .type   = VK_IMAGE_TYPE_2D,
        .tiling = hwctx->tiling,
        .usage  = hwctx->usage,
        .flags  = VK_IMAGE_CREATE_ALIAS_BIT,
    };

    nb_mods = has_mods ? drm_mod_info->drmFormatModifierCount : 1;
    for (int i = 0; i < nb_mods; i++) {
        if (has_mods)
            phy_dev_mod_info.drmFormatModifier = drm_mod_info->pDrmFormatModifiers[i];

        ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
                                                          &pinfo, &props);
        if (ret == VK_SUCCESS) {
            *iexp |= exp;
            *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
        }
    }
}
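/* Buffer-pool allocator callback: creates a frame with exportable memory
 * where the platform supports it (opaque FDs on POSIX, opaque Win32 handles
 * on Windows), binds memory, then transitions the images into the layout
 * implied by the usage flags (decode/encode DPB, decode DST, transfer
 * destination, or general). */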
static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
{
    int err;
    AVVkFrame *f;
    AVBufferRef *avbuf = NULL;
    AVHWFramesContext *hwfc = opaque;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanFramesContext *hwctx = &fp->p;
    VkExternalMemoryHandleTypeFlags e = 0x0;
    VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];

    VkExternalMemoryImageCreateInfo eiinfo = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
        .pNext = hwctx->create_pnext,
    };

#ifdef _WIN32
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater()
                             ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
                             : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
#else
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e,
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
#endif

    for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
        eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
        eminfo[i].pNext       = hwctx->alloc_pnext[i];
        eminfo[i].handleTypes = e;
    }

    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
                       hwctx->nb_layers,
                       eiinfo.handleTypes ? &eiinfo : hwctx->create_pnext);
    if (err)
        return NULL;

    err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
    if (err)
        goto fail;

    if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
        !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DPB);
    else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DST);
    else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_ENCODING_DPB);
    else if (hwctx->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT)
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_WRITE);
    else
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_GENERAL);
    if (err)
        goto fail;

    avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
                             vulkan_frame_free_cb, hwfc, 0);
    if (!avbuf)
        goto fail;

    return avbuf;

fail:
    vulkan_frame_free(hwfc, f);
    return NULL;
}
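/* Default frame locking callbacks, used when the API user does not supply
 * their own via AVVulkanFramesContext. They serialize updates to the frame's
 * layout/access/semaphore-value bookkeeping through a per-frame mutex. */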
static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
{
    pthread_mutex_lock(&vkf->internal->update_mutex);
}

static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
{
    pthread_mutex_unlock(&vkf->internal->update_mutex);
}

static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
{
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    VulkanFramesPriv *fp = hwfc->hwctx;

    if (fp->modifier_info) {
        if (fp->modifier_info->pDrmFormatModifiers)
            av_freep(&fp->modifier_info->pDrmFormatModifiers);
        av_freep(&fp->modifier_info);
    }

    ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec);
    ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec);
    ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec);

    av_buffer_pool_uninit(&fp->tmp);
}
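/* Frames-context init: validates the sw_format, fills in defaults for any
 * field the user left at zero (layer count, tiling, Vulkan formats, usage
 * and creation flags, locking callbacks), sets up the execution pools, and
 * does one trial allocation so errors surface here rather than at the first
 * frame allocation.
 *
 * Rough caller-side sketch of how this gets invoked (illustrative only,
 * error handling omitted):
 *
 *     AVBufferRef *frames_ref = av_hwframe_ctx_alloc(device_ref);
 *     AVHWFramesContext *fc = (AVHWFramesContext *)frames_ref->data;
 *     fc->format    = AV_PIX_FMT_VULKAN;
 *     fc->sw_format = AV_PIX_FMT_NV12;
 *     fc->width     = 1920;
 *     fc->height    = 1080;
 *     av_hwframe_ctx_init(frames_ref); // ends up calling vulkan_frames_init()
 */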
static int vulkan_frames_init(AVHWFramesContext *hwfc)
{
    int err;
    AVVkFrame *f;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanFramesContext *hwctx = &fp->p;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    VkImageUsageFlagBits supported_usage;
    const struct FFVkFormatEntry *fmt;
    int disable_multiplane = p->disable_multiplane ||
                             (hwctx->flags & AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE);

    /* Defaults */
    if (!hwctx->nb_layers)
        hwctx->nb_layers = 1;

    /* VK_IMAGE_TILING_OPTIMAL == 0, can't check for it really */
    if (p->use_linear_images &&
        (hwctx->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT))
        hwctx->tiling = VK_IMAGE_TILING_LINEAR;

    fmt = vk_find_format_entry(hwfc->sw_format);
    if (!fmt) {
        av_log(hwfc, AV_LOG_ERROR, "Unsupported pixel format: %s!\n",
               av_get_pix_fmt_name(hwfc->sw_format));
        return AVERROR(EINVAL);
    }

    if (hwctx->format[0] != VK_FORMAT_UNDEFINED) {
        if (hwctx->format[0] != fmt->vkf) {
            for (int i = 0; i < fmt->nb_images_fallback; i++) {
                if (hwctx->format[i] != fmt->fallback[i]) {
                    av_log(hwfc, AV_LOG_ERROR, "Incompatible Vulkan format given "
                           "for the current sw_format %s!\n",
                           av_get_pix_fmt_name(hwfc->sw_format));
                    return AVERROR(EINVAL);
                }
            }
        }

        /* Check if the sw_format itself is supported */
        err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format,
                                 hwctx->tiling, NULL,
                                 NULL, NULL, &supported_usage, 0,
                                 !hwctx->usage ||
                                 (hwctx->usage & VK_IMAGE_USAGE_STORAGE_BIT));
        if (err < 0) {
            av_log(hwfc, AV_LOG_ERROR, "Unsupported sw format: %s!\n",
                   av_get_pix_fmt_name(hwfc->sw_format));
            return AVERROR(EINVAL);
        }
    } else {
        err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format,
                                 hwctx->tiling, hwctx->format, NULL,
                                 NULL, &supported_usage,
                                 disable_multiplane,
                                 !hwctx->usage ||
                                 (hwctx->usage & VK_IMAGE_USAGE_STORAGE_BIT));
        if (err < 0)
            return err;
    }
    /* Image usage flags */
    if (!hwctx->usage) {
        hwctx->usage = supported_usage & (VK_IMAGE_USAGE_TRANSFER_DST_BIT |
                                          VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                                          VK_IMAGE_USAGE_STORAGE_BIT      |
                                          VK_IMAGE_USAGE_SAMPLED_BIT);

        /* Enables encoding of images, if supported by format and extensions */
        if ((supported_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) &&
            (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
                                    FF_VK_EXT_VIDEO_MAINTENANCE_1)))
            hwctx->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
    }
    /* Image creation flags.
     * Only fill them in automatically if the image is not going to be used as
     * a DPB-only image, and we have SAMPLED/STORAGE bits set. */
    if (!hwctx->img_flags) {
        int is_lone_dpb = ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR) ||
                           ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
                            !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)));
        int sampleable = hwctx->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
                                         VK_IMAGE_USAGE_STORAGE_BIT);
        hwctx->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
        if (sampleable && !is_lone_dpb) {
            hwctx->img_flags |= VK_IMAGE_CREATE_ALIAS_BIT;
            if ((fmt->vk_planes > 1) && (hwctx->format[0] == fmt->vkf))
                hwctx->img_flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
        }
    }

    /* If the image has an ENCODE_SRC usage, and the maintenance1
     * extension is supported, check if it has a profile list.
     * If there's no profile list, or it has no encode operations,
     * then allow creating the image with no specific profile. */
    if ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) &&
        (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
                                FF_VK_EXT_VIDEO_MAINTENANCE_1))) {
        const VkVideoProfileListInfoKHR *pl;
        pl = ff_vk_find_struct(hwctx->create_pnext, VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR);
        if (!pl) {
            hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR;
        } else {
            uint32_t i;
            for (i = 0; i < pl->profileCount; i++) {
                /* Video ops start at exactly 0x00010000 */
                if (pl->pProfiles[i].videoCodecOperation & 0xFFFF0000)
                    break;
            }
            if (i == pl->profileCount)
                hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR;
        }
    }

    if (!hwctx->lock_frame)
        hwctx->lock_frame = lock_frame;

    if (!hwctx->unlock_frame)
        hwctx->unlock_frame = unlock_frame;

    err = ff_vk_exec_pool_init(&p->vkctx, p->compute_qf, &fp->compute_exec,
                               p->compute_qf->num, 0, 0, 0, NULL);
    if (err)
        return err;

    err = ff_vk_exec_pool_init(&p->vkctx, p->transfer_qf, &fp->upload_exec,
                               p->transfer_qf->num*2, 0, 0, 0, NULL);
    if (err)
        return err;

    err = ff_vk_exec_pool_init(&p->vkctx, p->transfer_qf, &fp->download_exec,
                               p->transfer_qf->num, 0, 0, 0, NULL);
    if (err)
        return err;

    /* Test to see if allocation will fail */
    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
                       hwctx->nb_layers, hwctx->create_pnext);
    if (err)
        return err;

    vulkan_frame_free(hwfc, f);

    /* If user did not specify a pool, hwfc->pool will be set to the internal one
     * in hwcontext.c just after this gets called */
    if (!hwfc->pool) {
        ffhwframesctx(hwfc)->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
                                                                  hwfc, vulkan_pool_alloc,
                                                                  NULL);
        if (!ffhwframesctx(hwfc)->pool_internal)
            return AVERROR(ENOMEM);
    }

    return 0;
}
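/* av_hwframe_get_buffer() backend: hands out a frame from the pool. The
 * transfer-format callback below advertises the sw_format itself (plus
 * AV_PIX_FMT_CUDA when CUDA interop is compiled in) as valid endpoints for
 * av_hwframe_transfer_data(). */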
static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
{
    frame->buf[0] = av_buffer_pool_get(hwfc->pool);
    if (!frame->buf[0])
        return AVERROR(ENOMEM);

    frame->data[0] = frame->buf[0]->data;
    frame->format  = AV_PIX_FMT_VULKAN;
    frame->width   = hwfc->width;
    frame->height  = hwfc->height;

    return 0;
}

static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
                                       enum AVHWFrameTransferDirection dir,
                                       enum AVPixelFormat **formats)
{
    enum AVPixelFormat *fmts;
    int n = 2;

#if CONFIG_CUDA
    n++;
#endif
    fmts = av_malloc_array(n, sizeof(*fmts));
    if (!fmts)
        return AVERROR(ENOMEM);

    n = 0;
    fmts[n++] = hwfc->sw_format;
#if CONFIG_CUDA
    fmts[n++] = AV_PIX_FMT_CUDA;
#endif
    fmts[n++] = AV_PIX_FMT_NONE;

    *formats = fmts;
    return 0;
}
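/* DMA-BUF (DRM PRIME) interop: per-layer mapping between DRM fourccs and
 * the Vulkan formats used when importing/exporting them. */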
#if CONFIG_LIBDRM
static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
    vulkan_frame_free(hwfc, hwmap->priv);
}

static const struct {
    uint32_t drm_fourcc;
    VkFormat vk_format;
} vulkan_drm_format_map[] = {
    { DRM_FORMAT_R8,          VK_FORMAT_R8_UNORM                 },
    { DRM_FORMAT_R16,         VK_FORMAT_R16_UNORM                },
    { DRM_FORMAT_GR88,        VK_FORMAT_R8G8_UNORM               },
    { DRM_FORMAT_RG88,        VK_FORMAT_R8G8_UNORM               },
    { DRM_FORMAT_GR1616,      VK_FORMAT_R16G16_UNORM             },
    { DRM_FORMAT_RG1616,      VK_FORMAT_R16G16_UNORM             },
    { DRM_FORMAT_ARGB8888,    VK_FORMAT_B8G8R8A8_UNORM           },
    { DRM_FORMAT_XRGB8888,    VK_FORMAT_B8G8R8A8_UNORM           },
    { DRM_FORMAT_ABGR8888,    VK_FORMAT_R8G8B8A8_UNORM           },
    { DRM_FORMAT_XBGR8888,    VK_FORMAT_R8G8B8A8_UNORM           },
    { DRM_FORMAT_ARGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 },
    { DRM_FORMAT_ABGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 },
    { DRM_FORMAT_XRGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 },
    { DRM_FORMAT_XBGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 },

    // All these DRM_FORMATs were added in the same libdrm commit.
#ifdef DRM_FORMAT_XYUV8888
    { DRM_FORMAT_XYUV8888,        VK_FORMAT_R8G8B8A8_UNORM                     },
    { DRM_FORMAT_XVYU2101010,     VK_FORMAT_A2R10G10B10_UNORM_PACK32           },
    { DRM_FORMAT_XVYU12_16161616, VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16 },
    { DRM_FORMAT_XVYU16161616,    VK_FORMAT_R16G16B16A16_UNORM                 },
#endif
};

static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
{
    for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
        if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
            return vulkan_drm_format_map[i].vk_format;
    return VK_FORMAT_UNDEFINED;
}
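/* Imports a DRM PRIME frame descriptor as a set of Vulkan images: one image
 * per DRM layer, created with an explicit DRM format modifier and the layer's
 * plane offsets/pitches, backed by memory imported from dup()'d DMA-BUF fds,
 * then bound plane-by-plane via VkBindImagePlaneMemoryInfo. */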
static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
                                          const AVFrame *src, int flags)
{
    int err = 0;
    VkResult ret;
    AVVkFrame *f;
    int bind_counts = 0;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
    VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES];
    VkBindImagePlaneMemoryInfo plane_info[AV_DRM_MAX_PLANES];

    for (int i = 0; i < desc->nb_layers; i++) {
        if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
            av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
                   desc->layers[i].format);
            return AVERROR(EINVAL);
        }
    }

    if (!(f = av_vk_frame_alloc())) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        err = AVERROR(ENOMEM);
        goto fail;
    }

    f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;

    for (int i = 0; i < desc->nb_layers; i++) {
        const int planes = desc->layers[i].nb_planes;

        /* Semaphore */
        VkSemaphoreTypeCreateInfo sem_type_info = {
            .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
            .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
            .initialValue  = 0,
        };
        VkSemaphoreCreateInfo sem_spawn = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
            .pNext = &sem_type_info,
        };

        /* Image creation */
        VkSubresourceLayout ext_img_layouts[AV_DRM_MAX_PLANES];
        VkImageDrmFormatModifierExplicitCreateInfoEXT ext_img_mod_spec = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
            .drmFormatModifier = desc->objects[0].format_modifier,
            .drmFormatModifierPlaneCount = planes,
            .pPlaneLayouts = (const VkSubresourceLayout *)&ext_img_layouts,
        };
        VkExternalMemoryImageCreateInfo ext_img_spec = {
            .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
            .pNext = &ext_img_mod_spec,
            .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };
        VkImageCreateInfo create_info = {
            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext                 = &ext_img_spec,
            .imageType             = VK_IMAGE_TYPE_2D,
            .format                = drm_to_vulkan_fmt(desc->layers[i].format),
            .extent.depth          = 1,
            .mipLevels             = 1,
            .arrayLayers           = 1,
            .flags                 = 0x0,
            .tiling                = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
            .usage                 = 0x0, /* filled in below */
            .samples               = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->img_qfs,
            .queueFamilyIndexCount = p->nb_img_qfs,
            .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                         VK_SHARING_MODE_EXCLUSIVE,
        };

        /* Image format verification */
        VkExternalImageFormatProperties ext_props = {
            .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
        };
        VkImageFormatProperties2 props_ret = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
            .pNext = &ext_props,
        };
        VkPhysicalDeviceImageDrmFormatModifierInfoEXT props_drm_mod = {
            .sType                 = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
            .drmFormatModifier     = ext_img_mod_spec.drmFormatModifier,
            .pQueueFamilyIndices   = create_info.pQueueFamilyIndices,
            .queueFamilyIndexCount = create_info.queueFamilyIndexCount,
            .sharingMode           = create_info.sharingMode,
        };
        VkPhysicalDeviceExternalImageFormatInfo props_ext = {
            .sType      = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
            .pNext      = &props_drm_mod,
            .handleType = ext_img_spec.handleTypes,
        };
        VkPhysicalDeviceImageFormatInfo2 fmt_props;

        if (flags & AV_HWFRAME_MAP_READ)
            create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT |
                                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
        if (flags & AV_HWFRAME_MAP_WRITE)
            create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT |
                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT;

        fmt_props = (VkPhysicalDeviceImageFormatInfo2) {
            .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
            .pNext  = &props_ext,
            .format = create_info.format,
            .type   = create_info.imageType,
            .tiling = create_info.tiling,
            .usage  = create_info.usage,
            .flags  = create_info.flags,
        };

        /* Check if importing is possible for this combination of parameters */
        ret = vk->GetPhysicalDeviceImageFormatProperties2(hwctx->phys_dev,
                                                          &fmt_props, &props_ret);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Cannot map DRM frame to Vulkan: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        /* Set the image width/height */
        get_plane_wh(&create_info.extent.width, &create_info.extent.height,
                     hwfc->sw_format, src->width, src->height, i);

        /* Set the subresource layout based on the layer properties */
        for (int j = 0; j < planes; j++) {
            ext_img_layouts[j].offset     = desc->layers[i].planes[j].offset;
            ext_img_layouts[j].rowPitch   = desc->layers[i].planes[j].pitch;
            ext_img_layouts[j].size       = 0; /* The specs say so for all 3 */
            ext_img_layouts[j].arrayPitch = 0;
            ext_img_layouts[j].depthPitch = 0;
        }

        /* Create image */
        ret = vk->CreateImage(hwctx->act_dev, &create_info,
                              hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
                                  hwctx->alloc, &f->sem[i]);
        if (ret != VK_SUCCESS) {
            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        f->queue_family[i] = VK_QUEUE_FAMILY_EXTERNAL;
        f->layout[i] = create_info.initialLayout;
        f->access[i] = 0x0;
        f->sem_value[i] = 0;
    }

    for (int i = 0; i < desc->nb_layers; i++) {
        /* Memory requirements */
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[i],
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req2 = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        /* Allocation/importing */
        VkMemoryFdPropertiesKHR fdmp = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
        };
        /* This assumes that a layer will never be constructed from multiple
         * objects. If that was to happen in the real world, this code would
         * need to import each plane separately.
         */
        VkImportMemoryFdInfoKHR idesc = {
            .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
            .fd         = dup(desc->objects[desc->layers[i].planes[0].object_index].fd),
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = &idesc,
            .image = req_desc.image,
        };

        /* Get object properties */
        ret = vk->GetMemoryFdPropertiesKHR(hwctx->act_dev,
                                           VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
                                           idesc.fd, &fdmp);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            close(idesc.fd);
            goto fail;
        }

        vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2);

        /* Only a single bit must be set, not a range, and it must match */
        req2.memoryRequirements.memoryTypeBits = fdmp.memoryTypeBits;

        err = alloc_mem(ctx, &req2.memoryRequirements,
                        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                        (ded_req.prefersDedicatedAllocation ||
                         ded_req.requiresDedicatedAllocation) ?
                            &ded_alloc : ded_alloc.pNext,
                        &f->flags, &f->mem[i]);
        if (err) {
            close(idesc.fd);
            return err;
        }

        f->size[i] = req2.memoryRequirements.size;
    }

    for (int i = 0; i < desc->nb_layers; i++) {
        const int planes = desc->layers[i].nb_planes;
        for (int j = 0; j < planes; j++) {
            VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
                                           j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
                                                    VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;

            plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
            plane_info[bind_counts].pNext = NULL;
            plane_info[bind_counts].planeAspect = aspect;

            bind_info[bind_counts].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
            bind_info[bind_counts].pNext  = planes > 1 ? &plane_info[bind_counts] : NULL;
            bind_info[bind_counts].image  = f->img[i];
            bind_info[bind_counts].memory = f->mem[i];

            /* Offset is already signalled via pPlaneLayouts above */
            bind_info[bind_counts].memoryOffset = 0;

            bind_counts++;
        }
    }

    /* Bind the allocated memory to the images */
    ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    *frame = f;
    return 0;

fail:
    vulkan_frame_free(hwfc, f);
    return err;
}
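/* Synchronizes an imported DMA-BUF with its producer. Where the kernel
 * supports DMA_BUF_IOCTL_EXPORT_SYNC_FILE, each object's implicit fence is
 * exported as a sync file and imported (temporarily) into a binary semaphore
 * that the barrier submission below waits on; otherwise we fall back to a
 * plain layout transition and warn that the image may be torn. */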
static int vulkan_map_from_drm_frame_sync(AVHWFramesContext *hwfc, AVFrame *dst,
                                          const AVFrame *src, int flags)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];

#ifdef DMA_BUF_IOCTL_EXPORT_SYNC_FILE
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM) {
        VkCommandBuffer cmd_buf;
        FFVkExecContext *exec;
        VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
        VkSemaphore drm_sync_sem[AV_DRM_MAX_PLANES] = { 0 };
        int nb_img_bar = 0;

        for (int i = 0; i < desc->nb_objects; i++) {
            VkSemaphoreTypeCreateInfo sem_type_info = {
                .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
                .semaphoreType = VK_SEMAPHORE_TYPE_BINARY,
            };
            VkSemaphoreCreateInfo sem_spawn = {
                .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
                .pNext = &sem_type_info,
            };
            VkImportSemaphoreFdInfoKHR import_info;
            struct dma_buf_export_sync_file implicit_fd_info = {
                .flags = DMA_BUF_SYNC_READ,
                .fd    = -1,
            };

            if (ioctl(desc->objects[i].fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
                      &implicit_fd_info)) {
                err = AVERROR(errno);
                av_log(hwctx, AV_LOG_ERROR, "Failed to retrieve implicit DRM sync file: %s\n",
                       av_err2str(err));
                for (; i >= 0; i--)
                    vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc);
                return err;
            }

            ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
                                      hwctx->alloc, &drm_sync_sem[i]);
            if (ret != VK_SUCCESS) {
                av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                       ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                for (; i >= 0; i--)
                    vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc);
                return err;
            }

            import_info = (VkImportSemaphoreFdInfoKHR) {
                .sType      = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
                .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
                .flags      = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
                .semaphore  = drm_sync_sem[i],
                .fd         = implicit_fd_info.fd,
            };

            ret = vk->ImportSemaphoreFdKHR(hwctx->act_dev, &import_info);
            if (ret != VK_SUCCESS) {
                av_log(hwctx, AV_LOG_ERROR, "Failed to import semaphore: %s\n",
                       ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                for (; i >= 0; i--)
                    vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc);
                return err;
            }
        }

        exec = ff_vk_exec_get(&p->vkctx, &fp->compute_exec);
        cmd_buf = exec->buf;

        ff_vk_exec_start(&p->vkctx, exec);

        /* Ownership of semaphores is passed */
        err = ff_vk_exec_add_dep_bool_sem(&p->vkctx, exec,
                                          drm_sync_sem, desc->nb_objects,
                                          VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, 1);
        if (err < 0)
            return err;

        err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, dst,
                                       VK_PIPELINE_STAGE_2_NONE,
                                       VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
        if (err < 0)
            return err;

        ff_vk_frame_barrier(&p->vkctx, exec, dst, img_bar, &nb_img_bar,
                            VK_PIPELINE_STAGE_2_NONE,
                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                            ((flags & AV_HWFRAME_MAP_READ) ?
                             VK_ACCESS_2_SHADER_SAMPLED_READ_BIT : 0x0) |
                            ((flags & AV_HWFRAME_MAP_WRITE) ?
                             VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT : 0x0),
                            VK_IMAGE_LAYOUT_GENERAL,
                            VK_QUEUE_FAMILY_IGNORED);

        vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
                .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
                .pImageMemoryBarriers = img_bar,
                .imageMemoryBarrierCount = nb_img_bar,
            });

        err = ff_vk_exec_submit(&p->vkctx, exec);
        if (err < 0)
            return err;
    } else
#endif
    {
        AVVkFrame *f = (AVVkFrame *)dst->data[0];
        av_log(hwctx, AV_LOG_WARNING, "No support for synchronization when importing DMA-BUFs, "
                                      "image may be corrupted.\n");
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_IMPORT);
        if (err)
            return err;
    }

    return 0;
}
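/* Top-level DRM PRIME -> Vulkan mapping: imports the descriptor, registers
 * the unmap callback, then performs producer synchronization.
 *
 * Rough caller-side sketch (illustrative only; the destination frame needs a
 * Vulkan frames context attached, and error handling is omitted):
 *
 *     AVFrame *dst = av_frame_alloc();
 *     dst->hw_frames_ctx = av_buffer_ref(vk_frames_ref);
 *     dst->format        = AV_PIX_FMT_VULKAN;
 *     err = av_hwframe_map(dst, drm_frame, AV_HWFRAME_MAP_READ);
 */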
static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
                               const AVFrame *src, int flags)
{
    int err = 0;
    AVVkFrame *f;

    if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src, flags)))
        return err;

    /* The unmapping function will free this */
    dst->data[0] = (uint8_t *)f;
    dst->width   = src->width;
    dst->height  = src->height;

    err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
                                &vulkan_unmap_from_drm, f);
    if (err < 0)
        goto fail;

    err = vulkan_map_from_drm_frame_sync(hwfc, dst, src, flags);
    if (err < 0)
        return err;

    av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");

    return 0;

fail:
    vulkan_frame_free(hwfc->device_ctx->hwctx, f);
    dst->data[0] = NULL;
    return err;
}
#if CONFIG_VAAPI
static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
                                 AVFrame *dst, const AVFrame *src,
                                 int flags)
{
    int err;
    AVFrame *tmp = av_frame_alloc();
    AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
    AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
    VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];

    if (!tmp)
        return AVERROR(ENOMEM);
    /* We have to sync manually here, since VAAPI provides no semaphores to wait on */
    vaSyncSurface(vaapi_ctx->display, surface_id);

    tmp->format = AV_PIX_FMT_DRM_PRIME;

    err = av_hwframe_map(tmp, src, flags);
    if (err < 0)
        goto fail;

    err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
    if (err < 0)
        goto fail;

    err = ff_hwframe_map_replace(dst, src);

fail:
    av_frame_free(&tmp);
    return err;
}
#endif
#endif
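/* CUDA interop: exports each plane's memory and timeline semaphore as an
 * opaque FD (or Win32 handle) and imports them into CUDA as external
 * memory/semaphore objects, mapping each plane onto a CUDA mipmapped array.
 * The handles are cached in the frame's internals, so the export only
 * happens on the first transfer. */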
#if CONFIG_CUDA
static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                                 AVBufferRef *cuda_hwfc,
                                 const AVFrame *frame)
{
    int err;
    VkResult ret;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
                                                     CU_AD_FORMAT_UNSIGNED_INT8;

    dst_f = (AVVkFrame *)frame->data[0];
    dst_int = dst_f->internal;

    if (!dst_int->cuda_fc_ref) {
        dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
        if (!dst_int->cuda_fc_ref)
            return AVERROR(ENOMEM);

        for (int i = 0; i < planes; i++) {
            CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
                .offset = 0,
                .arrayDesc = {
                    .Depth       = 0,
                    .Format      = cufmt,
                    .NumChannels = 1 + ((planes == 2) && i),
                    .Flags       = 0,
                },
                .numLevels = 1,
            };
            int p_w, p_h;
#ifdef _WIN32
            CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
                .type = IsWindows8OrGreater()
                    ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
                    : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT,
                .size = dst_f->size[i],
            };
            VkMemoryGetWin32HandleInfoKHR export_info = {
                .sType  = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
                .memory = dst_f->mem[i],
                .handleType = IsWindows8OrGreater()
                    ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
                    : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
            };
            VkSemaphoreGetWin32HandleInfoKHR sem_export = {
                .sType     = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
                .semaphore = dst_f->sem[i],
                .handleType = IsWindows8OrGreater()
                    ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
                    : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
            };
            CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
                .type = 10 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 */,
            };

            ret = vk->GetMemoryWin32HandleKHR(hwctx->act_dev, &export_info,
                                              &ext_desc.handle.win32.handle);
            if (ret != VK_SUCCESS) {
                av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a Win32 Handle: %s!\n",
                       ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                goto fail;
            }
            dst_int->ext_mem_handle[i] = ext_desc.handle.win32.handle;
#else
            CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
                .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
                .size = dst_f->size[i],
            };
            VkMemoryGetFdInfoKHR export_info = {
                .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
                .memory     = dst_f->mem[i],
                .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
            };
            VkSemaphoreGetFdInfoKHR sem_export = {
                .sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
                .semaphore  = dst_f->sem[i],
                .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
            };
            CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
                .type = 9 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD */,
            };

            ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info,
                                     &ext_desc.handle.fd);
            if (ret != VK_SUCCESS) {
                av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD: %s!\n",
                       ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                goto fail;
            }
#endif

            ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
            if (ret < 0) {
#ifndef _WIN32
                close(ext_desc.handle.fd);
#endif
                err = AVERROR_EXTERNAL;
                goto fail;
            }

            get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
            tex_desc.arrayDesc.Width  = p_w;
            tex_desc.arrayDesc.Height = p_h;

            ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
                                                                       dst_int->ext_mem[i],
                                                                       &tex_desc));
            if (ret < 0) {
                err = AVERROR_EXTERNAL;
                goto fail;
            }

            ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
                                                        dst_int->cu_mma[i], 0));
            if (ret < 0) {
                err = AVERROR_EXTERNAL;
                goto fail;
            }

#ifdef _WIN32
            ret = vk->GetSemaphoreWin32HandleKHR(hwctx->act_dev, &sem_export,
                                                 &ext_sem_desc.handle.win32.handle);
#else
            ret = vk->GetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
                                        &ext_sem_desc.handle.fd);
#endif
            if (ret != VK_SUCCESS) {
                av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
                       ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                goto fail;
            }
#ifdef _WIN32
            dst_int->ext_sem_handle[i] = ext_sem_desc.handle.win32.handle;
#endif

            ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
                                                         &ext_sem_desc));
            if (ret < 0) {
#ifndef _WIN32
                close(ext_sem_desc.handle.fd);
#endif
                err = AVERROR_EXTERNAL;
                goto fail;
            }
        }
    }

    return 0;

fail:
    vulkan_free_internal(dst_f);
    return err;
}
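/* CUDA -> Vulkan upload: transitions the destination for external export,
 * waits on each plane's timeline semaphore at its current value, performs one
 * async 2D copy per plane on the CUDA stream, then signals the semaphores at
 * value + 1 so subsequent Vulkan users order correctly after the copy. */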
static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
                                          AVFrame *dst, const AVFrame *src)
{
    int err;
    CUcontext dummy;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    VulkanFramesPriv *fp = hwfc->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };

    dst_f = (AVVkFrame *)dst->data[0];

    err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        return err;

    err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
    if (err < 0)
        return err;

    err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
    if (err < 0) {
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return err;
    }

    dst_int = dst_f->internal;

    for (int i = 0; i < planes; i++) {
        s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0;
        s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1;
    }

    err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
                                                     planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .srcDevice     = (CUdeviceptr)src->data[i],
            .srcPitch      = src->linesize[i],
            .srcY          = 0,

            .dstMemoryType = CU_MEMORYTYPE_ARRAY,
            .dstArray      = dst_int->cu_array[i],
        };

        int p_w, p_h;
        get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);

        cpy.WidthInBytes = p_w * desc->comp[i].step;
        cpy.Height       = p_h;

        err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
        if (err < 0)
            goto fail;
    }

    err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
                                                       planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++)
        dst_f->sem_value[i]++;

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");

    return prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT);

fail:
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    vulkan_free_internal(dst_f);
    av_buffer_unref(&dst->buf[0]);
    return err;
}
#endif
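/* Dispatch for mapping foreign frames into Vulkan; only DRM PRIME (and VAAPI
 * via DRM) sources are supported, and only when the DRM format modifier
 * extension is present. */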
static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
                         const AVFrame *src, int flags)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    switch (src->format) {
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
    case AV_PIX_FMT_VAAPI:
        if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
            return vulkan_map_from_vaapi(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#endif
    case AV_PIX_FMT_DRM_PRIME:
        if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
            return vulkan_map_from_drm(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#endif
    default:
        return AVERROR(ENOSYS);
    }
}
#if CONFIG_LIBDRM
typedef struct VulkanDRMMapping {
    AVDRMFrameDescriptor drm_desc;
    AVVkFrame *source;
} VulkanDRMMapping;

static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
    AVDRMFrameDescriptor *drm_desc = hwmap->priv;

    for (int i = 0; i < drm_desc->nb_objects; i++)
        close(drm_desc->objects[i].fd);

    av_free(drm_desc);
}

static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
{
    for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
        if (vulkan_drm_format_map[i].vk_format == vkfmt)
            return vulkan_drm_format_map[i].drm_fourcc;
    return DRM_FORMAT_INVALID;
}
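/* Vulkan -> DRM PRIME export: transitions the frame for external use, waits
 * for all timeline semaphores to settle, queries the image's DRM format
 * modifier, exports each plane's memory as a DMA-BUF fd, and fills in the
 * per-layer offsets/pitches (skipped for optimally-tiled images, whose
 * layout is opaque to the CPU). */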
static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
                             const AVFrame *src, int flags)
{
    int err = 0;
    VkResult ret;
    AVVkFrame *f = (AVVkFrame *)src->data[0];
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanFramesContext *hwfctx = &fp->p;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkImageDrmFormatModifierPropertiesEXT drm_mod = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
    };
    VkSemaphoreWaitInfo wait_info = {
        .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
        .flags          = 0x0,
        .semaphoreCount = planes,
    };

    AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
    if (!drm_desc)
        return AVERROR(ENOMEM);

    err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        goto end;

    /* Wait for the operation to finish so we can cleanly export it. */
    wait_info.pSemaphores = f->sem;
    wait_info.pValues     = f->sem_value;

    vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX);

    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
    if (err < 0)
        goto end;

    ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
                                                     &drm_mod);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
        err = AVERROR_EXTERNAL;
        goto end;
    }

    for (int i = 0; (i < planes) && (f->mem[i]); i++) {
        VkMemoryGetFdInfoKHR export_info = {
            .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
            .memory     = f->mem[i],
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };

        ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info,
                                 &drm_desc->objects[i].fd);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
            err = AVERROR_EXTERNAL;
            goto end;
        }

        drm_desc->nb_objects++;
        drm_desc->objects[i].size = f->size[i];
        drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
    }

    drm_desc->nb_layers = planes;
    for (int i = 0; i < drm_desc->nb_layers; i++) {
        VkSubresourceLayout layout;
        VkImageSubresource sub = {
            .aspectMask = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT,
        };
        VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];

        drm_desc->layers[i].format    = vulkan_fmt_to_drm(plane_vkfmt);
        drm_desc->layers[i].nb_planes = 1;

        if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
            av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
            err = AVERROR_PATCHWELCOME;
            goto end;
        }

        drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);

        if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
            continue;

        vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
        drm_desc->layers[i].planes[0].offset = layout.offset;
        drm_desc->layers[i].planes[0].pitch  = layout.rowPitch;

        if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY)
            drm_desc->layers[i].planes[0].offset += f->offset[i];
    }

    dst->width   = src->width;
    dst->height  = src->height;
    dst->data[0] = (uint8_t *)drm_desc;

    av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");

    return 0;

end:
    av_free(drm_desc);
    return err;
}
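/* Vulkan -> VAAPI mapping goes through an intermediate DRM PRIME frame,
 * reusing vulkan_map_to_drm() and letting the VAAPI hwcontext import it. */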
#if CONFIG_VAAPI
static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
                               const AVFrame *src, int flags)
{
    int err;
    AVFrame *tmp = av_frame_alloc();
    if (!tmp)
        return AVERROR(ENOMEM);

    tmp->format = AV_PIX_FMT_DRM_PRIME;

    err = vulkan_map_to_drm(hwfc, tmp, src, flags);
    if (err < 0)
        goto fail;

    err = av_hwframe_map(dst, tmp, flags);
    if (err < 0)
        goto fail;

    err = ff_hwframe_map_replace(dst, src);

fail:
    av_frame_free(&tmp);
    return err;
}
#endif
#endif
static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
                           const AVFrame *src, int flags)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    switch (dst->format) {
#if CONFIG_LIBDRM
    case AV_PIX_FMT_DRM_PRIME:
        if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
            return vulkan_map_to_drm(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#if CONFIG_VAAPI
    case AV_PIX_FMT_VAAPI:
        if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
            return vulkan_map_to_vaapi(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#endif
#endif
    default:
        break;
    }
    return AVERROR(ENOSYS);
}
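/* Copies plane data between a mapped staging buffer and a software frame.
 * For non-coherent memory, the mapping is invalidated before a download and
 * flushed after an upload, so CPU and GPU views of the buffer agree. */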
static int copy_buffer_data(AVHWFramesContext *hwfc, AVBufferRef *buf,
                            AVFrame *swf, VkBufferImageCopy *region,
                            int planes, int upload)
{
    VkResult ret;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    AVVulkanDeviceContext *hwctx = &p->p;

    FFVkBuffer *vkbuf = (FFVkBuffer *)buf->data;

    const VkMappedMemoryRange flush_info = {
        .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
        .memory = vkbuf->mem,
        .size   = VK_WHOLE_SIZE,
    };

    if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && !upload) {
        ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, 1,
                                               &flush_info);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }
    /* Copy direction depends on whether this is an upload (swf -> buffer)
     * or a download (buffer -> swf) */
    for (int i = 0; i < planes; i++) {
        if (upload)
            av_image_copy_plane(vkbuf->mapped_mem + region[i].bufferOffset,
                                region[i].bufferRowLength,
                                swf->data[i],
                                swf->linesize[i],
                                swf->linesize[i],
                                region[i].imageExtent.height);
        else
            av_image_copy_plane(swf->data[i],
                                swf->linesize[i],
                                vkbuf->mapped_mem + region[i].bufferOffset,
                                region[i].bufferRowLength,
                                swf->linesize[i],
                                region[i].imageExtent.height);
    }
    if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && upload) {
        ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, 1,
                                          &flush_info);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}
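/* Lays out all planes in a single pooled staging buffer, aligning each
 * plane's pitch and starting offset to the device's optimal buffer-copy
 * alignments, and records the copy regions used later for the actual
 * buffer <-> image copy commands. */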
static int get_plane_buf(AVHWFramesContext *hwfc, AVBufferRef **dst,
                         AVFrame *swf, VkBufferImageCopy *region, int upload)
{
    int err;
    VulkanFramesPriv *fp = hwfc->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(swf->format);

    size_t buf_offset = 0;
    for (int i = 0; i < planes; i++) {
        size_t size;
        ptrdiff_t linesize = swf->linesize[i];

        uint32_t p_w, p_h;
        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);

        linesize = FFALIGN(linesize,
                           p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
        size = p_h*linesize;

        region[i] = (VkBufferImageCopy) {
            .bufferOffset = buf_offset,
            .bufferRowLength = linesize,
            .bufferImageHeight = p_h,
            .imageSubresource.layerCount = 1,
            .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
            /* Rest of the fields adjusted/filled in later */
        };

        buf_offset = FFALIGN(buf_offset + size,
                             p->props.properties.limits.optimalBufferCopyOffsetAlignment);
    }

    err = ff_vk_get_pooled_buffer(&p->vkctx, &fp->tmp, dst,
                                  VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                                  VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                                  NULL, buf_offset,
                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
    if (err < 0)
        return err;

    return 0;
}
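/* Wraps an existing host allocation in a VkBuffer by importing the pointer
 * via VK_EXT_external_memory_host. The caller supplies the host-pointer
 * properties so a compatible memory type can be chosen. */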
static int create_mapped_buffer(AVHWFramesContext *hwfc,
                                FFVkBuffer *vkb, VkBufferUsageFlags usage,
                                size_t size,
                                VkExternalMemoryBufferCreateInfo *create_desc,
                                VkImportMemoryHostPointerInfoEXT *import_desc,
                                VkMemoryHostPointerPropertiesEXT props)
{
    int err;
    VkResult ret;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    AVVulkanDeviceContext *hwctx = &p->p;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = create_desc,
        .usage       = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size        = size,
    };
    VkMemoryRequirements req = {
        .size           = size,
        .alignment      = p->hprops.minImportedHostPointerAlignment,
        .memoryTypeBits = props.memoryTypeBits,
    };

    err = ff_vk_alloc_mem(&p->vkctx, &req,
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
                          import_desc, &vkb->flags, &vkb->mem);
    if (err < 0)
        return err;

    ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, hwctx->alloc, &vkb->buf);
    if (ret != VK_SUCCESS) {
        vk->FreeMemory(hwctx->act_dev, vkb->mem, hwctx->alloc);
        return AVERROR_EXTERNAL;
    }

    ret = vk->BindBufferMemory(hwctx->act_dev, vkb->buf, vkb->mem, 0);
    if (ret != VK_SUCCESS) {
        vk->FreeMemory(hwctx->act_dev, vkb->mem, hwctx->alloc);
        vk->DestroyBuffer(hwctx->act_dev, vkb->buf, hwctx->alloc);
        return AVERROR_EXTERNAL;
    }

    return 0;
}
static void destroy_avvkbuf(void *opaque, uint8_t *data)
{
    FFVulkanContext *s = opaque;
    FFVkBuffer *buf = (FFVkBuffer *)data;
    ff_vk_free_buf(s, buf);
    av_free(buf);
}
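/* Zero-copy upload/download path: imports each software plane's backing
 * memory directly as a Vulkan buffer. Pointers are rounded down to the
 * required import alignment, with the remainder carried in bufferOffset;
 * planes with negative strides or unmappable pointers make the whole frame
 * fall back to staging-buffer copies. */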
static int host_map_frame(AVHWFramesContext *hwfc, AVBufferRef **dst, int *nb_bufs,
                          AVFrame *swf, VkBufferImageCopy *region, int upload)
{
    int err;
    VkResult ret;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    AVVulkanDeviceContext *hwctx = &p->p;

    const int planes = av_pix_fmt_count_planes(swf->format);

    VkExternalMemoryBufferCreateInfo create_desc = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
        .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
    };
    VkImportMemoryHostPointerInfoEXT import_desc = {
        .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
    };
    VkMemoryHostPointerPropertiesEXT props;

    for (int i = 0; i < planes; i++) {
        FFVkBuffer *vkb;
        uint32_t p_w, p_h;
        size_t offs;
        size_t buffer_size;

        /* We can't host map images with negative strides */
        if (swf->linesize[i] < 0) {
            err = AVERROR(EINVAL);
            goto fail;
        }

        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);

        /* Get the previous point at which mapping was possible and use it */
        offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment;
        import_desc.pHostPointer = swf->data[i] - offs;

        props = (VkMemoryHostPointerPropertiesEXT) {
            VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
        };
        ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
                                                    import_desc.handleType,
                                                    import_desc.pHostPointer,
                                                    &props);
        if (!(ret == VK_SUCCESS && props.memoryTypeBits)) {
            err = AVERROR(EINVAL);
            goto fail;
        }

        /* Buffer region for this plane */
        region[i] = (VkBufferImageCopy) {
            .bufferOffset = offs,
            .bufferRowLength = swf->linesize[i],
            .bufferImageHeight = p_h,
            .imageSubresource.layerCount = 1,
            .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
            /* Rest of the fields adjusted/filled in later */
        };

        /* Add the offset at the start, which gets ignored */
        buffer_size = offs + swf->linesize[i]*p_h;
        buffer_size = FFALIGN(buffer_size, p->props.properties.limits.minMemoryMapAlignment);
        buffer_size = FFALIGN(buffer_size, p->hprops.minImportedHostPointerAlignment);

        /* Create a buffer */
        vkb = av_mallocz(sizeof(*vkb));
        if (!vkb) {
            err = AVERROR(ENOMEM);
            goto fail;
        }

        err = create_mapped_buffer(hwfc, vkb,
                                   upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT :
                                            VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                                   buffer_size, &create_desc, &import_desc,
                                   props);
        if (err < 0) {
            av_free(vkb);
            goto fail;
        }

        /* Create a ref */
        dst[*nb_bufs] = av_buffer_create((uint8_t *)vkb, sizeof(*vkb),
                                         destroy_avvkbuf, &p->vkctx, 0);
        if (!dst[*nb_bufs]) {
            destroy_avvkbuf(&p->vkctx, (uint8_t *)vkb);
            err = AVERROR(ENOMEM);
            goto fail;
        }

        (*nb_bufs)++;
    }

    return 0;

fail:
    for (int i = 0; i < (*nb_bufs); i++)
        av_buffer_unref(&dst[i]);
    return err;
}
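
/* Common body for uploads (swf -> hwf) and downloads (hwf -> swf). Prefers
 * host-mapped buffers when VK_EXT_external_memory_host is available and
 * falls back to a single staging buffer otherwise. Records one
 * buffer<->image copy per plane after transitioning the image to the
 * appropriate transfer layout; bufferRowLength is temporarily converted
 * from bytes to texels around the copy, as Vulkan expects. Uploads are
 * submitted asynchronously with the buffers attached as dependencies;
 * downloads are waited on so the staging buffer can be copied back out. */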
static int vulkan_transfer_frame(AVHWFramesContext *hwfc,
                                 AVFrame *swf, AVFrame *hwf,
                                 int upload)
{
    int err;
    VulkanFramesPriv *fp = hwfc->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    int host_mapped = 0;

    AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0];
    VkBufferImageCopy region[AV_NUM_DATA_POINTERS]; // always one per plane
    const int planes = av_pix_fmt_count_planes(swf->format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(swf->format);
    const int nb_images = ff_vk_count_images(hwf_vk);

    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
    int nb_img_bar = 0;

    AVBufferRef *bufs[AV_NUM_DATA_POINTERS];
    int nb_bufs = 0;

    VkCommandBuffer cmd_buf;
    FFVkExecContext *exec;

    /* Sanity checking */
    if (swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format)) {
        av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
        return AVERROR(EINVAL);
    }

    if (swf->width > hwfc->width || swf->height > hwfc->height)
        return AVERROR(EINVAL);

    /* Setup buffers first */
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) {
        err = host_map_frame(hwfc, bufs, &nb_bufs, swf, region, upload);
        if (err >= 0)
            host_mapped = 1;
    }

    if (!host_mapped) {
        err = get_plane_buf(hwfc, &bufs[0], swf, region, upload);
        if (err < 0)
            goto end;
        nb_bufs = 1;

        if (upload) {
            err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 1);
            if (err < 0)
                goto end;
        }
    }

    exec = ff_vk_exec_get(&p->vkctx, &fp->upload_exec);
    cmd_buf = exec->buf;

    ff_vk_exec_start(&p->vkctx, exec);

    /* Prep destination Vulkan frame */
    err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, hwf,
                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                   VK_PIPELINE_STAGE_2_TRANSFER_BIT);
    if (err < 0)
        goto end;

    /* No need to declare buf deps for synchronous transfers (downloads) */
    if (upload) {
        /* Add the software frame backing the buffers if we're host mapping */
        if (host_mapped) {
            err = ff_vk_exec_add_dep_sw_frame(&p->vkctx, exec, swf);
            if (err < 0) {
                ff_vk_exec_discard_deps(&p->vkctx, exec);
                goto end;
            }
        }

        /* Add the buffers as a dependency */
        err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, nb_bufs, 1);
        if (err < 0) {
            ff_vk_exec_discard_deps(&p->vkctx, exec);
            goto end;
        }
    }

    ff_vk_frame_barrier(&p->vkctx, exec, hwf, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
                        upload ? VK_ACCESS_TRANSFER_WRITE_BIT :
                                 VK_ACCESS_TRANSFER_READ_BIT,
                        upload ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL :
                                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                        VK_QUEUE_FAMILY_IGNORED);

    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = img_bar,
        .imageMemoryBarrierCount = nb_img_bar,
    });

    for (int i = 0; i < planes; i++) {
        int buf_idx = FFMIN(i, (nb_bufs - 1));
        int img_idx = FFMIN(i, (nb_images - 1));
        FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[buf_idx]->data;

        uint32_t orig_stride = region[i].bufferRowLength;
        region[i].bufferRowLength /= desc->comp[i].step;
        region[i].imageSubresource.aspectMask = ff_vk_aspect_flag(hwf, i);

        if (upload)
            vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf,
                                     hwf_vk->img[img_idx],
                                     img_bar[img_idx].newLayout,
                                     1, &region[i]);
        else
            vk->CmdCopyImageToBuffer(cmd_buf, hwf_vk->img[img_idx],
                                     img_bar[img_idx].newLayout,
                                     vkbuf->buf,
                                     1, &region[i]);

        region[i].bufferRowLength = orig_stride;
    }

    err = ff_vk_exec_submit(&p->vkctx, exec);
    if (err < 0) {
        ff_vk_exec_discard_deps(&p->vkctx, exec);
    } else if (!upload) {
        ff_vk_exec_wait(&p->vkctx, exec);
        if (!host_mapped)
            err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 0);
    }

end:
    for (int i = 0; i < nb_bufs; i++)
        av_buffer_unref(&bufs[i]);

    return err;
}
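
/* Upload dispatch: CUDA source frames take the interop path when the
 * required external memory/semaphore extensions are present (note the
 * deliberate fallthrough to the default case when they are not), other
 * hardware frames are rejected with ENOSYS, and software frames go through
 * vulkan_transfer_frame() as an upload. */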
static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
                                   const AVFrame *src)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    switch (src->format) {
#if CONFIG_CUDA
    case AV_PIX_FMT_CUDA:
#ifdef _WIN32
        if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
            (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
#else
        if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
            (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
#endif
            return vulkan_transfer_data_from_cuda(hwfc, dst, src);
#endif
    default:
        if (src->hw_frames_ctx)
            return AVERROR(ENOSYS);
        else
            return vulkan_transfer_frame(hwfc, (AVFrame *)src, dst, 1);
    }
}
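
/* Vulkan -> CUDA copy: exports the Vulkan image planes as CUDA arrays and
 * the per-plane semaphores as CUDA external semaphores, then performs the
 * plane copies on the CUDA stream bracketed by a semaphore wait (current
 * value) and signal (current value + 1). */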
#if CONFIG_CUDA
static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
                                        const AVFrame *src)
{
    int err;
    CUcontext dummy;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    VulkanFramesPriv *fp = hwfc->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };

    dst_f = (AVVkFrame *)src->data[0];

    err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        return err;

    err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
    if (err < 0)
        return err;

    err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
    if (err < 0) {
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return err;
    }

    dst_int = dst_f->internal;

    for (int i = 0; i < planes; i++) {
        s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0;
        s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1;
    }

    err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
                                                     planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++) {
        CUDA_MEMCPY2D cpy = {
            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
            .dstDevice     = (CUdeviceptr)dst->data[i],
            .dstPitch      = dst->linesize[i],
            .dstY          = 0,

            .srcMemoryType = CU_MEMORYTYPE_ARRAY,
            .srcArray      = dst_int->cu_array[i],
        };

        int w, h;
        get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, hwfc->height, i);

        cpy.WidthInBytes = w * desc->comp[i].step;
        cpy.Height = h;

        err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
        if (err < 0)
            goto fail;
    }

    err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
                                                       planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++)
        dst_f->sem_value[i]++;

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");

    return prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT);

fail:
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    vulkan_free_internal(dst_f);
    av_buffer_unref(&dst->buf[0]);
    return err;
}
#endif
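
/* Download dispatch, mirroring vulkan_transfer_data_to(): CUDA destination
 * frames use the interop path when supported, other hardware frames are
 * rejected with ENOSYS, and software frames go through
 * vulkan_transfer_frame() as a download. */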
static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
                                     const AVFrame *src)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    switch (dst->format) {
#if CONFIG_CUDA
    case AV_PIX_FMT_CUDA:
#ifdef _WIN32
        if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
            (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
#else
        if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
            (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
#endif
            return vulkan_transfer_data_to_cuda(hwfc, dst, src);
#endif
    default:
        if (dst->hw_frames_ctx)
            return AVERROR(ENOSYS);
        else
            return vulkan_transfer_frame(hwfc, dst, (AVFrame *)src, 0);
    }
}
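
/* Deriving a frames context to Vulkan needs no extra mapping state, so
 * plain initialization suffices. */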
static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
                                   AVHWFramesContext *src_fc, int flags)
{
    return vulkan_frames_init(dst_fc);
}
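
/* Public allocator for AVVkFrame: returns a zeroed frame with its internal
 * struct and update mutex initialized, or NULL on failure. */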
AVVkFrame *av_vk_frame_alloc(void)
{
    int err;
    AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
    if (!f)
        return NULL;

    f->internal = av_mallocz(sizeof(*f->internal));
    if (!f->internal) {
        av_free(f);
        return NULL;
    }

    err = pthread_mutex_init(&f->internal->update_mutex, NULL);
    if (err != 0) {
        av_free(f->internal);
        av_free(f);
        return NULL;
    }

    return f;
}
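
/* Registration table tying the Vulkan backend into the generic hwcontext
 * layer; AV_PIX_FMT_VULKAN is the only pixel format it advertises. */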
const HWContextType ff_hwcontext_type_vulkan = {
    .type                   = AV_HWDEVICE_TYPE_VULKAN,
    .name                   = "Vulkan",

    .device_hwctx_size      = sizeof(VulkanDevicePriv),
    .frames_hwctx_size      = sizeof(VulkanFramesPriv),

    .device_init            = &vulkan_device_init,
    .device_uninit          = &vulkan_device_uninit,
    .device_create          = &vulkan_device_create,
    .device_derive          = &vulkan_device_derive,

    .frames_get_constraints = &vulkan_frames_get_constraints,
    .frames_init            = vulkan_frames_init,
    .frames_get_buffer      = vulkan_get_buffer,
    .frames_uninit          = vulkan_frames_uninit,

    .transfer_get_formats   = vulkan_transfer_get_formats,
    .transfer_data_to       = vulkan_transfer_data_to,
    .transfer_data_from     = vulkan_transfer_data_from,

    .map_to                 = vulkan_map_to,
    .map_from               = vulkan_map_from,
    .frames_derive_to       = &vulkan_frames_derive_to,

    .pix_fmts = (const enum AVPixelFormat []) {
        AV_PIX_FMT_VULKAN,
        AV_PIX_FMT_NONE
    },
};
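
/* Minimal usage sketch (illustrative, not part of this file): creating a
 * Vulkan hardware device through the public API that the table above backs.
 * Error handling beyond the create call is elided.
 *
 *     AVBufferRef *device_ref = NULL;
 *     int ret = av_hwdevice_ctx_create(&device_ref, AV_HWDEVICE_TYPE_VULKAN,
 *                                      NULL, NULL, 0);
 *     if (ret < 0)
 *         return ret; // no usable Vulkan device/driver
 *     // ... allocate an AVHWFramesContext, transfer frames, etc. ...
 *     av_buffer_unref(&device_ref);
 */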