dxva2_h264.c

/*
 * DXVA2 H264 HW acceleration.
 *
 * copyright (c) 2009 Laurent Aimar
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "dxva2_internal.h"
#include "h264.h"
#include "h264data.h"

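/* Per-picture decoder state accumulated between start_frame() and end_frame():
 * picture parameters, quantization matrices, slice control entries and a view
 * of the slice bitstream data. */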
struct dxva2_picture_context {
    DXVA_PicParams_H264   pp;
    DXVA_Qmatrix_H264     qm;
    unsigned              slice_count;
    DXVA_Slice_H264_Short slice_short[MAX_SLICES];
    DXVA_Slice_H264_Long  slice_long[MAX_SLICES];
    const uint8_t         *bitstream;
    unsigned              bitstream_size;
};

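/* Pack a 7-bit surface index and a 1-bit flag into a DXVA picture entry. */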
static void fill_picture_entry(DXVA_PicEntry_H264 *pic,
                               unsigned index, unsigned flag)
{
    assert((index & 0x7f) == index && (flag & 0x01) == flag);
    pic->bPicEntry = index | (flag << 7);
}

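/* Translate the current FFmpeg H.264 decoding state (current picture,
 * reference lists, SPS and PPS fields) into a DXVA_PicParams_H264 structure. */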
static void fill_picture_parameters(struct dxva_context *ctx, const H264Context *h,
                                    DXVA_PicParams_H264 *pp)
{
    const MpegEncContext *s = &h->s;
    const Picture *current_picture = s->current_picture_ptr;
    int i;

    memset(pp, 0, sizeof(*pp));
    /* Configure current picture */
    fill_picture_entry(&pp->CurrPic,
                       ff_dxva2_get_surface_index(ctx, current_picture),
                       s->picture_structure == PICT_BOTTOM_FIELD);
    /* Configure the set of references */
    pp->UsedForReferenceFlags = 0;
    pp->NonExistingFrameFlags = 0;
    for (i = 0; i < FF_ARRAY_ELEMS(pp->RefFrameList); i++) {
        if (i < h->short_ref_count + h->long_ref_count) {
            const Picture *r;
            if (i < h->short_ref_count) {
                r = h->short_ref[i];
                assert(!r->long_ref);
            } else {
                r = h->long_ref[i - h->short_ref_count];
                assert(r->long_ref);
            }
            fill_picture_entry(&pp->RefFrameList[i],
                               ff_dxva2_get_surface_index(ctx, r),
                               r->long_ref != 0);

            if ((r->reference & PICT_TOP_FIELD) && r->field_poc[0] != INT_MAX)
                pp->FieldOrderCntList[i][0] = r->field_poc[0];
            if ((r->reference & PICT_BOTTOM_FIELD) && r->field_poc[1] != INT_MAX)
                pp->FieldOrderCntList[i][1] = r->field_poc[1];

            pp->FrameNumList[i] = r->long_ref ? r->pic_id : r->frame_num;
            if (r->reference & PICT_TOP_FIELD)
                pp->UsedForReferenceFlags |= 1 << (2*i + 0);
            if (r->reference & PICT_BOTTOM_FIELD)
                pp->UsedForReferenceFlags |= 1 << (2*i + 1);
        } else {
            pp->RefFrameList[i].bPicEntry = 0xff;
            pp->FieldOrderCntList[i][0]   = 0;
            pp->FieldOrderCntList[i][1]   = 0;
            pp->FrameNumList[i]           = 0;
        }
    }

    pp->wFrameWidthInMbsMinus1        = s->mb_width  - 1;
    pp->wFrameHeightInMbsMinus1       = s->mb_height - 1;
    pp->num_ref_frames                = h->sps.ref_frame_count;

    pp->wBitFields                    = ((s->picture_structure != PICT_FRAME) <<  0) |
                                        (h->sps.mb_aff                        <<  1) |
                                        (h->sps.residual_color_transform_flag <<  2) |
                                        /* sp_for_switch_flag (not implemented by FFmpeg) */
                                        (0                                    <<  3) |
                                        (h->sps.chroma_format_idc             <<  4) |
                                        ((h->nal_ref_idc != 0)                <<  6) |
                                        (h->pps.constrained_intra_pred        <<  7) |
                                        (h->pps.weighted_pred                 <<  8) |
                                        (h->pps.weighted_bipred_idc           <<  9) |
                                        /* MbsConsecutiveFlag */
                                        (1                                    << 11) |
                                        (h->sps.frame_mbs_only_flag           << 12) |
                                        (h->pps.transform_8x8_mode            << 13) |
                                        ((h->sps.level_idc >= 31)             << 14) |
                                        /* IntraPicFlag (Modified if we detect a non
                                         * intra slice in decode_slice) */
                                        (1                                    << 15);

    pp->bit_depth_luma_minus8         = h->sps.bit_depth_luma - 8;
    pp->bit_depth_chroma_minus8       = h->sps.bit_depth_chroma - 8;
    pp->Reserved16Bits                = 3; /* FIXME is there a way to detect the right mode ? */
    pp->StatusReportFeedbackNumber    = 1 + ctx->report_id++;

    pp->CurrFieldOrderCnt[0] = 0;
    if ((s->picture_structure & PICT_TOP_FIELD) &&
        current_picture->field_poc[0] != INT_MAX)
        pp->CurrFieldOrderCnt[0] = current_picture->field_poc[0];
    pp->CurrFieldOrderCnt[1] = 0;
    if ((s->picture_structure & PICT_BOTTOM_FIELD) &&
        current_picture->field_poc[1] != INT_MAX)
        pp->CurrFieldOrderCnt[1] = current_picture->field_poc[1];

    pp->pic_init_qs_minus26           = h->pps.init_qs - 26;
    pp->chroma_qp_index_offset        = h->pps.chroma_qp_index_offset[0];
    pp->second_chroma_qp_index_offset = h->pps.chroma_qp_index_offset[1];
    pp->ContinuationFlag              = 1;
    pp->pic_init_qp_minus26           = h->pps.init_qp - 26;
    pp->num_ref_idx_l0_active_minus1  = h->pps.ref_count[0] - 1;
    pp->num_ref_idx_l1_active_minus1  = h->pps.ref_count[1] - 1;
    pp->Reserved8BitsA                = 0;
    pp->frame_num                     = h->frame_num;
    pp->log2_max_frame_num_minus4     = h->sps.log2_max_frame_num - 4;
    pp->pic_order_cnt_type            = h->sps.poc_type;
    if (h->sps.poc_type == 0)
        pp->log2_max_pic_order_cnt_lsb_minus4 = h->sps.log2_max_poc_lsb - 4;
    else if (h->sps.poc_type == 1)
        pp->delta_pic_order_always_zero_flag = h->sps.delta_pic_order_always_zero_flag;
    pp->direct_8x8_inference_flag     = h->sps.direct_8x8_inference_flag;
    pp->entropy_coding_mode_flag      = h->pps.cabac;
    pp->pic_order_present_flag        = h->pps.pic_order_present;
    pp->num_slice_groups_minus1       = h->pps.slice_group_count - 1;
    pp->slice_group_map_type          = h->pps.mb_slice_group_map_type;
    pp->deblocking_filter_control_present_flag = h->pps.deblocking_filter_parameters_present;
    pp->redundant_pic_cnt_present_flag= h->pps.redundant_pic_cnt_present;
    pp->Reserved8BitsB                = 0;
    pp->slice_group_change_rate_minus1= 0; /* XXX not implemented by FFmpeg */
    //pp->SliceGroupMap[810];             /* XXX not implemented by FFmpeg */
}

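/* Export the PPS 4x4 and 8x8 scaling matrices into the DXVA quantization
 * matrix structure. */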
static void fill_scaling_lists(const H264Context *h, DXVA_Qmatrix_H264 *qm)
{
    unsigned i, j;
    memset(qm, 0, sizeof(*qm));
    for (i = 0; i < 6; i++)
        for (j = 0; j < 16; j++)
            qm->bScalingLists4x4[i][j] = h->pps.scaling_matrix4[i][zigzag_scan[j]];

    for (i = 0; i < 2; i++)
        for (j = 0; j < 64; j++)
            qm->bScalingLists8x8[i][j] = h->pps.scaling_matrix8[i][ff_zigzag_direct[j]];
}

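/* ConfigBitstreamRaw == 2 selects the short slice control format,
 * ConfigBitstreamRaw == 1 the long format. */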
static int is_slice_short(struct dxva_context *ctx)
{
    assert(ctx->cfg->ConfigBitstreamRaw == 1 ||
           ctx->cfg->ConfigBitstreamRaw == 2);
    return ctx->cfg->ConfigBitstreamRaw == 2;
}

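/* Describe one slice with the short format: only its location and size
 * within the bitstream buffer. */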
static void fill_slice_short(DXVA_Slice_H264_Short *slice,
                             unsigned position, unsigned size)
{
    memset(slice, 0, sizeof(*slice));
    slice->BSNALunitDataLocation = position;
    slice->SliceBytesInBuffer    = size;
    slice->wBadSliceChopping     = 0;
}

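/* Describe one slice with the long format: location and size plus the slice
 * header fields (reference lists, weighted prediction tables, deblocking
 * parameters, ...) already parsed by the software decoder. */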
static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
                            unsigned position, unsigned size)
{
    const H264Context *h = avctx->priv_data;
    struct dxva_context *ctx = avctx->hwaccel_context;
    const MpegEncContext *s = &h->s;
    unsigned list;

    memset(slice, 0, sizeof(*slice));
    slice->BSNALunitDataLocation = position;
    slice->SliceBytesInBuffer    = size;
    slice->wBadSliceChopping     = 0;

    slice->first_mb_in_slice     = (s->mb_y >> FIELD_OR_MBAFF_PICTURE) * s->mb_width + s->mb_x;
    slice->NumMbsForSlice        = 0; /* XXX it is set once we have all slices */
    slice->BitOffsetToSliceData  = get_bits_count(&s->gb) + 8;
    slice->slice_type            = ff_h264_get_slice_type(h);
    if (h->slice_type_fixed)
        slice->slice_type += 5;
    slice->luma_log2_weight_denom   = h->luma_log2_weight_denom;
    slice->chroma_log2_weight_denom = h->chroma_log2_weight_denom;
    if (h->list_count > 0)
        slice->num_ref_idx_l0_active_minus1 = h->ref_count[0] - 1;
    if (h->list_count > 1)
        slice->num_ref_idx_l1_active_minus1 = h->ref_count[1] - 1;
    slice->slice_alpha_c0_offset_div2 = h->slice_alpha_c0_offset / 2 - 26;
    slice->slice_beta_offset_div2     = h->slice_beta_offset / 2 - 26;
    slice->Reserved8Bits              = 0;

    for (list = 0; list < 2; list++) {
        unsigned i;
        for (i = 0; i < FF_ARRAY_ELEMS(slice->RefPicList[list]); i++) {
            if (list < h->list_count && i < h->ref_count[list]) {
                const Picture *r = &h->ref_list[list][i];
                unsigned plane;
                fill_picture_entry(&slice->RefPicList[list][i],
                                   ff_dxva2_get_surface_index(ctx, r),
                                   r->reference == PICT_BOTTOM_FIELD);
                for (plane = 0; plane < 3; plane++) {
                    int w, o;
                    if (plane == 0 && h->luma_weight_flag[list]) {
                        w = h->luma_weight[i][list][0];
                        o = h->luma_weight[i][list][1];
                    } else if (plane >= 1 && h->chroma_weight_flag[list]) {
                        w = h->chroma_weight[i][list][plane-1][0];
                        o = h->chroma_weight[i][list][plane-1][1];
                    } else {
                        w = 1 << (plane == 0 ? h->luma_log2_weight_denom :
                                               h->chroma_log2_weight_denom);
                        o = 0;
                    }
                    slice->Weights[list][i][plane][0] = w;
                    slice->Weights[list][i][plane][1] = o;
                }
            } else {
                unsigned plane;
                slice->RefPicList[list][i].bPicEntry = 0xff;
                for (plane = 0; plane < 3; plane++) {
                    slice->Weights[list][i][plane][0] = 0;
                    slice->Weights[list][i][plane][1] = 0;
                }
            }
        }
    }
    slice->slice_qs_delta    = 0; /* XXX not implemented by FFmpeg */
    slice->slice_qp_delta    = s->qscale - h->pps.init_qp;
    slice->redundant_pic_cnt = h->redundant_pic_count;
    if (h->slice_type == FF_B_TYPE)
        slice->direct_spatial_mv_pred_flag = h->direct_spatial_mv_pred;
    slice->cabac_init_idc    = h->pps.cabac ? h->cabac_init_idc : 0;
    if (h->deblocking_filter < 2)
        slice->disable_deblocking_filter_idc = 1 - h->deblocking_filter;
    else
        slice->disable_deblocking_filter_idc = h->deblocking_filter;

    slice->slice_id = h->current_slice - 1;
}

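/* Copy every buffered slice NAL unit into the DXVA bitstream buffer, each one
 * prefixed with an Annex B start code, pad the buffer to a 128-byte boundary,
 * then fill the bitstream and slice control buffer descriptors. */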
static int commit_bitstream_and_slice_buffer(AVCodecContext *avctx,
                                             DXVA2_DecodeBufferDesc *bs,
                                             DXVA2_DecodeBufferDesc *sc)
{
    const H264Context *h = avctx->priv_data;
    const MpegEncContext *s = &h->s;
    const unsigned mb_count = s->mb_width * s->mb_height;
    struct dxva_context *ctx = avctx->hwaccel_context;
    const Picture *current_picture = h->s.current_picture_ptr;
    struct dxva2_picture_context *ctx_pic = current_picture->hwaccel_picture_private;
    DXVA_Slice_H264_Short *slice = NULL;
    uint8_t  *dxva_data, *current, *end;
    unsigned dxva_size;
    void     *slice_data;
    unsigned slice_size;
    unsigned padding;
    unsigned i;

    /* Create an annex B bitstream buffer with only slice NAL and finalize slice */
    if (FAILED(IDirectXVideoDecoder_GetBuffer(ctx->decoder,
                                              DXVA2_BitStreamDateBufferType,
                                              &dxva_data, &dxva_size)))
        return -1;
    current = dxva_data;
    end     = dxva_data + dxva_size;

    for (i = 0; i < ctx_pic->slice_count; i++) {
        static const uint8_t start_code[] = { 0, 0, 1 };
        static const unsigned start_code_size = sizeof(start_code);
        unsigned position, size;

        assert(offsetof(DXVA_Slice_H264_Short, BSNALunitDataLocation) ==
               offsetof(DXVA_Slice_H264_Long,  BSNALunitDataLocation));
        assert(offsetof(DXVA_Slice_H264_Short, SliceBytesInBuffer) ==
               offsetof(DXVA_Slice_H264_Long,  SliceBytesInBuffer));

        if (is_slice_short(ctx))
            slice = &ctx_pic->slice_short[i];
        else
            slice = (DXVA_Slice_H264_Short*)&ctx_pic->slice_long[i];

        position = slice->BSNALunitDataLocation;
        size     = slice->SliceBytesInBuffer;
        if (start_code_size + size > end - current) {
            av_log(avctx, AV_LOG_ERROR, "Failed to build bitstream");
            break;
        }

        slice->BSNALunitDataLocation = current - dxva_data;
        slice->SliceBytesInBuffer    = start_code_size + size;

        if (!is_slice_short(ctx)) {
            DXVA_Slice_H264_Long *slice_long = (DXVA_Slice_H264_Long*)slice;
            if (i < ctx_pic->slice_count - 1)
                slice_long->NumMbsForSlice =
                    slice_long[1].first_mb_in_slice - slice_long[0].first_mb_in_slice;
            else
                slice_long->NumMbsForSlice = mb_count - slice_long->first_mb_in_slice;
        }

        memcpy(current, start_code, start_code_size);
        current += start_code_size;

        memcpy(current, &ctx_pic->bitstream[position], size);
        current += size;
    }
    padding = FFMIN(128 - ((current - dxva_data) & 127), end - current);
    if (slice && padding > 0) {
        memset(current, 0, padding);
        current += padding;

        slice->SliceBytesInBuffer += padding;
    }
    if (FAILED(IDirectXVideoDecoder_ReleaseBuffer(ctx->decoder,
                                                  DXVA2_BitStreamDateBufferType)))
        return -1;
    if (i < ctx_pic->slice_count)
        return -1;

    memset(bs, 0, sizeof(*bs));
    bs->CompressedBufferType = DXVA2_BitStreamDateBufferType;
    bs->DataSize             = current - dxva_data;
    bs->NumMBsInBuffer       = mb_count;

    if (is_slice_short(ctx)) {
        slice_data = ctx_pic->slice_short;
        slice_size = ctx_pic->slice_count * sizeof(*ctx_pic->slice_short);
    } else {
        slice_data = ctx_pic->slice_long;
        slice_size = ctx_pic->slice_count * sizeof(*ctx_pic->slice_long);
    }
    assert((bs->DataSize & 127) == 0);
    return ff_dxva2_commit_buffer(avctx, ctx, sc,
                                  DXVA2_SliceControlBufferType,
                                  slice_data, slice_size, mb_count);
}

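/* Called at the start of each frame or field: fill the picture parameters and
 * scaling lists and reset the slice/bitstream accumulation. */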
static int start_frame(AVCodecContext *avctx,
                       av_unused const uint8_t *buffer,
                       av_unused uint32_t size)
{
    const H264Context *h = avctx->priv_data;
    struct dxva_context *ctx = avctx->hwaccel_context;
    struct dxva2_picture_context *ctx_pic = h->s.current_picture_ptr->hwaccel_picture_private;

    if (!ctx->decoder || !ctx->cfg || ctx->surface_count <= 0)
        return -1;
    assert(ctx_pic);

    /* Fill up DXVA_PicParams_H264 */
    fill_picture_parameters(ctx, h, &ctx_pic->pp);

    /* Fill up DXVA_Qmatrix_H264 */
    fill_scaling_lists(h, &ctx_pic->qm);

    ctx_pic->slice_count    = 0;
    ctx_pic->bitstream_size = 0;
    ctx_pic->bitstream      = NULL;
    return 0;
}

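/* Called for each slice: record its position and size relative to the first
 * slice buffer of the picture, and clear IntraPicFlag as soon as a non-intra
 * slice is seen. */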
static int decode_slice(AVCodecContext *avctx,
                        const uint8_t *buffer, uint32_t size)
{
    const H264Context *h = avctx->priv_data;
    struct dxva_context *ctx = avctx->hwaccel_context;
    const Picture *current_picture = h->s.current_picture_ptr;
    struct dxva2_picture_context *ctx_pic = current_picture->hwaccel_picture_private;
    unsigned position;

    if (ctx_pic->slice_count >= MAX_SLICES)
        return -1;

    if (!ctx_pic->bitstream)
        ctx_pic->bitstream = buffer;
    ctx_pic->bitstream_size += size;

    position = buffer - ctx_pic->bitstream;
    if (is_slice_short(ctx))
        fill_slice_short(&ctx_pic->slice_short[ctx_pic->slice_count],
                         position, size);
    else
        fill_slice_long(avctx, &ctx_pic->slice_long[ctx_pic->slice_count],
                        position, size);
    ctx_pic->slice_count++;

    if (h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE)
        ctx_pic->pp.wBitFields &= ~(1 << 15); /* Set IntraPicFlag to 0 */
    return 0;
}

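/* Called once all slices of the picture have been received: submit the picture
 * parameters, quantization matrices, bitstream and slice control buffers to
 * the DXVA2 decoder. */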
static int end_frame(AVCodecContext *avctx)
{
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    struct dxva2_picture_context *ctx_pic =
        h->s.current_picture_ptr->hwaccel_picture_private;

    if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0)
        return -1;
    return ff_dxva2_common_end_frame(avctx, s,
                                     &ctx_pic->pp, sizeof(ctx_pic->pp),
                                     &ctx_pic->qm, sizeof(ctx_pic->qm),
                                     commit_bitstream_and_slice_buffer);
}

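/* Hardware accelerator descriptor for H.264 decoding through DXVA2. */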
AVHWAccel h264_dxva2_hwaccel = {
    .name           = "h264_dxva2",
    .type           = CODEC_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    .pix_fmt        = PIX_FMT_DXVA2_VLD,
    .capabilities   = 0,
    .start_frame    = start_frame,
    .decode_slice   = decode_slice,
    .end_frame      = end_frame,
    .priv_data_size = sizeof(struct dxva2_picture_context),
};