analysis_enc.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. // Copyright 2011 Google Inc. All Rights Reserved.
  2. //
  3. // Use of this source code is governed by a BSD-style license
  4. // that can be found in the COPYING file in the root of the source
  5. // tree. An additional intellectual property rights grant can be found
  6. // in the file PATENTS. All contributing project authors may
  7. // be found in the AUTHORS file in the root of the source tree.
  8. // -----------------------------------------------------------------------------
  9. //
  10. // Macroblock analysis
  11. //
  12. // Author: Skal (pascal.massimino@gmail.com)
  13. #include <stdlib.h>
  14. #include <string.h>
  15. #include <assert.h>
  16. #include "./vp8i_enc.h"
  17. #include "./cost_enc.h"
  18. #include "../utils/utils.h"
  19. #define MAX_ITERS_K_MEANS 6
  20. //------------------------------------------------------------------------------
  21. // Smooth the segment map by replacing isolated block by the majority of its
  22. // neighbours.
  23. static void SmoothSegmentMap(VP8Encoder* const enc) {
  24. int n, x, y;
  25. const int w = enc->mb_w_;
  26. const int h = enc->mb_h_;
  27. const int majority_cnt_3_x_3_grid = 5;
  28. uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp));
  29. assert((uint64_t)(w * h) == (uint64_t)w * h); // no overflow, as per spec
  30. if (tmp == NULL) return;
  31. for (y = 1; y < h - 1; ++y) {
  32. for (x = 1; x < w - 1; ++x) {
  33. int cnt[NUM_MB_SEGMENTS] = { 0 };
  34. const VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
  35. int majority_seg = mb->segment_;
  36. // Check the 8 neighbouring segment values.
  37. cnt[mb[-w - 1].segment_]++; // top-left
  38. cnt[mb[-w + 0].segment_]++; // top
  39. cnt[mb[-w + 1].segment_]++; // top-right
  40. cnt[mb[ - 1].segment_]++; // left
  41. cnt[mb[ + 1].segment_]++; // right
  42. cnt[mb[ w - 1].segment_]++; // bottom-left
  43. cnt[mb[ w + 0].segment_]++; // bottom
  44. cnt[mb[ w + 1].segment_]++; // bottom-right
  45. for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
  46. if (cnt[n] >= majority_cnt_3_x_3_grid) {
  47. majority_seg = n;
  48. break;
  49. }
  50. }
  51. tmp[x + y * w] = majority_seg;
  52. }
  53. }
  54. for (y = 1; y < h - 1; ++y) {
  55. for (x = 1; x < w - 1; ++x) {
  56. VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
  57. mb->segment_ = tmp[x + y * w];
  58. }
  59. }
  60. WebPSafeFree(tmp);
  61. }
  62. //------------------------------------------------------------------------------
  63. // set segment susceptibility alpha_ / beta_
  64. static WEBP_INLINE int clip(int v, int m, int M) {
  65. return (v < m) ? m : (v > M) ? M : v;
  66. }
  67. static void SetSegmentAlphas(VP8Encoder* const enc,
  68. const int centers[NUM_MB_SEGMENTS],
  69. int mid) {
  70. const int nb = enc->segment_hdr_.num_segments_;
  71. int min = centers[0], max = centers[0];
  72. int n;
  73. if (nb > 1) {
  74. for (n = 0; n < nb; ++n) {
  75. if (min > centers[n]) min = centers[n];
  76. if (max < centers[n]) max = centers[n];
  77. }
  78. }
  79. if (max == min) max = min + 1;
  80. assert(mid <= max && mid >= min);
  81. for (n = 0; n < nb; ++n) {
  82. const int alpha = 255 * (centers[n] - mid) / (max - min);
  83. const int beta = 255 * (centers[n] - min) / (max - min);
  84. enc->dqm_[n].alpha_ = clip(alpha, -127, 127);
  85. enc->dqm_[n].beta_ = clip(beta, 0, 255);
  86. }
  87. }
  88. //------------------------------------------------------------------------------
  89. // Compute susceptibility based on DCT-coeff histograms:
  90. // the higher, the "easier" the macroblock is to compress.
  91. #define MAX_ALPHA 255 // 8b of precision for susceptibilities.
  92. #define ALPHA_SCALE (2 * MAX_ALPHA) // scaling factor for alpha.
  93. #define DEFAULT_ALPHA (-1)
  94. #define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha))
  95. static int FinalAlphaValue(int alpha) {
  96. alpha = MAX_ALPHA - alpha;
  97. return clip(alpha, 0, MAX_ALPHA);
  98. }
  99. static int GetAlpha(const VP8Histogram* const histo) {
  100. // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer
  101. // values which happen to be mostly noise. This leaves the maximum precision
  102. // for handling the useful small values which contribute most.
  103. const int max_value = histo->max_value;
  104. const int last_non_zero = histo->last_non_zero;
  105. const int alpha =
  106. (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
  107. return alpha;
  108. }
  109. static void InitHistogram(VP8Histogram* const histo) {
  110. histo->max_value = 0;
  111. histo->last_non_zero = 1;
  112. }
  113. //------------------------------------------------------------------------------
  114. // Simplified k-Means, to assign Nb segments based on alpha-histogram
  115. static void AssignSegments(VP8Encoder* const enc,
  116. const int alphas[MAX_ALPHA + 1]) {
  117. // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
  118. // explicit check is needed to avoid spurious warning about 'n + 1' exceeding
  119. // array bounds of 'centers' with some compilers (noticed with gcc-4.9).
  120. const int nb = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ?
  121. enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS;
  122. int centers[NUM_MB_SEGMENTS];
  123. int weighted_average = 0;
  124. int map[MAX_ALPHA + 1];
  125. int a, n, k;
  126. int min_a = 0, max_a = MAX_ALPHA, range_a;
  127. // 'int' type is ok for histo, and won't overflow
  128. int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];
  129. assert(nb >= 1);
  130. assert(nb <= NUM_MB_SEGMENTS);
  131. // bracket the input
  132. for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {}
  133. min_a = n;
  134. for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {}
  135. max_a = n;
  136. range_a = max_a - min_a;
  137. // Spread initial centers evenly
  138. for (k = 0, n = 1; k < nb; ++k, n += 2) {
  139. assert(n < 2 * nb);
  140. centers[k] = min_a + (n * range_a) / (2 * nb);
  141. }
  142. for (k = 0; k < MAX_ITERS_K_MEANS; ++k) { // few iters are enough
  143. int total_weight;
  144. int displaced;
  145. // Reset stats
  146. for (n = 0; n < nb; ++n) {
  147. accum[n] = 0;
  148. dist_accum[n] = 0;
  149. }
  150. // Assign nearest center for each 'a'
  151. n = 0; // track the nearest center for current 'a'
  152. for (a = min_a; a <= max_a; ++a) {
  153. if (alphas[a]) {
  154. while (n + 1 < nb && abs(a - centers[n + 1]) < abs(a - centers[n])) {
  155. n++;
  156. }
  157. map[a] = n;
  158. // accumulate contribution into best centroid
  159. dist_accum[n] += a * alphas[a];
  160. accum[n] += alphas[a];
  161. }
  162. }
  163. // All point are classified. Move the centroids to the
  164. // center of their respective cloud.
  165. displaced = 0;
  166. weighted_average = 0;
  167. total_weight = 0;
  168. for (n = 0; n < nb; ++n) {
  169. if (accum[n]) {
  170. const int new_center = (dist_accum[n] + accum[n] / 2) / accum[n];
  171. displaced += abs(centers[n] - new_center);
  172. centers[n] = new_center;
  173. weighted_average += new_center * accum[n];
  174. total_weight += accum[n];
  175. }
  176. }
  177. weighted_average = (weighted_average + total_weight / 2) / total_weight;
  178. if (displaced < 5) break; // no need to keep on looping...
  179. }
  180. // Map each original value to the closest centroid
  181. for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
  182. VP8MBInfo* const mb = &enc->mb_info_[n];
  183. const int alpha = mb->alpha_;
  184. mb->segment_ = map[alpha];
  185. mb->alpha_ = centers[map[alpha]]; // for the record.
  186. }
  187. if (nb > 1) {
  188. const int smooth = (enc->config_->preprocessing & 1);
  189. if (smooth) SmoothSegmentMap(enc);
  190. }
  191. SetSegmentAlphas(enc, centers, weighted_average); // pick some alphas.
  192. }
  193. //------------------------------------------------------------------------------
  194. // Macroblock analysis: collect histogram for each mode, deduce the maximal
  195. // susceptibility and set best modes for this macroblock.
  196. // Segment assignment is done later.
  197. // Number of modes to inspect for alpha_ evaluation. We don't need to test all
  198. // the possible modes during the analysis phase: we risk falling into a local
  199. // optimum, or be subject to boundary effect
  200. #define MAX_INTRA16_MODE 2
  201. #define MAX_INTRA4_MODE 2
  202. #define MAX_UV_MODE 2
  203. static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
  204. const int max_mode = MAX_INTRA16_MODE;
  205. int mode;
  206. int best_alpha = DEFAULT_ALPHA;
  207. int best_mode = 0;
  208. VP8MakeLuma16Preds(it);
  209. for (mode = 0; mode < max_mode; ++mode) {
  210. VP8Histogram histo;
  211. int alpha;
  212. InitHistogram(&histo);
  213. VP8CollectHistogram(it->yuv_in_ + Y_OFF_ENC,
  214. it->yuv_p_ + VP8I16ModeOffsets[mode],
  215. 0, 16, &histo);
  216. alpha = GetAlpha(&histo);
  217. if (IS_BETTER_ALPHA(alpha, best_alpha)) {
  218. best_alpha = alpha;
  219. best_mode = mode;
  220. }
  221. }
  222. VP8SetIntra16Mode(it, best_mode);
  223. return best_alpha;
  224. }
  225. static int FastMBAnalyze(VP8EncIterator* const it) {
  226. // Empirical cut-off value, should be around 16 (~=block size). We use the
  227. // [8-17] range and favor intra4 at high quality, intra16 for low quality.
  228. const int q = (int)it->enc_->config_->quality;
  229. const uint32_t kThreshold = 8 + (17 - 8) * q / 100;
  230. int k;
  231. uint32_t dc[16], m, m2;
  232. for (k = 0; k < 16; k += 4) {
  233. VP8Mean16x4(it->yuv_in_ + Y_OFF_ENC + k * BPS, &dc[k]);
  234. }
  235. for (m = 0, m2 = 0, k = 0; k < 16; ++k) {
  236. m += dc[k];
  237. m2 += dc[k] * dc[k];
  238. }
  239. if (kThreshold * m2 < m * m) {
  240. VP8SetIntra16Mode(it, 0); // DC16
  241. } else {
  242. const uint8_t modes[16] = { 0 }; // DC4
  243. VP8SetIntra4Mode(it, modes);
  244. }
  245. return 0;
  246. }
  247. static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
  248. int best_alpha = DEFAULT_ALPHA;
  249. int smallest_alpha = 0;
  250. int best_mode = 0;
  251. const int max_mode = MAX_UV_MODE;
  252. int mode;
  253. VP8MakeChroma8Preds(it);
  254. for (mode = 0; mode < max_mode; ++mode) {
  255. VP8Histogram histo;
  256. int alpha;
  257. InitHistogram(&histo);
  258. VP8CollectHistogram(it->yuv_in_ + U_OFF_ENC,
  259. it->yuv_p_ + VP8UVModeOffsets[mode],
  260. 16, 16 + 4 + 4, &histo);
  261. alpha = GetAlpha(&histo);
  262. if (IS_BETTER_ALPHA(alpha, best_alpha)) {
  263. best_alpha = alpha;
  264. }
  265. // The best prediction mode tends to be the one with the smallest alpha.
  266. if (mode == 0 || alpha < smallest_alpha) {
  267. smallest_alpha = alpha;
  268. best_mode = mode;
  269. }
  270. }
  271. VP8SetIntraUVMode(it, best_mode);
  272. return best_alpha;
  273. }
  274. static void MBAnalyze(VP8EncIterator* const it,
  275. int alphas[MAX_ALPHA + 1],
  276. int* const alpha, int* const uv_alpha) {
  277. const VP8Encoder* const enc = it->enc_;
  278. int best_alpha, best_uv_alpha;
  279. VP8SetIntra16Mode(it, 0); // default: Intra16, DC_PRED
  280. VP8SetSkip(it, 0); // not skipped
  281. VP8SetSegment(it, 0); // default segment, spec-wise.
  282. if (enc->method_ <= 1) {
  283. best_alpha = FastMBAnalyze(it);
  284. } else {
  285. best_alpha = MBAnalyzeBestIntra16Mode(it);
  286. }
  287. best_uv_alpha = MBAnalyzeBestUVMode(it);
  288. // Final susceptibility mix
  289. best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2;
  290. best_alpha = FinalAlphaValue(best_alpha);
  291. alphas[best_alpha]++;
  292. it->mb_->alpha_ = best_alpha; // for later remapping.
  293. // Accumulate for later complexity analysis.
  294. *alpha += best_alpha; // mixed susceptibility (not just luma)
  295. *uv_alpha += best_uv_alpha;
  296. }
  297. static void DefaultMBInfo(VP8MBInfo* const mb) {
  298. mb->type_ = 1; // I16x16
  299. mb->uv_mode_ = 0;
  300. mb->skip_ = 0; // not skipped
  301. mb->segment_ = 0; // default segment
  302. mb->alpha_ = 0;
  303. }
  304. //------------------------------------------------------------------------------
  305. // Main analysis loop:
  306. // Collect all susceptibilities for each macroblock and record their
  307. // distribution in alphas[]. Segments is assigned a-posteriori, based on
  308. // this histogram.
  309. // We also pick an intra16 prediction mode, which shouldn't be considered
  310. // final except for fast-encode settings. We can also pick some intra4 modes
  311. // and decide intra4/intra16, but that's usually almost always a bad choice at
  312. // this stage.
  313. static void ResetAllMBInfo(VP8Encoder* const enc) {
  314. int n;
  315. for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
  316. DefaultMBInfo(&enc->mb_info_[n]);
  317. }
  318. // Default susceptibilities.
  319. enc->dqm_[0].alpha_ = 0;
  320. enc->dqm_[0].beta_ = 0;
  321. // Note: we can't compute this alpha_ / uv_alpha_ -> set to default value.
  322. enc->alpha_ = 0;
  323. enc->uv_alpha_ = 0;
  324. WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
  325. }
  326. // struct used to collect job result
  327. typedef struct {
  328. WebPWorker worker;
  329. int alphas[MAX_ALPHA + 1];
  330. int alpha, uv_alpha;
  331. VP8EncIterator it;
  332. int delta_progress;
  333. } SegmentJob;
  334. // main work call
  335. static int DoSegmentsJob(void* arg1, void* arg2) {
  336. SegmentJob* const job = (SegmentJob*)arg1;
  337. VP8EncIterator* const it = (VP8EncIterator*)arg2;
  338. int ok = 1;
  339. if (!VP8IteratorIsDone(it)) {
  340. uint8_t tmp[32 + WEBP_ALIGN_CST];
  341. uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(tmp);
  342. do {
  343. // Let's pretend we have perfect lossless reconstruction.
  344. VP8IteratorImport(it, scratch);
  345. MBAnalyze(it, job->alphas, &job->alpha, &job->uv_alpha);
  346. ok = VP8IteratorProgress(it, job->delta_progress);
  347. } while (ok && VP8IteratorNext(it));
  348. }
  349. return ok;
  350. }
  351. static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
  352. int i;
  353. for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i];
  354. dst->alpha += src->alpha;
  355. dst->uv_alpha += src->uv_alpha;
  356. }
  357. // initialize the job struct with some tasks to perform
  358. static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
  359. int start_row, int end_row) {
  360. WebPGetWorkerInterface()->Init(&job->worker);
  361. job->worker.data1 = job;
  362. job->worker.data2 = &job->it;
  363. job->worker.hook = DoSegmentsJob;
  364. VP8IteratorInit(enc, &job->it);
  365. VP8IteratorSetRow(&job->it, start_row);
  366. VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w_);
  367. memset(job->alphas, 0, sizeof(job->alphas));
  368. job->alpha = 0;
  369. job->uv_alpha = 0;
  370. // only one of both jobs can record the progress, since we don't
  371. // expect the user's hook to be multi-thread safe
  372. job->delta_progress = (start_row == 0) ? 20 : 0;
  373. }
  374. // main entry point
  375. int VP8EncAnalyze(VP8Encoder* const enc) {
  376. int ok = 1;
  377. const int do_segments =
  378. enc->config_->emulate_jpeg_size || // We need the complexity evaluation.
  379. (enc->segment_hdr_.num_segments_ > 1) ||
  380. (enc->method_ <= 1); // for method 0 - 1, we need preds_[] to be filled.
  381. if (do_segments) {
  382. const int last_row = enc->mb_h_;
  383. // We give a little more than a half work to the main thread.
  384. const int split_row = (9 * last_row + 15) >> 4;
  385. const int total_mb = last_row * enc->mb_w_;
  386. #ifdef WEBP_USE_THREAD
  387. const int kMinSplitRow = 2; // minimal rows needed for mt to be worth it
  388. const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow);
  389. #else
  390. const int do_mt = 0;
  391. #endif
  392. const WebPWorkerInterface* const worker_interface =
  393. WebPGetWorkerInterface();
  394. SegmentJob main_job;
  395. if (do_mt) {
  396. SegmentJob side_job;
  397. // Note the use of '&' instead of '&&' because we must call the functions
  398. // no matter what.
  399. InitSegmentJob(enc, &main_job, 0, split_row);
  400. InitSegmentJob(enc, &side_job, split_row, last_row);
  401. // we don't need to call Reset() on main_job.worker, since we're calling
  402. // WebPWorkerExecute() on it
  403. ok &= worker_interface->Reset(&side_job.worker);
  404. // launch the two jobs in parallel
  405. if (ok) {
  406. worker_interface->Launch(&side_job.worker);
  407. worker_interface->Execute(&main_job.worker);
  408. ok &= worker_interface->Sync(&side_job.worker);
  409. ok &= worker_interface->Sync(&main_job.worker);
  410. }
  411. worker_interface->End(&side_job.worker);
  412. if (ok) MergeJobs(&side_job, &main_job); // merge results together
  413. } else {
  414. // Even for single-thread case, we use the generic Worker tools.
  415. InitSegmentJob(enc, &main_job, 0, last_row);
  416. worker_interface->Execute(&main_job.worker);
  417. ok &= worker_interface->Sync(&main_job.worker);
  418. }
  419. worker_interface->End(&main_job.worker);
  420. if (ok) {
  421. enc->alpha_ = main_job.alpha / total_mb;
  422. enc->uv_alpha_ = main_job.uv_alpha / total_mb;
  423. AssignSegments(enc, main_job.alphas);
  424. }
  425. } else { // Use only one default segment.
  426. ResetAllMBInfo(enc);
  427. }
  428. return ok;
  429. }