123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701 |
- // Copyright 2014 Google Inc. All Rights Reserved.
- //
- // Use of this source code is governed by a BSD-style license
- // that can be found in the COPYING file in the root of the source
- // tree. An additional intellectual property rights grant can be found
- // in the file PATENTS. All contributing project authors may
- // be found in the AUTHORS file in the root of the source tree.
- // -----------------------------------------------------------------------------
- //
- // Image transforms and color space conversion methods for lossless decoder.
- //
- // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
- // Jovan Zelincevic (jovan.zelincevic@imgtec.com)
- #include "./dsp.h"
- #if defined(WEBP_USE_MIPS_DSP_R2)
- #include "./lossless.h"
- #include "./lossless_common.h"
- #define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
- static void FUNC_NAME(const TYPE* src, \
- const uint32_t* const color_map, \
- TYPE* dst, int y_start, int y_end, \
- int width) { \
- int y; \
- for (y = y_start; y < y_end; ++y) { \
- int x; \
- for (x = 0; x < (width >> 2); ++x) { \
- int tmp1, tmp2, tmp3, tmp4; \
- __asm__ volatile ( \
- ".ifc " #TYPE ", uint8_t \n\t" \
- "lbu %[tmp1], 0(%[src]) \n\t" \
- "lbu %[tmp2], 1(%[src]) \n\t" \
- "lbu %[tmp3], 2(%[src]) \n\t" \
- "lbu %[tmp4], 3(%[src]) \n\t" \
- "addiu %[src], %[src], 4 \n\t" \
- ".endif \n\t" \
- ".ifc " #TYPE ", uint32_t \n\t" \
- "lw %[tmp1], 0(%[src]) \n\t" \
- "lw %[tmp2], 4(%[src]) \n\t" \
- "lw %[tmp3], 8(%[src]) \n\t" \
- "lw %[tmp4], 12(%[src]) \n\t" \
- "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
- "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
- "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
- "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
- "addiu %[src], %[src], 16 \n\t" \
- ".endif \n\t" \
- "sll %[tmp1], %[tmp1], 2 \n\t" \
- "sll %[tmp2], %[tmp2], 2 \n\t" \
- "sll %[tmp3], %[tmp3], 2 \n\t" \
- "sll %[tmp4], %[tmp4], 2 \n\t" \
- "lwx %[tmp1], %[tmp1](%[color_map]) \n\t" \
- "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \
- "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \
- "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \
- ".ifc " #TYPE ", uint8_t \n\t" \
- "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
- "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
- "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
- "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
- "sb %[tmp1], 0(%[dst]) \n\t" \
- "sb %[tmp2], 1(%[dst]) \n\t" \
- "sb %[tmp3], 2(%[dst]) \n\t" \
- "sb %[tmp4], 3(%[dst]) \n\t" \
- "addiu %[dst], %[dst], 4 \n\t" \
- ".endif \n\t" \
- ".ifc " #TYPE ", uint32_t \n\t" \
- "sw %[tmp1], 0(%[dst]) \n\t" \
- "sw %[tmp2], 4(%[dst]) \n\t" \
- "sw %[tmp3], 8(%[dst]) \n\t" \
- "sw %[tmp4], 12(%[dst]) \n\t" \
- "addiu %[dst], %[dst], 16 \n\t" \
- ".endif \n\t" \
- : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3), \
- [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst) \
- : [color_map]"r"(color_map) \
- : "memory" \
- ); \
- } \
- for (x = 0; x < (width & 3); ++x) { \
- *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
- } \
- } \
- }
- MAP_COLOR_FUNCS(MapARGB_MIPSdspR2, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
- MAP_COLOR_FUNCS(MapAlpha_MIPSdspR2, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)
- #undef MAP_COLOR_FUNCS
- static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
- uint32_t c2) {
- int temp0, temp1, temp2, temp3, temp4, temp5;
- __asm__ volatile (
- "preceu.ph.qbr %[temp1], %[c0] \n\t"
- "preceu.ph.qbl %[temp2], %[c0] \n\t"
- "preceu.ph.qbr %[temp3], %[c1] \n\t"
- "preceu.ph.qbl %[temp4], %[c1] \n\t"
- "preceu.ph.qbr %[temp5], %[c2] \n\t"
- "preceu.ph.qbl %[temp0], %[c2] \n\t"
- "subq.ph %[temp3], %[temp3], %[temp5] \n\t"
- "subq.ph %[temp4], %[temp4], %[temp0] \n\t"
- "addq.ph %[temp1], %[temp1], %[temp3] \n\t"
- "addq.ph %[temp2], %[temp2], %[temp4] \n\t"
- "shll_s.ph %[temp1], %[temp1], 7 \n\t"
- "shll_s.ph %[temp2], %[temp2], 7 \n\t"
- "precrqu_s.qb.ph %[temp2], %[temp2], %[temp1] \n\t"
- : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
- [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)
- : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
- : "memory"
- );
- return temp2;
- }
- static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
- uint32_t c2) {
- int temp0, temp1, temp2, temp3, temp4, temp5;
- __asm__ volatile (
- "adduh.qb %[temp5], %[c0], %[c1] \n\t"
- "preceu.ph.qbr %[temp3], %[c2] \n\t"
- "preceu.ph.qbr %[temp1], %[temp5] \n\t"
- "preceu.ph.qbl %[temp2], %[temp5] \n\t"
- "preceu.ph.qbl %[temp4], %[c2] \n\t"
- "subq.ph %[temp3], %[temp1], %[temp3] \n\t"
- "subq.ph %[temp4], %[temp2], %[temp4] \n\t"
- "shrl.ph %[temp5], %[temp3], 15 \n\t"
- "shrl.ph %[temp0], %[temp4], 15 \n\t"
- "addq.ph %[temp3], %[temp3], %[temp5] \n\t"
- "addq.ph %[temp4], %[temp0], %[temp4] \n\t"
- "shra.ph %[temp3], %[temp3], 1 \n\t"
- "shra.ph %[temp4], %[temp4], 1 \n\t"
- "addq.ph %[temp1], %[temp1], %[temp3] \n\t"
- "addq.ph %[temp2], %[temp2], %[temp4] \n\t"
- "shll_s.ph %[temp1], %[temp1], 7 \n\t"
- "shll_s.ph %[temp2], %[temp2], 7 \n\t"
- "precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t"
- : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
- [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)
- : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
- : "memory"
- );
- return temp1;
- }
- static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
- int temp0, temp1, temp2, temp3, temp4, temp5;
- __asm__ volatile (
- "cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t"
- "pick.qb %[temp1], %[b], %[c] \n\t"
- "pick.qb %[temp2], %[c], %[b] \n\t"
- "cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t"
- "pick.qb %[temp4], %[a], %[c] \n\t"
- "pick.qb %[temp5], %[c], %[a] \n\t"
- "subu.qb %[temp3], %[temp1], %[temp2] \n\t"
- "subu.qb %[temp0], %[temp4], %[temp5] \n\t"
- "raddu.w.qb %[temp3], %[temp3] \n\t"
- "raddu.w.qb %[temp0], %[temp0] \n\t"
- "subu %[temp3], %[temp3], %[temp0] \n\t"
- "slti %[temp0], %[temp3], 0x1 \n\t"
- "movz %[a], %[b], %[temp0] \n\t"
- : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),
- [a]"+&r"(a)
- : [b]"r"(b), [c]"r"(c)
- );
- return a;
- }
- static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
- __asm__ volatile (
- "adduh.qb %[a0], %[a0], %[a1] \n\t"
- : [a0]"+r"(a0)
- : [a1]"r"(a1)
- );
- return a0;
- }
- static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
- return Average2(Average2(a0, a2), a1);
- }
- static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
- uint32_t a2, uint32_t a3) {
- return Average2(Average2(a0, a1), Average2(a2, a3));
- }
- static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
- const uint32_t* const top) {
- return Average3(*left, top[0], top[1]);
- }
- static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
- const uint32_t* const top) {
- return Average2(*left, top[-1]);
- }
- static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
- const uint32_t* const top) {
- return Average2(*left, top[0]);
- }
- static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
- const uint32_t* const top) {
- (void)left;
- return Average2(top[-1], top[0]);
- }
- static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
- const uint32_t* const top) {
- (void)left;
- return Average2(top[0], top[1]);
- }
- static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
- const uint32_t* const top) {
- return Average4(*left, top[-1], top[0], top[1]);
- }
- static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
- const uint32_t* const top) {
- return Select(top[0], *left, top[-1]);
- }
- static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
- const uint32_t* const top) {
- return ClampedAddSubtractFull(*left, top[0], top[-1]);
- }
- static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
- const uint32_t* const top) {
- return ClampedAddSubtractHalf(*left, top[0], top[-1]);
- }
- // Add green to blue and red channels (i.e. perform the inverse transform of
- // 'subtract green').
- static void AddGreenToBlueAndRed_MIPSdspR2(const uint32_t* src, int num_pixels,
- uint32_t* dst) {
- uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
- const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
- const uint32_t* const p_loop2_end = src + num_pixels;
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "beq %[src], %[p_loop1_end], 3f \n\t"
- " nop \n\t"
- "0: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "lw %[temp1], 4(%[src]) \n\t"
- "lw %[temp2], 8(%[src]) \n\t"
- "lw %[temp3], 12(%[src]) \n\t"
- "ext %[temp4], %[temp0], 8, 8 \n\t"
- "ext %[temp5], %[temp1], 8, 8 \n\t"
- "ext %[temp6], %[temp2], 8, 8 \n\t"
- "ext %[temp7], %[temp3], 8, 8 \n\t"
- "addiu %[src], %[src], 16 \n\t"
- "addiu %[dst], %[dst], 16 \n\t"
- "replv.ph %[temp4], %[temp4] \n\t"
- "replv.ph %[temp5], %[temp5] \n\t"
- "replv.ph %[temp6], %[temp6] \n\t"
- "replv.ph %[temp7], %[temp7] \n\t"
- "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
- "addu.qb %[temp1], %[temp1], %[temp5] \n\t"
- "addu.qb %[temp2], %[temp2], %[temp6] \n\t"
- "addu.qb %[temp3], %[temp3], %[temp7] \n\t"
- "sw %[temp0], -16(%[dst]) \n\t"
- "sw %[temp1], -12(%[dst]) \n\t"
- "sw %[temp2], -8(%[dst]) \n\t"
- "bne %[src], %[p_loop1_end], 0b \n\t"
- " sw %[temp3], -4(%[dst]) \n\t"
- "3: \n\t"
- "beq %[src], %[p_loop2_end], 2f \n\t"
- " nop \n\t"
- "1: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "addiu %[src], %[src], 4 \n\t"
- "addiu %[dst], %[dst], 4 \n\t"
- "ext %[temp4], %[temp0], 8, 8 \n\t"
- "replv.ph %[temp4], %[temp4] \n\t"
- "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
- "bne %[src], %[p_loop2_end], 1b \n\t"
- " sw %[temp0], -4(%[dst]) \n\t"
- "2: \n\t"
- ".set pop \n\t"
- : [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
- [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
- [temp7]"=&r"(temp7)
- : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
- : "memory"
- );
- }
- static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m,
- const uint32_t* src, int num_pixels,
- uint32_t* dst) {
- int temp0, temp1, temp2, temp3, temp4, temp5;
- uint32_t argb, argb1, new_red;
- const uint32_t G_to_R = m->green_to_red_;
- const uint32_t G_to_B = m->green_to_blue_;
- const uint32_t R_to_B = m->red_to_blue_;
- const uint32_t* const p_loop_end = src + (num_pixels & ~1);
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "beq %[src], %[p_loop_end], 1f \n\t"
- " nop \n\t"
- "replv.ph %[temp0], %[G_to_R] \n\t"
- "replv.ph %[temp1], %[G_to_B] \n\t"
- "replv.ph %[temp2], %[R_to_B] \n\t"
- "shll.ph %[temp0], %[temp0], 8 \n\t"
- "shll.ph %[temp1], %[temp1], 8 \n\t"
- "shll.ph %[temp2], %[temp2], 8 \n\t"
- "shra.ph %[temp0], %[temp0], 8 \n\t"
- "shra.ph %[temp1], %[temp1], 8 \n\t"
- "shra.ph %[temp2], %[temp2], 8 \n\t"
- "0: \n\t"
- "lw %[argb], 0(%[src]) \n\t"
- "lw %[argb1], 4(%[src]) \n\t"
- "sw %[argb], 0(%[dst]) \n\t"
- "sw %[argb1], 4(%[dst]) \n\t"
- "addiu %[src], %[src], 8 \n\t"
- "addiu %[dst], %[dst], 8 \n\t"
- "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
- "preceu.ph.qbra %[temp3], %[temp3] \n\t"
- "shll.ph %[temp3], %[temp3], 8 \n\t"
- "shra.ph %[temp3], %[temp3], 8 \n\t"
- "mul.ph %[temp5], %[temp3], %[temp0] \n\t"
- "mul.ph %[temp3], %[temp3], %[temp1] \n\t"
- "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"
- "ins %[argb1], %[argb], 16, 16 \n\t"
- "shra.ph %[temp5], %[temp5], 5 \n\t"
- "shra.ph %[temp3], %[temp3], 5 \n\t"
- "addu.ph %[new_red], %[new_red], %[temp5] \n\t"
- "addu.ph %[argb1], %[argb1], %[temp3] \n\t"
- "preceu.ph.qbra %[temp5], %[new_red] \n\t"
- "shll.ph %[temp4], %[temp5], 8 \n\t"
- "shra.ph %[temp4], %[temp4], 8 \n\t"
- "mul.ph %[temp4], %[temp4], %[temp2] \n\t"
- "sb %[temp5], -2(%[dst]) \n\t"
- "sra %[temp5], %[temp5], 16 \n\t"
- "shra.ph %[temp4], %[temp4], 5 \n\t"
- "addu.ph %[argb1], %[argb1], %[temp4] \n\t"
- "preceu.ph.qbra %[temp3], %[argb1] \n\t"
- "sb %[temp5], -6(%[dst]) \n\t"
- "sb %[temp3], -4(%[dst]) \n\t"
- "sra %[temp3], %[temp3], 16 \n\t"
- "bne %[src], %[p_loop_end], 0b \n\t"
- " sb %[temp3], -8(%[dst]) \n\t"
- "1: \n\t"
- ".set pop \n\t"
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
- [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
- [new_red]"=&r"(new_red), [argb]"=&r"(argb),
- [argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
- : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
- [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
- : "memory", "hi", "lo"
- );
- // Fall-back to C-version for left-overs.
- if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
- }
- static void ConvertBGRAToRGB_MIPSdspR2(const uint32_t* src,
- int num_pixels, uint8_t* dst) {
- int temp0, temp1, temp2, temp3;
- const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
- const uint32_t* const p_loop2_end = src + num_pixels;
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "beq %[src], %[p_loop1_end], 3f \n\t"
- " nop \n\t"
- "0: \n\t"
- "lw %[temp3], 12(%[src]) \n\t"
- "lw %[temp2], 8(%[src]) \n\t"
- "lw %[temp1], 4(%[src]) \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "ins %[temp3], %[temp2], 24, 8 \n\t"
- "sll %[temp2], %[temp2], 8 \n\t"
- "rotr %[temp3], %[temp3], 16 \n\t"
- "ins %[temp2], %[temp1], 0, 16 \n\t"
- "sll %[temp1], %[temp1], 8 \n\t"
- "wsbh %[temp3], %[temp3] \n\t"
- "balign %[temp0], %[temp1], 1 \n\t"
- "wsbh %[temp2], %[temp2] \n\t"
- "wsbh %[temp0], %[temp0] \n\t"
- "usw %[temp3], 8(%[dst]) \n\t"
- "rotr %[temp0], %[temp0], 16 \n\t"
- "usw %[temp2], 4(%[dst]) \n\t"
- "addiu %[src], %[src], 16 \n\t"
- "usw %[temp0], 0(%[dst]) \n\t"
- "bne %[src], %[p_loop1_end], 0b \n\t"
- " addiu %[dst], %[dst], 12 \n\t"
- "3: \n\t"
- "beq %[src], %[p_loop2_end], 2f \n\t"
- " nop \n\t"
- "1: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "addiu %[src], %[src], 4 \n\t"
- "wsbh %[temp1], %[temp0] \n\t"
- "addiu %[dst], %[dst], 3 \n\t"
- "ush %[temp1], -2(%[dst]) \n\t"
- "sra %[temp0], %[temp0], 16 \n\t"
- "bne %[src], %[p_loop2_end], 1b \n\t"
- " sb %[temp0], -3(%[dst]) \n\t"
- "2: \n\t"
- ".set pop \n\t"
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
- [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
- : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
- : "memory"
- );
- }
- static void ConvertBGRAToRGBA_MIPSdspR2(const uint32_t* src,
- int num_pixels, uint8_t* dst) {
- int temp0, temp1, temp2, temp3;
- const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
- const uint32_t* const p_loop2_end = src + num_pixels;
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "beq %[src], %[p_loop1_end], 3f \n\t"
- " nop \n\t"
- "0: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "lw %[temp1], 4(%[src]) \n\t"
- "lw %[temp2], 8(%[src]) \n\t"
- "lw %[temp3], 12(%[src]) \n\t"
- "wsbh %[temp0], %[temp0] \n\t"
- "wsbh %[temp1], %[temp1] \n\t"
- "wsbh %[temp2], %[temp2] \n\t"
- "wsbh %[temp3], %[temp3] \n\t"
- "addiu %[src], %[src], 16 \n\t"
- "balign %[temp0], %[temp0], 1 \n\t"
- "balign %[temp1], %[temp1], 1 \n\t"
- "balign %[temp2], %[temp2], 1 \n\t"
- "balign %[temp3], %[temp3], 1 \n\t"
- "usw %[temp0], 0(%[dst]) \n\t"
- "usw %[temp1], 4(%[dst]) \n\t"
- "usw %[temp2], 8(%[dst]) \n\t"
- "usw %[temp3], 12(%[dst]) \n\t"
- "bne %[src], %[p_loop1_end], 0b \n\t"
- " addiu %[dst], %[dst], 16 \n\t"
- "3: \n\t"
- "beq %[src], %[p_loop2_end], 2f \n\t"
- " nop \n\t"
- "1: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "wsbh %[temp0], %[temp0] \n\t"
- "addiu %[src], %[src], 4 \n\t"
- "balign %[temp0], %[temp0], 1 \n\t"
- "usw %[temp0], 0(%[dst]) \n\t"
- "bne %[src], %[p_loop2_end], 1b \n\t"
- " addiu %[dst], %[dst], 4 \n\t"
- "2: \n\t"
- ".set pop \n\t"
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
- [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
- : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
- : "memory"
- );
- }
- static void ConvertBGRAToRGBA4444_MIPSdspR2(const uint32_t* src,
- int num_pixels, uint8_t* dst) {
- int temp0, temp1, temp2, temp3, temp4, temp5;
- const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
- const uint32_t* const p_loop2_end = src + num_pixels;
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "beq %[src], %[p_loop1_end], 3f \n\t"
- " nop \n\t"
- "0: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "lw %[temp1], 4(%[src]) \n\t"
- "lw %[temp2], 8(%[src]) \n\t"
- "lw %[temp3], 12(%[src]) \n\t"
- "ext %[temp4], %[temp0], 28, 4 \n\t"
- "ext %[temp5], %[temp0], 12, 4 \n\t"
- "ins %[temp0], %[temp4], 0, 4 \n\t"
- "ext %[temp4], %[temp1], 28, 4 \n\t"
- "ins %[temp0], %[temp5], 16, 4 \n\t"
- "ext %[temp5], %[temp1], 12, 4 \n\t"
- "ins %[temp1], %[temp4], 0, 4 \n\t"
- "ext %[temp4], %[temp2], 28, 4 \n\t"
- "ins %[temp1], %[temp5], 16, 4 \n\t"
- "ext %[temp5], %[temp2], 12, 4 \n\t"
- "ins %[temp2], %[temp4], 0, 4 \n\t"
- "ext %[temp4], %[temp3], 28, 4 \n\t"
- "ins %[temp2], %[temp5], 16, 4 \n\t"
- "ext %[temp5], %[temp3], 12, 4 \n\t"
- "ins %[temp3], %[temp4], 0, 4 \n\t"
- "precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t"
- "ins %[temp3], %[temp5], 16, 4 \n\t"
- "addiu %[src], %[src], 16 \n\t"
- "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t"
- #if (WEBP_SWAP_16BIT_CSP == 1)
- "usw %[temp1], 0(%[dst]) \n\t"
- "usw %[temp3], 4(%[dst]) \n\t"
- #else
- "wsbh %[temp1], %[temp1] \n\t"
- "wsbh %[temp3], %[temp3] \n\t"
- "usw %[temp1], 0(%[dst]) \n\t"
- "usw %[temp3], 4(%[dst]) \n\t"
- #endif
- "bne %[src], %[p_loop1_end], 0b \n\t"
- " addiu %[dst], %[dst], 8 \n\t"
- "3: \n\t"
- "beq %[src], %[p_loop2_end], 2f \n\t"
- " nop \n\t"
- "1: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "ext %[temp4], %[temp0], 28, 4 \n\t"
- "ext %[temp5], %[temp0], 12, 4 \n\t"
- "ins %[temp0], %[temp4], 0, 4 \n\t"
- "ins %[temp0], %[temp5], 16, 4 \n\t"
- "addiu %[src], %[src], 4 \n\t"
- "precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t"
- #if (WEBP_SWAP_16BIT_CSP == 1)
- "ush %[temp0], 0(%[dst]) \n\t"
- #else
- "wsbh %[temp0], %[temp0] \n\t"
- "ush %[temp0], 0(%[dst]) \n\t"
- #endif
- "bne %[src], %[p_loop2_end], 1b \n\t"
- " addiu %[dst], %[dst], 2 \n\t"
- "2: \n\t"
- ".set pop \n\t"
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
- [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
- [dst]"+&r"(dst), [src]"+&r"(src)
- : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
- : "memory"
- );
- }
- static void ConvertBGRAToRGB565_MIPSdspR2(const uint32_t* src,
- int num_pixels, uint8_t* dst) {
- int temp0, temp1, temp2, temp3, temp4, temp5;
- const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
- const uint32_t* const p_loop2_end = src + num_pixels;
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "beq %[src], %[p_loop1_end], 3f \n\t"
- " nop \n\t"
- "0: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "lw %[temp1], 4(%[src]) \n\t"
- "lw %[temp2], 8(%[src]) \n\t"
- "lw %[temp3], 12(%[src]) \n\t"
- "ext %[temp4], %[temp0], 8, 16 \n\t"
- "ext %[temp5], %[temp0], 5, 11 \n\t"
- "ext %[temp0], %[temp0], 3, 5 \n\t"
- "ins %[temp4], %[temp5], 0, 11 \n\t"
- "ext %[temp5], %[temp1], 5, 11 \n\t"
- "ins %[temp4], %[temp0], 0, 5 \n\t"
- "ext %[temp0], %[temp1], 8, 16 \n\t"
- "ext %[temp1], %[temp1], 3, 5 \n\t"
- "ins %[temp0], %[temp5], 0, 11 \n\t"
- "ext %[temp5], %[temp2], 5, 11 \n\t"
- "ins %[temp0], %[temp1], 0, 5 \n\t"
- "ext %[temp1], %[temp2], 8, 16 \n\t"
- "ext %[temp2], %[temp2], 3, 5 \n\t"
- "ins %[temp1], %[temp5], 0, 11 \n\t"
- "ext %[temp5], %[temp3], 5, 11 \n\t"
- "ins %[temp1], %[temp2], 0, 5 \n\t"
- "ext %[temp2], %[temp3], 8, 16 \n\t"
- "ext %[temp3], %[temp3], 3, 5 \n\t"
- "ins %[temp2], %[temp5], 0, 11 \n\t"
- "append %[temp0], %[temp4], 16 \n\t"
- "ins %[temp2], %[temp3], 0, 5 \n\t"
- "addiu %[src], %[src], 16 \n\t"
- "append %[temp2], %[temp1], 16 \n\t"
- #if (WEBP_SWAP_16BIT_CSP == 1)
- "usw %[temp0], 0(%[dst]) \n\t"
- "usw %[temp2], 4(%[dst]) \n\t"
- #else
- "wsbh %[temp0], %[temp0] \n\t"
- "wsbh %[temp2], %[temp2] \n\t"
- "usw %[temp0], 0(%[dst]) \n\t"
- "usw %[temp2], 4(%[dst]) \n\t"
- #endif
- "bne %[src], %[p_loop1_end], 0b \n\t"
- " addiu %[dst], %[dst], 8 \n\t"
- "3: \n\t"
- "beq %[src], %[p_loop2_end], 2f \n\t"
- " nop \n\t"
- "1: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "ext %[temp4], %[temp0], 8, 16 \n\t"
- "ext %[temp5], %[temp0], 5, 11 \n\t"
- "ext %[temp0], %[temp0], 3, 5 \n\t"
- "ins %[temp4], %[temp5], 0, 11 \n\t"
- "addiu %[src], %[src], 4 \n\t"
- "ins %[temp4], %[temp0], 0, 5 \n\t"
- #if (WEBP_SWAP_16BIT_CSP == 1)
- "ush %[temp4], 0(%[dst]) \n\t"
- #else
- "wsbh %[temp4], %[temp4] \n\t"
- "ush %[temp4], 0(%[dst]) \n\t"
- #endif
- "bne %[src], %[p_loop2_end], 1b \n\t"
- " addiu %[dst], %[dst], 2 \n\t"
- "2: \n\t"
- ".set pop \n\t"
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
- [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
- [dst]"+&r"(dst), [src]"+&r"(src)
- : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
- : "memory"
- );
- }
- static void ConvertBGRAToBGR_MIPSdspR2(const uint32_t* src,
- int num_pixels, uint8_t* dst) {
- int temp0, temp1, temp2, temp3;
- const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
- const uint32_t* const p_loop2_end = src + num_pixels;
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "beq %[src], %[p_loop1_end], 3f \n\t"
- " nop \n\t"
- "0: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "lw %[temp1], 4(%[src]) \n\t"
- "lw %[temp2], 8(%[src]) \n\t"
- "lw %[temp3], 12(%[src]) \n\t"
- "ins %[temp0], %[temp1], 24, 8 \n\t"
- "sra %[temp1], %[temp1], 8 \n\t"
- "ins %[temp1], %[temp2], 16, 16 \n\t"
- "sll %[temp2], %[temp2], 8 \n\t"
- "balign %[temp3], %[temp2], 1 \n\t"
- "addiu %[src], %[src], 16 \n\t"
- "usw %[temp0], 0(%[dst]) \n\t"
- "usw %[temp1], 4(%[dst]) \n\t"
- "usw %[temp3], 8(%[dst]) \n\t"
- "bne %[src], %[p_loop1_end], 0b \n\t"
- " addiu %[dst], %[dst], 12 \n\t"
- "3: \n\t"
- "beq %[src], %[p_loop2_end], 2f \n\t"
- " nop \n\t"
- "1: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "addiu %[src], %[src], 4 \n\t"
- "addiu %[dst], %[dst], 3 \n\t"
- "ush %[temp0], -3(%[dst]) \n\t"
- "sra %[temp0], %[temp0], 16 \n\t"
- "bne %[src], %[p_loop2_end], 1b \n\t"
- " sb %[temp0], -1(%[dst]) \n\t"
- "2: \n\t"
- ".set pop \n\t"
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
- [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
- : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
- : "memory"
- );
- }
- //------------------------------------------------------------------------------
- // Entry point
- extern void VP8LDspInitMIPSdspR2(void);
- WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
- VP8LMapColor32b = MapARGB_MIPSdspR2;
- VP8LMapColor8b = MapAlpha_MIPSdspR2;
- VP8LPredictors[5] = Predictor5_MIPSdspR2;
- VP8LPredictors[6] = Predictor6_MIPSdspR2;
- VP8LPredictors[7] = Predictor7_MIPSdspR2;
- VP8LPredictors[8] = Predictor8_MIPSdspR2;
- VP8LPredictors[9] = Predictor9_MIPSdspR2;
- VP8LPredictors[10] = Predictor10_MIPSdspR2;
- VP8LPredictors[11] = Predictor11_MIPSdspR2;
- VP8LPredictors[12] = Predictor12_MIPSdspR2;
- VP8LPredictors[13] = Predictor13_MIPSdspR2;
- VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MIPSdspR2;
- VP8LTransformColorInverse = TransformColorInverse_MIPSdspR2;
- VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MIPSdspR2;
- VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MIPSdspR2;
- VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_MIPSdspR2;
- VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_MIPSdspR2;
- VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MIPSdspR2;
- }
- #else // !WEBP_USE_MIPS_DSP_R2
- WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
- #endif // WEBP_USE_MIPS_DSP_R2
|