12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830 |
- // Code generated by command: go run gen.go -out ../decompress_amd64.s -pkg=huff0. DO NOT EDIT.
- //go:build amd64 && !appengine && !noasm && gc
- // func decompress4x_main_loop_amd64(ctx *decompress4xContext)
- TEXT ·decompress4x_main_loop_amd64(SB), $0-8 // each loop iteration decodes two bytes from each of the four bit readers br0..br3
- // Preload values: the ctx fields below are read once and stay in registers for the whole loop
- MOVQ ctx+0(FP), AX // AX = ctx
- MOVBQZX 8(AX), DI // DI = byte at ctx+8, zero-extended; used as the right-shift count of every peek
- MOVQ 16(AX), BX // BX = value at ctx+16: output pointer of stream 0, advanced by 2 per iteration
- MOVQ 48(AX), SI // SI = value at ctx+48: BX is compared against it at the top of every iteration
- MOVQ 24(AX), R8 // R8 = value at ctx+24: byte distance between the output streams (dstEvery in the comments below)
- MOVQ 32(AX), R9 // R9 = value at ctx+32: base address of the table of 16-bit entries
- MOVQ (AX), R10 // R10 = value at ctx+0: base of the four bit reader records, which sit 48 bytes apart
- // Main loop
- main_loop:
- XORL DX, DX // reset the exhausted counter kept in DL
- CMPQ BX, SI
- SETGE DL // DL = 1 when BX >= SI (signed compare); a non-zero DL ends the loop after this iteration
- // br0.fillFast32()
- MOVQ 32(R10), R11 // R11 = br0.value
- MOVBQZX 40(R10), R12 // R12 = br0.bitsRead (one byte, zero-extended)
- CMPQ R12, $0x20
- JBE skip_fill0 // no refill while bitsRead <= 32 (unsigned)
- MOVQ 24(R10), AX // AX = br0.off
- SUBQ $0x20, R12 // bitsRead -= 32
- SUBQ $0x04, AX // off -= 4
- MOVQ (R10), R13 // R13 = base pointer of br0's input
- // b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (AX)(R13*1), R13 // low = 32-bit load at input base + off
- MOVQ R12, CX // variable shifts take their count from CL
- SHLQ CL, R13
- MOVQ AX, 24(R10) // write the decremented off back to the reader
- ORQ R13, R11
- // exhausted += (br0.off < 4)
- CMPQ AX, $0x04 // CF is set when off < 4 (unsigned)
- ADCB $+0, DL // DL += CF
- skip_fill0:
- // val0 := br0.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13 // val0 = value >> DI
- // v0 := table[val0&mask]
- MOVW (R9)(R13*2), CX // 16-bit entry: CL = number of bits to advance, CH = decoded byte
- // br0.advance(uint8(v0.entry))
- MOVB CH, AL // AL = first decoded byte
- SHLQ CL, R11 // drop the consumed bits from value
- ADDB CL, R12 // bitsRead += consumed bits
- // val1 := br0.peekTopBits(peekBits)
- MOVQ DI, CX
- MOVQ R11, R13
- SHRQ CL, R13 // val1 = value >> DI
- // v1 := table[val1&mask]
- MOVW (R9)(R13*2), CX
- // br0.advance(uint8(v1.entry))
- MOVB CH, AH // AH = second decoded byte
- SHLQ CL, R11
- ADDB CL, R12
- // these two writes get coalesced
- // out[id * dstEvery + 0] = uint8(v0.entry >> 8)
- // out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- MOVW AX, (BX) // single 16-bit store of AL, AH to stream 0
- // update the bitreader structure
- MOVQ R11, 32(R10) // br0.value
- MOVB R12, 40(R10) // br0.bitsRead
- // br1.fillFast32()
- MOVQ 80(R10), R11 // R11 = br1.value
- MOVBQZX 88(R10), R12 // R12 = br1.bitsRead
- CMPQ R12, $0x20
- JBE skip_fill1 // no refill while bitsRead <= 32 (unsigned)
- MOVQ 72(R10), AX // AX = br1.off
- SUBQ $0x20, R12 // bitsRead -= 32
- SUBQ $0x04, AX // off -= 4
- MOVQ 48(R10), R13 // R13 = base pointer of br1's input
- // b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (AX)(R13*1), R13 // low = 32-bit load at input base + off
- MOVQ R12, CX
- SHLQ CL, R13
- MOVQ AX, 72(R10) // write the decremented off back to the reader
- ORQ R13, R11
- // exhausted += (br1.off < 4)
- CMPQ AX, $0x04 // CF is set when off < 4 (unsigned)
- ADCB $+0, DL // DL += CF
- skip_fill1:
- // val0 := br1.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13 // val0 = value >> DI
- // v0 := table[val0&mask]
- MOVW (R9)(R13*2), CX // CL = bits to advance, CH = decoded byte
- // br1.advance(uint8(v0.entry))
- MOVB CH, AL // AL = first decoded byte
- SHLQ CL, R11
- ADDB CL, R12
- // val1 := br1.peekTopBits(peekBits)
- MOVQ DI, CX
- MOVQ R11, R13
- SHRQ CL, R13 // val1 = value >> DI
- // v1 := table[val1&mask]
- MOVW (R9)(R13*2), CX
- // br1.advance(uint8(v1.entry))
- MOVB CH, AH // AH = second decoded byte
- SHLQ CL, R11
- ADDB CL, R12
- // these two writes get coalesced
- // out[id * dstEvery + 0] = uint8(v0.entry >> 8)
- // out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- MOVW AX, (BX)(R8*1) // stream 1 lives at BX + R8
- // update the bitreader structure
- MOVQ R11, 80(R10) // br1.value
- MOVB R12, 88(R10) // br1.bitsRead
- // br2.fillFast32()
- MOVQ 128(R10), R11 // R11 = br2.value
- MOVBQZX 136(R10), R12 // R12 = br2.bitsRead
- CMPQ R12, $0x20
- JBE skip_fill2 // no refill while bitsRead <= 32 (unsigned)
- MOVQ 120(R10), AX // AX = br2.off
- SUBQ $0x20, R12 // bitsRead -= 32
- SUBQ $0x04, AX // off -= 4
- MOVQ 96(R10), R13 // R13 = base pointer of br2's input
- // b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (AX)(R13*1), R13 // low = 32-bit load at input base + off
- MOVQ R12, CX
- SHLQ CL, R13
- MOVQ AX, 120(R10) // write the decremented off back to the reader
- ORQ R13, R11
- // exhausted += (br2.off < 4)
- CMPQ AX, $0x04 // CF is set when off < 4 (unsigned)
- ADCB $+0, DL // DL += CF
- skip_fill2:
- // val0 := br2.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13 // val0 = value >> DI
- // v0 := table[val0&mask]
- MOVW (R9)(R13*2), CX // CL = bits to advance, CH = decoded byte
- // br2.advance(uint8(v0.entry))
- MOVB CH, AL // AL = first decoded byte
- SHLQ CL, R11
- ADDB CL, R12
- // val1 := br2.peekTopBits(peekBits)
- MOVQ DI, CX
- MOVQ R11, R13
- SHRQ CL, R13 // val1 = value >> DI
- // v1 := table[val1&mask]
- MOVW (R9)(R13*2), CX
- // br2.advance(uint8(v1.entry))
- MOVB CH, AH // AH = second decoded byte
- SHLQ CL, R11
- ADDB CL, R12
- // these two writes get coalesced
- // out[id * dstEvery + 0] = uint8(v0.entry >> 8)
- // out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- MOVW AX, (BX)(R8*2) // stream 2 lives at BX + 2*R8
- // update the bitreader structure
- MOVQ R11, 128(R10) // br2.value
- MOVB R12, 136(R10) // br2.bitsRead
- // br3.fillFast32()
- MOVQ 176(R10), R11 // R11 = br3.value
- MOVBQZX 184(R10), R12 // R12 = br3.bitsRead
- CMPQ R12, $0x20
- JBE skip_fill3 // no refill while bitsRead <= 32 (unsigned)
- MOVQ 168(R10), AX // AX = br3.off
- SUBQ $0x20, R12 // bitsRead -= 32
- SUBQ $0x04, AX // off -= 4
- MOVQ 144(R10), R13 // R13 = base pointer of br3's input
- // b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (AX)(R13*1), R13 // low = 32-bit load at input base + off
- MOVQ R12, CX
- SHLQ CL, R13
- MOVQ AX, 168(R10) // write the decremented off back to the reader
- ORQ R13, R11
- // exhausted += (br3.off < 4)
- CMPQ AX, $0x04 // CF is set when off < 4 (unsigned)
- ADCB $+0, DL // DL += CF
- skip_fill3:
- // val0 := br3.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13 // val0 = value >> DI
- // v0 := table[val0&mask]
- MOVW (R9)(R13*2), CX // CL = bits to advance, CH = decoded byte
- // br3.advance(uint8(v0.entry))
- MOVB CH, AL // AL = first decoded byte
- SHLQ CL, R11
- ADDB CL, R12
- // val1 := br3.peekTopBits(peekBits)
- MOVQ DI, CX
- MOVQ R11, R13
- SHRQ CL, R13 // val1 = value >> DI
- // v1 := table[val1&mask]
- MOVW (R9)(R13*2), CX
- // br3.advance(uint8(v1.entry))
- MOVB CH, AH // AH = second decoded byte
- SHLQ CL, R11
- ADDB CL, R12
- // these two writes get coalesced
- // out[id * dstEvery + 0] = uint8(v0.entry >> 8)
- // out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- LEAQ (R8)(R8*2), CX // CX = 3*R8; a scale of 3 cannot be encoded in the address, so it is computed here
- MOVW AX, (BX)(CX*1) // stream 3 lives at BX + 3*R8
- // update the bitreader structure
- MOVQ R11, 176(R10) // br3.value
- MOVB R12, 184(R10) // br3.bitsRead
- ADDQ $0x02, BX // two bytes were written to every stream
- TESTB DL, DL
- JZ main_loop // keep looping only while the exhausted counter is zero
- MOVQ ctx+0(FP), AX // AX was used as scratch above, so ctx is reloaded
- SUBQ 16(AX), BX // BX = bytes written per stream (current pointer minus the start pointer still stored at ctx+16)
- SHLQ $0x02, BX // multiplied by the four streams
- MOVQ BX, 40(AX) // result is stored at ctx+40
- RET
- // func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
- TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8 // each loop iteration decodes four bytes from each of the four bit readers br0..br3
- // Preload values: the ctx fields below are read once and stay in registers for the whole loop
- MOVQ ctx+0(FP), CX // CX = ctx (CX is reused as scratch afterwards)
- MOVBQZX 8(CX), DI // DI = byte at ctx+8, zero-extended; used as the right-shift count of every peek
- MOVQ 16(CX), BX // BX = value at ctx+16: output pointer of stream 0, advanced by 4 per iteration
- MOVQ 48(CX), SI // SI = value at ctx+48: BX is compared against it at the top of every iteration
- MOVQ 24(CX), R8 // R8 = value at ctx+24: byte distance between the output streams (dstEvery in the comments below)
- MOVQ 32(CX), R9 // R9 = value at ctx+32: base address of the table of 16-bit entries
- MOVQ (CX), R10 // R10 = value at ctx+0: base of the four bit reader records, which sit 48 bytes apart
- // Main loop
- main_loop:
- XORL DX, DX // reset the exhausted counter kept in DL
- CMPQ BX, SI
- SETGE DL // DL = 1 when BX >= SI (signed compare); a non-zero DL ends the loop after this iteration
- // br0.fillFast32()
- MOVQ 32(R10), R11 // R11 = br0.value
- MOVBQZX 40(R10), R12 // R12 = br0.bitsRead (one byte, zero-extended)
- CMPQ R12, $0x20
- JBE skip_fill0 // no refill while bitsRead <= 32 (unsigned)
- MOVQ 24(R10), R13 // R13 = br0.off
- SUBQ $0x20, R12 // bitsRead -= 32
- SUBQ $0x04, R13 // off -= 4
- MOVQ (R10), R14 // R14 = base pointer of br0's input
- // b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (R13)(R14*1), R14 // low = 32-bit load at input base + off
- MOVQ R12, CX // variable shifts take their count from CL
- SHLQ CL, R14
- MOVQ R13, 24(R10) // write the decremented off back to the reader
- ORQ R14, R11
- // exhausted += (br0.off < 4)
- CMPQ R13, $0x04 // CF is set when off < 4 (unsigned)
- ADCB $+0, DL // DL += CF
- skip_fill0:
- // val0 := br0.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13 // val0 = value >> DI
- // v0 := table[val0&mask]
- MOVW (R9)(R13*2), CX // 16-bit entry: CL = number of bits to advance, CH = decoded byte
- // br0.advance(uint8(v0.entry))
- MOVB CH, AL // AL = v0 byte
- SHLQ CL, R11 // drop the consumed bits from value
- ADDB CL, R12 // bitsRead += consumed bits
- // val1 := br0.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13
- // v1 := table[val1&mask]
- MOVW (R9)(R13*2), CX
- // br0.advance(uint8(v1.entry))
- MOVB CH, AH // AH = v1 byte
- SHLQ CL, R11
- ADDB CL, R12
- BSWAPL AX // byte-reverse EAX: v0 moves to bits 31..24 and v1 to bits 23..16, freeing AL/AH
- // val2 := br0.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13
- // v2 := table[val2&mask]
- MOVW (R9)(R13*2), CX
- // br0.advance(uint8(v2.entry))
- MOVB CH, AH // AH = v2 byte
- SHLQ CL, R11
- ADDB CL, R12
- // val3 := br0.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13
- // v3 := table[val3&mask]
- MOVW (R9)(R13*2), CX
- // br0.advance(uint8(v3.entry))
- MOVB CH, AL // AL = v3 byte
- SHLQ CL, R11
- ADDB CL, R12
- BSWAPL AX // byte-reverse again: EAX bytes are now v0, v1, v2, v3 from lowest to highest
- // these four writes get coalesced
- // out[id * dstEvery + 0] = uint8(v0.entry >> 8)
- // out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- // out[id * dstEvery + 2] = uint8(v2.entry >> 8)
- // out[id * dstEvery + 3] = uint8(v3.entry >> 8)
- MOVL AX, (BX) // single 32-bit store to stream 0
- // update the bitreader structure
- MOVQ R11, 32(R10) // br0.value
- MOVB R12, 40(R10) // br0.bitsRead
- // br1.fillFast32()
- MOVQ 80(R10), R11 // R11 = br1.value
- MOVBQZX 88(R10), R12 // R12 = br1.bitsRead
- CMPQ R12, $0x20
- JBE skip_fill1 // no refill while bitsRead <= 32 (unsigned)
- MOVQ 72(R10), R13 // R13 = br1.off
- SUBQ $0x20, R12 // bitsRead -= 32
- SUBQ $0x04, R13 // off -= 4
- MOVQ 48(R10), R14 // R14 = base pointer of br1's input
- // b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (R13)(R14*1), R14 // low = 32-bit load at input base + off
- MOVQ R12, CX
- SHLQ CL, R14
- MOVQ R13, 72(R10) // write the decremented off back to the reader
- ORQ R14, R11
- // exhausted += (br1.off < 4)
- CMPQ R13, $0x04 // CF is set when off < 4 (unsigned)
- ADCB $+0, DL // DL += CF
- skip_fill1:
- // val0 := br1.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13 // val0 = value >> DI
- // v0 := table[val0&mask]
- MOVW (R9)(R13*2), CX // CL = bits to advance, CH = decoded byte
- // br1.advance(uint8(v0.entry))
- MOVB CH, AL // AL = v0 byte
- SHLQ CL, R11
- ADDB CL, R12
- // val1 := br1.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13
- // v1 := table[val1&mask]
- MOVW (R9)(R13*2), CX
- // br1.advance(uint8(v1.entry))
- MOVB CH, AH // AH = v1 byte
- SHLQ CL, R11
- ADDB CL, R12
- BSWAPL AX // v0, v1 move to the two highest bytes of EAX
- // val2 := br1.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13
- // v2 := table[val2&mask]
- MOVW (R9)(R13*2), CX
- // br1.advance(uint8(v2.entry))
- MOVB CH, AH // AH = v2 byte
- SHLQ CL, R11
- ADDB CL, R12
- // val3 := br1.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13
- // v3 := table[val3&mask]
- MOVW (R9)(R13*2), CX
- // br1.advance(uint8(v3.entry))
- MOVB CH, AL // AL = v3 byte
- SHLQ CL, R11
- ADDB CL, R12
- BSWAPL AX // EAX bytes are now v0, v1, v2, v3 from lowest to highest
- // these four writes get coalesced
- // out[id * dstEvery + 0] = uint8(v0.entry >> 8)
- // out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- // out[id * dstEvery + 2] = uint8(v2.entry >> 8)
- // out[id * dstEvery + 3] = uint8(v3.entry >> 8)
- MOVL AX, (BX)(R8*1) // stream 1 lives at BX + R8
- // update the bitreader structure
- MOVQ R11, 80(R10) // br1.value
- MOVB R12, 88(R10) // br1.bitsRead
- // br2.fillFast32()
- MOVQ 128(R10), R11 // R11 = br2.value
- MOVBQZX 136(R10), R12 // R12 = br2.bitsRead
- CMPQ R12, $0x20
- JBE skip_fill2 // no refill while bitsRead <= 32 (unsigned)
- MOVQ 120(R10), R13 // R13 = br2.off
- SUBQ $0x20, R12 // bitsRead -= 32
- SUBQ $0x04, R13 // off -= 4
- MOVQ 96(R10), R14 // R14 = base pointer of br2's input
- // b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (R13)(R14*1), R14 // low = 32-bit load at input base + off
- MOVQ R12, CX
- SHLQ CL, R14
- MOVQ R13, 120(R10) // write the decremented off back to the reader
- ORQ R14, R11
- // exhausted += (br2.off < 4)
- CMPQ R13, $0x04 // CF is set when off < 4 (unsigned)
- ADCB $+0, DL // DL += CF
- skip_fill2:
- // val0 := br2.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13 // val0 = value >> DI
- // v0 := table[val0&mask]
- MOVW (R9)(R13*2), CX // CL = bits to advance, CH = decoded byte
- // br2.advance(uint8(v0.entry))
- MOVB CH, AL // AL = v0 byte
- SHLQ CL, R11
- ADDB CL, R12
- // val1 := br2.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13
- // v1 := table[val1&mask]
- MOVW (R9)(R13*2), CX
- // br2.advance(uint8(v1.entry))
- MOVB CH, AH // AH = v1 byte
- SHLQ CL, R11
- ADDB CL, R12
- BSWAPL AX // v0, v1 move to the two highest bytes of EAX
- // val2 := br2.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13
- // v2 := table[val2&mask]
- MOVW (R9)(R13*2), CX
- // br2.advance(uint8(v2.entry))
- MOVB CH, AH // AH = v2 byte
- SHLQ CL, R11
- ADDB CL, R12
- // val3 := br2.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13
- // v3 := table[val3&mask]
- MOVW (R9)(R13*2), CX
- // br2.advance(uint8(v3.entry))
- MOVB CH, AL // AL = v3 byte
- SHLQ CL, R11
- ADDB CL, R12
- BSWAPL AX // EAX bytes are now v0, v1, v2, v3 from lowest to highest
- // these four writes get coalesced
- // out[id * dstEvery + 0] = uint8(v0.entry >> 8)
- // out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- // out[id * dstEvery + 2] = uint8(v2.entry >> 8)
- // out[id * dstEvery + 3] = uint8(v3.entry >> 8)
- MOVL AX, (BX)(R8*2) // stream 2 lives at BX + 2*R8
- // update the bitreader structure
- MOVQ R11, 128(R10) // br2.value
- MOVB R12, 136(R10) // br2.bitsRead
- // br3.fillFast32()
- MOVQ 176(R10), R11 // R11 = br3.value
- MOVBQZX 184(R10), R12 // R12 = br3.bitsRead
- CMPQ R12, $0x20
- JBE skip_fill3 // no refill while bitsRead <= 32 (unsigned)
- MOVQ 168(R10), R13 // R13 = br3.off
- SUBQ $0x20, R12 // bitsRead -= 32
- SUBQ $0x04, R13 // off -= 4
- MOVQ 144(R10), R14 // R14 = base pointer of br3's input
- // b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (R13)(R14*1), R14 // low = 32-bit load at input base + off
- MOVQ R12, CX
- SHLQ CL, R14
- MOVQ R13, 168(R10) // write the decremented off back to the reader
- ORQ R14, R11
- // exhausted += (br3.off < 4)
- CMPQ R13, $0x04 // CF is set when off < 4 (unsigned)
- ADCB $+0, DL // DL += CF
- skip_fill3:
- // val0 := br3.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13 // val0 = value >> DI
- // v0 := table[val0&mask]
- MOVW (R9)(R13*2), CX // CL = bits to advance, CH = decoded byte
- // br3.advance(uint8(v0.entry))
- MOVB CH, AL // AL = v0 byte
- SHLQ CL, R11
- ADDB CL, R12
- // val1 := br3.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13
- // v1 := table[val1&mask]
- MOVW (R9)(R13*2), CX
- // br3.advance(uint8(v1.entry))
- MOVB CH, AH // AH = v1 byte
- SHLQ CL, R11
- ADDB CL, R12
- BSWAPL AX // v0, v1 move to the two highest bytes of EAX
- // val2 := br3.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13
- // v2 := table[val2&mask]
- MOVW (R9)(R13*2), CX
- // br3.advance(uint8(v2.entry))
- MOVB CH, AH // AH = v2 byte
- SHLQ CL, R11
- ADDB CL, R12
- // val3 := br3.peekTopBits(peekBits)
- MOVQ R11, R13
- MOVQ DI, CX
- SHRQ CL, R13
- // v3 := table[val3&mask]
- MOVW (R9)(R13*2), CX
- // br3.advance(uint8(v3.entry))
- MOVB CH, AL // AL = v3 byte
- SHLQ CL, R11
- ADDB CL, R12
- BSWAPL AX // EAX bytes are now v0, v1, v2, v3 from lowest to highest
- // these four writes get coalesced
- // out[id * dstEvery + 0] = uint8(v0.entry >> 8)
- // out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- // out[id * dstEvery + 2] = uint8(v2.entry >> 8)
- // out[id * dstEvery + 3] = uint8(v3.entry >> 8)
- LEAQ (R8)(R8*2), CX // CX = 3*R8; a scale of 3 cannot be encoded in the address, so it is computed here
- MOVL AX, (BX)(CX*1) // stream 3 lives at BX + 3*R8
- // update the bitreader structure
- MOVQ R11, 176(R10) // br3.value
- MOVB R12, 184(R10) // br3.bitsRead
- ADDQ $0x04, BX // four bytes were written to every stream
- TESTB DL, DL
- JZ main_loop // keep looping only while the exhausted counter is zero
- MOVQ ctx+0(FP), AX // reload ctx; AX held decoded bytes until now
- SUBQ 16(AX), BX // BX = bytes written per stream (current pointer minus the start pointer still stored at ctx+16)
- SHLQ $0x02, BX // multiplied by the four streams
- MOVQ BX, 40(AX) // result is stored at ctx+40
- RET
- // func decompress1x_main_loop_amd64(ctx *decompress1xContext)
- TEXT ·decompress1x_main_loop_amd64(SB), $0-8 // decodes four bytes per iteration from one bit reader; ctx+40 receives the byte count, or -1 on error
- MOVQ ctx+0(FP), CX // CX = ctx
- MOVQ 16(CX), DX // DX = value at ctx+16: output write pointer
- MOVQ 24(CX), BX // BX = value at ctx+24 (presumably the output capacity in bytes; it is added to DX below)
- CMPQ BX, $0x04
- JB error_max_decoded_size_exceeded // fewer than 4 (unsigned) is reported as an error up front
- LEAQ (DX)(BX*1), BX // BX = DX + BX: the bound the write pointer is checked against in the loop
- MOVQ (CX), SI // SI = value at ctx+0: pointer to the bit reader record
- MOVQ (SI), R8 // R8 = base pointer of the reader's input
- MOVQ 24(SI), R9 // R9 = reader field at +24: input offset, decremented by 4 on every refill
- MOVQ 32(SI), R10 // R10 = reader field at +32: 64-bit bit container
- MOVBQZX 40(SI), R11 // R11 = reader byte at +40: consumed-bit counter, zero-extended
- MOVQ 32(CX), SI // SI is reused: value at ctx+32, base address of the table of 16-bit entries
- MOVBQZX 8(CX), DI // DI = byte at ctx+8, zero-extended; right-shift count of every peek
- JMP loop_condition // test the input offset before the first iteration
- main_loop:
- // Check if we have room for 4 bytes in the output buffer
- LEAQ 4(DX), CX
- CMPQ CX, BX
- JGE error_max_decoded_size_exceeded // error when DX+4 >= BX (signed compare)
- // Decode 4 values
- CMPQ R11, $0x20
- JL bitReader_fillFast_1_end // refill only once at least 32 bits have been consumed
- SUBQ $0x20, R11 // consumed bits -= 32
- SUBQ $0x04, R9 // input offset -= 4
- MOVL (R8)(R9*1), R12 // 32-bit load at input base + offset
- MOVQ R11, CX // variable shifts take their count from CL
- SHLQ CL, R12
- ORQ R12, R10 // merge the new bits into the container
- bitReader_fillFast_1_end:
- MOVQ DI, CX
- MOVQ R10, R12
- SHRQ CL, R12 // R12 = container >> DI: table index
- MOVW (SI)(R12*2), CX // 16-bit entry: CL = number of bits to advance, CH = decoded byte
- MOVB CH, AL // AL = first decoded byte
- MOVBQZX CL, CX // CX = bit count, zero-extended so it can be added as 64 bits
- ADDQ CX, R11 // consumed bits += bit count
- SHLQ CL, R10 // drop the consumed bits from the container
- MOVQ DI, CX
- MOVQ R10, R12
- SHRQ CL, R12
- MOVW (SI)(R12*2), CX
- MOVB CH, AH // AH = second decoded byte
- MOVBQZX CL, CX
- ADDQ CX, R11
- SHLQ CL, R10
- BSWAPL AX // byte-reverse EAX: first and second byte move to the two highest bytes, freeing AL/AH
- CMPQ R11, $0x20
- JL bitReader_fillFast_2_end // second refill check, same rule as the first
- SUBQ $0x20, R11
- SUBQ $0x04, R9
- MOVL (R8)(R9*1), R12
- MOVQ R11, CX
- SHLQ CL, R12
- ORQ R12, R10
- bitReader_fillFast_2_end:
- MOVQ DI, CX
- MOVQ R10, R12
- SHRQ CL, R12
- MOVW (SI)(R12*2), CX
- MOVB CH, AH // AH = third decoded byte
- MOVBQZX CL, CX
- ADDQ CX, R11
- SHLQ CL, R10
- MOVQ DI, CX
- MOVQ R10, R12
- SHRQ CL, R12
- MOVW (SI)(R12*2), CX
- MOVB CH, AL // AL = fourth decoded byte
- MOVBQZX CL, CX
- ADDQ CX, R11
- SHLQ CL, R10
- BSWAPL AX // byte-reverse again: EAX bytes are now in decode order from lowest to highest
- // Store the decoded values
- MOVL AX, (DX) // single 32-bit store of the four bytes
- ADDQ $0x04, DX
- loop_condition:
- CMPQ R9, $0x08
- JGE main_loop // keep decoding while the input offset is >= 8 (signed compare)
- // Update ctx structure
- MOVQ ctx+0(FP), AX
- SUBQ 16(AX), DX // DX = bytes written (write pointer minus the start pointer still stored at ctx+16)
- MOVQ DX, 40(AX) // byte count is stored at ctx+40
- MOVQ (AX), AX // AX = bit reader record; write the register copies back into it
- MOVQ R9, 24(AX) // input offset
- MOVQ R10, 32(AX) // bit container
- MOVB R11, 40(AX) // consumed-bit counter (low byte only)
- RET
- // Report error: ctx+40 is set to -1; the bit reader record is not written back on this path
- error_max_decoded_size_exceeded:
- MOVQ ctx+0(FP), AX
- MOVQ $-1, CX
- MOVQ CX, 40(AX)
- RET
- // func decompress1x_main_loop_bmi2(ctx *decompress1xContext)
- // Requires: BMI2 (SHLXQ/SHRXQ take the shift count from any register, so CL is not needed for shifts)
- TEXT ·decompress1x_main_loop_bmi2(SB), $0-8 // decodes four bytes per iteration from one bit reader; ctx+40 receives the byte count, or -1 on error
- MOVQ ctx+0(FP), CX // CX = ctx
- MOVQ 16(CX), DX // DX = value at ctx+16: output write pointer
- MOVQ 24(CX), BX // BX = value at ctx+24 (presumably the output capacity in bytes; it is added to DX below)
- CMPQ BX, $0x04
- JB error_max_decoded_size_exceeded // fewer than 4 (unsigned) is reported as an error up front
- LEAQ (DX)(BX*1), BX // BX = DX + BX: the bound the write pointer is checked against in the loop
- MOVQ (CX), SI // SI = value at ctx+0: pointer to the bit reader record
- MOVQ (SI), R8 // R8 = base pointer of the reader's input
- MOVQ 24(SI), R9 // R9 = reader field at +24: input offset, decremented by 4 on every refill
- MOVQ 32(SI), R10 // R10 = reader field at +32: 64-bit bit container
- MOVBQZX 40(SI), R11 // R11 = reader byte at +40: consumed-bit counter, zero-extended
- MOVQ 32(CX), SI // SI is reused: value at ctx+32, base address of the table of 16-bit entries
- MOVBQZX 8(CX), DI // DI = byte at ctx+8, zero-extended; right-shift count of every peek
- JMP loop_condition // test the input offset before the first iteration
- main_loop:
- // Check if we have room for 4 bytes in the output buffer
- LEAQ 4(DX), CX
- CMPQ CX, BX
- JGE error_max_decoded_size_exceeded // error when DX+4 >= BX (signed compare)
- // Decode 4 values
- CMPQ R11, $0x20
- JL bitReader_fillFast_1_end // refill only once at least 32 bits have been consumed
- SUBQ $0x20, R11 // consumed bits -= 32
- SUBQ $0x04, R9 // input offset -= 4
- MOVL (R8)(R9*1), CX // 32-bit load at input base + offset
- SHLXQ R11, CX, CX // CX <<= R11
- ORQ CX, R10 // merge the new bits into the container
- bitReader_fillFast_1_end:
- SHRXQ DI, R10, CX // CX = container >> DI: table index
- MOVW (SI)(CX*2), CX // 16-bit entry: CL = number of bits to advance, CH = decoded byte
- MOVB CH, AL // AL = first decoded byte
- MOVBQZX CL, CX // CX = bit count, zero-extended
- ADDQ CX, R11 // consumed bits += bit count
- SHLXQ CX, R10, R10 // drop the consumed bits from the container
- SHRXQ DI, R10, CX
- MOVW (SI)(CX*2), CX
- MOVB CH, AH // AH = second decoded byte
- MOVBQZX CL, CX
- ADDQ CX, R11
- SHLXQ CX, R10, R10
- BSWAPL AX // byte-reverse EAX: first and second byte move to the two highest bytes, freeing AL/AH
- CMPQ R11, $0x20
- JL bitReader_fillFast_2_end // second refill check, same rule as the first
- SUBQ $0x20, R11
- SUBQ $0x04, R9
- MOVL (R8)(R9*1), CX
- SHLXQ R11, CX, CX
- ORQ CX, R10
- bitReader_fillFast_2_end:
- SHRXQ DI, R10, CX
- MOVW (SI)(CX*2), CX
- MOVB CH, AH // AH = third decoded byte
- MOVBQZX CL, CX
- ADDQ CX, R11
- SHLXQ CX, R10, R10
- SHRXQ DI, R10, CX
- MOVW (SI)(CX*2), CX
- MOVB CH, AL // AL = fourth decoded byte
- MOVBQZX CL, CX
- ADDQ CX, R11
- SHLXQ CX, R10, R10
- BSWAPL AX // byte-reverse again: EAX bytes are now in decode order from lowest to highest
- // Store the decoded values
- MOVL AX, (DX) // single 32-bit store of the four bytes
- ADDQ $0x04, DX
- loop_condition:
- CMPQ R9, $0x08
- JGE main_loop // keep decoding while the input offset is >= 8 (signed compare)
- // Update ctx structure
- MOVQ ctx+0(FP), AX
- SUBQ 16(AX), DX // DX = bytes written (write pointer minus the start pointer still stored at ctx+16)
- MOVQ DX, 40(AX) // byte count is stored at ctx+40
- MOVQ (AX), AX // AX = bit reader record; write the register copies back into it
- MOVQ R9, 24(AX) // input offset
- MOVQ R10, 32(AX) // bit container
- MOVB R11, 40(AX) // consumed-bit counter (low byte only)
- RET
- // Report error: ctx+40 is set to -1; the bit reader record is not written back on this path
- error_max_decoded_size_exceeded:
- MOVQ ctx+0(FP), AX
- MOVQ $-1, CX
- MOVQ CX, 40(AX)
- RET
|