123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178 |
- //===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file implements the __udivmodsi4 (32-bit unsigned integer divide and
- // modulus) function for the ARM 32-bit architecture.
- //
- //===----------------------------------------------------------------------===//
- #include "../assembly.h"
- .syntax unified
- .text
- DEFINE_CODE_STATE
- @ unsigned int __udivmodsi4(unsigned int dividend, unsigned int divisor,
- @ unsigned int *remainder)
- @ Calculate the quotient and remainder of the (unsigned) division. The return
- @ value (r0) is the quotient; the remainder is stored through the pointer in r2.
- .p2align 2
- DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
- #if __ARM_ARCH_EXT_IDIV__
- tst r1, r1 // divisor == 0?
- beq LOCAL_LABEL(divby0)
- mov r3, r0 // keep the dividend; r0 is overwritten by the quotient
- udiv r0, r3, r1 // r0 = dividend / divisor
- mls r1, r0, r1, r3 // r1 = dividend - quotient * divisor
- str r1, [r2] // *remainder = r1
- bx lr
- #else
- cmp r1, #1
- bcc LOCAL_LABEL(divby0) // divisor < 1 (unsigned), i.e. divisor == 0
- beq LOCAL_LABEL(divby1) // divisor == 1
- cmp r0, r1
- bcc LOCAL_LABEL(quotient0) // dividend < divisor
- // Implement division using binary long division algorithm.
- //
- // r0 is the numerator (it holds the remainder at the end), r1 the denominator.
- //
- // The code before the computed jump (bx ip / JMP(ip)) computes the correct shift I, so that
- // r0 and (r1 << I) have the highest bit set in the same position.
- // At the time of the jump, ip := div0block - 12 * I in ARM mode (div0block + 1 - 14 * I in Thumb-2 mode).
- // This depends on the fixed instruction size of block.
- // For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
- //
- // block(shift) implements the test-and-update-quotient core: it compares r0 with (r1 << shift).
- // It assumes (r1 << shift) can be computed without overflow and
- // that r0 < 2 * (r1 << shift). The quotient is stored in r3.
- # ifdef __ARM_FEATURE_CLZ
- clz ip, r0
- clz r3, r1
- // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
- sub r3, r3, ip // r3 = I = clz(r1) - clz(r0)
- # if defined(USE_THUMB_2)
- adr ip, LOCAL_LABEL(div0block) + 1 // +1: bit 0 set so that the bx below stays in Thumb state
- sub ip, ip, r3, lsl #1 // Thumb-2 only: extra 2 * I, for 14 * I in total
- # else
- adr ip, LOCAL_LABEL(div0block)
- # endif
- sub ip, ip, r3, lsl #2 // ip -= 4 * I
- sub ip, ip, r3, lsl #3 // ip -= 8 * I
- mov r3, #0 // clear the quotient accumulator
- bx ip // enter the unrolled sequence at block(I)
- # else
- # if defined(USE_THUMB_2)
- # error THUMB mode requires CLZ or UDIV
- # endif
- str r4, [sp, #-8]! // save r4; sp is pre-decremented by 8
- mov r4, r0 // r4 = working copy of the dividend for the shift search
- adr ip, LOCAL_LABEL(div0block) // start with I = 0
- lsr r3, r4, #16
- cmp r3, r1
- movhs r4, r3 // if (r4 >> 16) >= r1: keep the shifted value ...
- subhs ip, ip, #(16 * 12) // ... and move the entry point back by 16 blocks
- lsr r3, r4, #8
- cmp r3, r1
- movhs r4, r3
- subhs ip, ip, #(8 * 12) // same step for 8 blocks
- lsr r3, r4, #4
- cmp r3, r1
- movhs r4, r3
- subhs ip, #(4 * 12) // same step for 4 blocks
- lsr r3, r4, #2
- cmp r3, r1
- movhs r4, r3
- subhs ip, ip, #(2 * 12) // same step for 2 blocks
- // Last block, no need to update r3 or r4.
- cmp r1, r4, lsr #1
- subls ip, ip, #(1 * 12) // if r1 <= (r4 >> 1): one more block
- ldr r4, [sp], #8 // restore r4, we are done with it.
- mov r3, #0 // clear the quotient accumulator
- JMP(ip)
- # endif
- #define IMM #
- #define block(shift) \
- cmp r0, r1, lsl IMM shift; \
- ITT(hs); \
- WIDE(addhs) r3, r3, IMM (1 << shift); \
- WIDE(subhs) r0, r0, r1, lsl IMM shift
- block(31)
- block(30)
- block(29)
- block(28)
- block(27)
- block(26)
- block(25)
- block(24)
- block(23)
- block(22)
- block(21)
- block(20)
- block(19)
- block(18)
- block(17)
- block(16)
- block(15)
- block(14)
- block(13)
- block(12)
- block(11)
- block(10)
- block(9)
- block(8)
- block(7)
- block(6)
- block(5)
- block(4)
- block(3)
- block(2)
- block(1)
- LOCAL_LABEL(div0block):
- block(0)
- str r0, [r2] // *remainder = what is left of the numerator
- mov r0, r3 // return the accumulated quotient
- JMP(lr)
- LOCAL_LABEL(quotient0):
- str r0, [r2] // dividend < divisor: *remainder = dividend
- mov r0, #0 // quotient = 0
- JMP(lr)
- LOCAL_LABEL(divby1):
- mov r3, #0
- str r3, [r2] // divisor == 1: *remainder = 0; r0 (the dividend) is returned unchanged
- JMP(lr)
- #endif // __ARM_ARCH_EXT_IDIV__
- LOCAL_LABEL(divby0): // divisor == 0; nothing is stored through r2 on this path
- mov r0, #0
- #ifdef __ARM_EABI__
- b __aeabi_idiv0 // plain branch with r0 = 0; lr is left unchanged
- #else
- JMP(lr)
- #endif
- END_COMPILERRT_FUNCTION(__udivmodsi4)
- NO_EXEC_STACK_DIRECTIVE
|