123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262 |
- //===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file implements the __udivsi3 (32-bit unsigned integer divide)
- // function for the ARM 32-bit architecture.
- //
- //===----------------------------------------------------------------------===//
- #include "../assembly.h"
- .syntax unified // Use the unified ARM/Thumb assembler syntax for the whole file.
- .text // Emit the code below into the text section.
- DEFINE_CODE_STATE // Macro from assembly.h; NOTE(review): presumably selects ARM or Thumb state - confirm.
- .p2align 2 // Align the function entry to 2^2 = 4 bytes.
- DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) // NOTE(review): presumably makes __aeabi_uidiv an alias of __udivsi3 - confirm in assembly.h.
- @ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
- @ Calculate and return the quotient of the (unsigned) division.
- DEFINE_COMPILERRT_FUNCTION(__udivsi3) // In: r0 = numerator, r1 = denominator. Out: r0 = quotient.
- #if __ARM_ARCH_EXT_IDIV__ // Hardware divide path.
- tst r1, r1 // Z is set when the denominator is zero.
- beq LOCAL_LABEL(divby0)
- udiv r0, r0, r1 // r0 = r0 / r1 (unsigned).
- bx lr
- LOCAL_LABEL(divby0):
- // Use movs for compatibility with v8-m.base.
- movs r0, #0 // Division by zero: r0 = 0.
- # ifdef __ARM_EABI__
- b __aeabi_idiv0 // Branch without link: lr still holds the return address of our caller.
- # else
- JMP(lr) // Return with r0 = 0.
- # endif
- #else // ! __ARM_ARCH_EXT_IDIV__
- cmp r1, #1 // Unsigned compare of the denominator against 1.
- bcc LOCAL_LABEL(divby0) // r1 < 1, i.e. r1 == 0.
- #if defined(USE_THUMB_1)
- bne LOCAL_LABEL(num_neq_denom) // r1 != 1: continue below.
- JMP(lr) // r1 == 1: the quotient is the numerator, already in r0.
- LOCAL_LABEL(num_neq_denom): // Despite its name, this label is reached when r1 != 1.
- #else
- IT(eq)
- JMPc(lr, eq) // r1 == 1: the quotient is the numerator, already in r0.
- #endif
- cmp r0, r1
- #if defined(USE_THUMB_1)
- bhs LOCAL_LABEL(num_ge_denom)
- movs r0, #0 // r0 < r1: the quotient is 0.
- JMP(lr)
- LOCAL_LABEL(num_ge_denom):
- #else
- ITT(cc)
- movcc r0, #0 // r0 < r1: the quotient is 0.
- JMPc(lr, cc)
- #endif
- // Implement division using binary long division algorithm.
- //
- // r0 is the numerator, r1 the denominator; at this point r0 >= r1 >= 2.
- //
- // The code before the jump computes a shift I such that (r1 << I) does not
- // overflow and r0 < 2 * (r1 << I); with CLZ, I aligns the highest set bits.
- // At the time of the jump, the target address is div0block - (block size) * I.
- // This depends on the fixed instruction size of block.
- // It is 12 bytes in ARM mode, 14 bytes in Thumb-2 and 10 bytes in Thumb-1.
- //
- // block(shift) implements the test-and-update-quotient core.
- // It assumes (r1 << shift) can be computed without overflow and
- // that r0 < 2 * (r1 << shift). The quotient is stored in r3.
- # if defined(__ARM_FEATURE_CLZ)
- clz ip, r0 // ip = number of leading zero bits in the numerator.
- clz r3, r1 // r3 = number of leading zero bits in the denominator.
- // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
- sub r3, r3, ip // r3 = I.
- # if defined(USE_THUMB_2)
- adr ip, LOCAL_LABEL(div0block) + 1 // +1 sets bit 0 so that bx stays in Thumb state.
- sub ip, ip, r3, lsl #1 // ip -= 2 * I (Thumb-2 only).
- # else
- adr ip, LOCAL_LABEL(div0block)
- # endif
- sub ip, ip, r3, lsl #2 // ip -= 4 * I.
- sub ip, ip, r3, lsl #3 // ip -= 8 * I, for a total of 12 * I (ARM) or 14 * I (Thumb-2).
- mov r3, #0 // Clear the quotient accumulator.
- bx ip // Enter the block chain at block(I).
- # else // No CLZ Feature
- # if defined(USE_THUMB_2)
- # error THUMB mode requires CLZ or UDIV
- # endif
- # if defined(USE_THUMB_1)
- # define BLOCK_SIZE 10
- # else
- # define BLOCK_SIZE 12
- # endif
- mov r2, r0 // r2 = working copy of the numerator for the shift search.
- # if defined(USE_THUMB_1)
- mov ip, r0 // Save the numerator in ip; r0 is reused to build the jump address.
- adr r0, LOCAL_LABEL(div0block)
- adds r0, #1 // Set bit 0 (Thumb state) in the jump address.
- # else
- adr ip, LOCAL_LABEL(div0block) // ip = jump address, moved back one block per unit of shift.
- # endif
- lsrs r3, r2, #16
- cmp r3, r1 // Is (r2 >> 16) >= r1? Then the shift is at least 16.
- # if defined(USE_THUMB_1)
- blo LOCAL_LABEL(skip_16)
- movs r2, r3 // Keep the shifted numerator.
- subs r0, r0, #(16 * BLOCK_SIZE) // Move the entry point back by 16 blocks.
- LOCAL_LABEL(skip_16):
- # else
- movhs r2, r3 // Keep the shifted numerator.
- subhs ip, ip, #(16 * BLOCK_SIZE) // Move the entry point back by 16 blocks.
- # endif
- lsrs r3, r2, #8
- cmp r3, r1 // Same test for a further shift of 8.
- # if defined(USE_THUMB_1)
- blo LOCAL_LABEL(skip_8)
- movs r2, r3
- subs r0, r0, #(8 * BLOCK_SIZE)
- LOCAL_LABEL(skip_8):
- # else
- movhs r2, r3
- subhs ip, ip, #(8 * BLOCK_SIZE)
- # endif
- lsrs r3, r2, #4
- cmp r3, r1 // Same test for a further shift of 4.
- # if defined(USE_THUMB_1)
- blo LOCAL_LABEL(skip_4)
- movs r2, r3
- subs r0, r0, #(4 * BLOCK_SIZE)
- LOCAL_LABEL(skip_4):
- # else
- movhs r2, r3
- subhs ip, #(4 * BLOCK_SIZE) // Two-operand form; ip is both source and destination.
- # endif
- lsrs r3, r2, #2
- cmp r3, r1 // Same test for a further shift of 2.
- # if defined(USE_THUMB_1)
- blo LOCAL_LABEL(skip_2)
- movs r2, r3
- subs r0, r0, #(2 * BLOCK_SIZE)
- LOCAL_LABEL(skip_2):
- # else
- movhs r2, r3
- subhs ip, ip, #(2 * BLOCK_SIZE)
- # endif
- // Last block, no need to update r2 or r3.
- # if defined(USE_THUMB_1)
- lsrs r3, r2, #1
- cmp r3, r1 // Same test for a final shift of 1.
- blo LOCAL_LABEL(skip_1)
- subs r0, r0, #(1 * BLOCK_SIZE)
- LOCAL_LABEL(skip_1):
- movs r2, r0 // r2 = jump address.
- mov r0, ip // Restore the numerator saved in ip.
- movs r3, #0 // Clear the quotient accumulator.
- JMP (r2) // Enter the block chain at the computed block.
- # else
- cmp r1, r2, lsr #1 // Is r1 <= (r2 >> 1)? Then one more block is needed.
- subls ip, ip, #(1 * BLOCK_SIZE)
- movs r3, #0 // Clear the quotient accumulator.
- JMP(ip) // Enter the block chain at the computed block.
- # endif
- # endif // __ARM_FEATURE_CLZ
- #define IMM #
- // IMM supplies the immediate marker inside block(), where a bare one would be taken as the stringize operator.
- // Because of the limited branch range in Thumb-1, the divby0 handler is placed here, close to its branch.
- LOCAL_LABEL(divby0):
- movs r0, #0 // Division by zero: r0 = 0.
- # if defined(__ARM_EABI__)
- push {r7, lr} // Save lr across the call. NOTE(review): r7 presumably only keeps the stack 8-byte aligned - confirm.
- bl __aeabi_idiv0 // due to relocation limit, can't use b.
- pop {r7, pc} // Restore r7 and return to the caller.
- # else
- JMP(lr) // Return with r0 = 0.
- # endif
- #if defined(USE_THUMB_1)
- #define block(shift) \
- lsls r2, r1, IMM shift; \
- cmp r0, r2; \
- blo LOCAL_LABEL(block_skip_##shift); \
- subs r0, r0, r2; \
- LOCAL_LABEL(block_skip_##shift) :; \
- adcs r3, r3 // same as ((r3 << 1) | Carry). Carry is set if r0 >= r2.
- // TODO: if current location counter is not word aligned, we don't
- // need the .p2align and nop
- // Label div0block must be word-aligned (it is the target of the adr above). First align block 31
- .p2align 2
- nop // 2-byte padding: 2 + 31 blocks * 10 bytes = 312 bytes, a multiple of 4, so div0block is word-aligned.
- #else // ARM and Thumb-2: conditionally add (1 << shift) to r3 and subtract (r1 << shift) from r0.
- #define block(shift) \
- cmp r0, r1, lsl IMM shift; \
- ITT(hs); \
- WIDE(addhs) r3, r3, IMM (1 << shift); \
- WIDE(subhs) r0, r0, r1, lsl IMM shift
- #endif
- block(31) // Blocks run from shift 31 down to 0; execution enters at block(I) and falls through to block(0).
- block(30)
- block(29)
- block(28)
- block(27)
- block(26)
- block(25)
- block(24)
- block(23)
- block(22)
- block(21)
- block(20)
- block(19)
- block(18)
- block(17)
- block(16)
- block(15)
- block(14)
- block(13)
- block(12)
- block(11)
- block(10)
- block(9)
- block(8)
- block(7)
- block(6)
- block(5)
- block(4)
- block(3)
- block(2)
- block(1)
- LOCAL_LABEL(div0block):
- block(0)
- mov r0, r3 // Return the accumulated quotient in r0.
- JMP(lr)
- #endif // __ARM_ARCH_EXT_IDIV__
- END_COMPILERRT_FUNCTION(__udivsi3)
- NO_EXEC_STACK_DIRECTIVE // Macro from assembly.h; NOTE(review): presumably marks the stack as non-executable - confirm.
|