llvm-project
158 строк · 4.9 Кб
1//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements __udivmodti4 for the compiler_rt library.
10//
11//===----------------------------------------------------------------------===//
12
13#include "int_lib.h"14
15#ifdef CRT_HAS_128BIT16
17// Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
18// Remainder stored in r.
19// Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
20// fallback. For a correctness proof see the reference for this algorithm
21// in Knuth, Volume 2, section 4.3.1, Algorithm D.
22UNUSED
23static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,24du_int *r) {25const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;26const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)27du_int un1, un0; // Norm. dividend LSD's28du_int vn1, vn0; // Norm. divisor digits29du_int q1, q0; // Quotient digits30du_int un64, un21, un10; // Dividend digit pairs31du_int rhat; // A remainder32si_int s; // Shift amount for normalization33
34s = __builtin_clzll(v);35if (s > 0) {36// Normalize the divisor.37v = v << s;38un64 = (u1 << s) | (u0 >> (n_udword_bits - s));39un10 = u0 << s; // Shift dividend left40} else {41// Avoid undefined behavior of (u0 >> 64).42un64 = u1;43un10 = u0;44}45
46// Break divisor up into two 32-bit digits.47vn1 = v >> (n_udword_bits / 2);48vn0 = v & 0xFFFFFFFF;49
50// Break right half of dividend into two digits.51un1 = un10 >> (n_udword_bits / 2);52un0 = un10 & 0xFFFFFFFF;53
54// Compute the first quotient digit, q1.55q1 = un64 / vn1;56rhat = un64 - q1 * vn1;57
58// q1 has at most error 2. No more than 2 iterations.59while (q1 >= b || q1 * vn0 > b * rhat + un1) {60q1 = q1 - 1;61rhat = rhat + vn1;62if (rhat >= b)63break;64}65
66un21 = un64 * b + un1 - q1 * v;67
68// Compute the second quotient digit.69q0 = un21 / vn1;70rhat = un21 - q0 * vn1;71
72// q0 has at most error 2. No more than 2 iterations.73while (q0 >= b || q0 * vn0 > b * rhat + un0) {74q0 = q0 - 1;75rhat = rhat + vn1;76if (rhat >= b)77break;78}79
80*r = (un21 * b + un0 - q0 * v) >> s;81return q1 * b + q0;82}
83
84static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,85du_int *r) {86#if defined(__x86_64__)87du_int result;88__asm__("divq %[v]"89: "=a"(result), "=d"(*r)90: [ v ] "r"(v), "a"(u0), "d"(u1));91return result;92#else93return udiv128by64to64default(u1, u0, v, r);94#endif95}
96
97// Effects: if rem != 0, *rem = a % b
98// Returns: a / b
99
100COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {101const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;102utwords dividend;103dividend.all = a;104utwords divisor;105divisor.all = b;106utwords quotient;107utwords remainder;108if (divisor.all > dividend.all) {109if (rem)110*rem = dividend.all;111return 0;112}113// When the divisor fits in 64 bits, we can use an optimized path.114if (divisor.s.high == 0) {115remainder.s.high = 0;116if (dividend.s.high < divisor.s.low) {117// The result fits in 64 bits.118quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,119divisor.s.low, &remainder.s.low);120quotient.s.high = 0;121} else {122// First, divide with the high part to get the remainder in dividend.s.high.123// After that dividend.s.high < divisor.s.low.124quotient.s.high = dividend.s.high / divisor.s.low;125dividend.s.high = dividend.s.high % divisor.s.low;126quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,127divisor.s.low, &remainder.s.low);128}129if (rem)130*rem = remainder.all;131return quotient.all;132}133// 0 <= shift <= 63.134si_int shift =135__builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);136divisor.all <<= shift;137quotient.s.high = 0;138quotient.s.low = 0;139for (; shift >= 0; --shift) {140quotient.s.low <<= 1;141// Branch free version of.142// if (dividend.all >= divisor.all)143// {144// dividend.all -= divisor.all;145// carry = 1;146// }147const ti_int s =148(ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);149quotient.s.low |= s & 1;150dividend.all -= divisor.all & s;151divisor.all >>= 1;152}153if (rem)154*rem = dividend.all;155return quotient.all;156}
157
158#endif // CRT_HAS_128BIT159