llvm-project

Форк
0
158 строк · 4.9 Кб
1
//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements __udivmodti4 for the compiler_rt library.
10
//
11
//===----------------------------------------------------------------------===//
12

13
#include "int_lib.h"
14

15
#ifdef CRT_HAS_128BIT
16

17
// Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
18
// Remainder stored in r.
19
// Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
20
// fallback. For a correctness proof see the reference for this algorithm
21
// in Knuth, Volume 2, section 4.3.1, Algorithm D.
22
UNUSED
23
static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
24
                                            du_int *r) {
25
  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
26
  const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
27
  du_int un1, un0;                                // Norm. dividend LSD's
28
  du_int vn1, vn0;                                // Norm. divisor digits
29
  du_int q1, q0;                                  // Quotient digits
30
  du_int un64, un21, un10;                        // Dividend digit pairs
31
  du_int rhat;                                    // A remainder
32
  si_int s;                                       // Shift amount for normalization
33

34
  s = __builtin_clzll(v);
35
  if (s > 0) {
36
    // Normalize the divisor.
37
    v = v << s;
38
    un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
39
    un10 = u0 << s; // Shift dividend left
40
  } else {
41
    // Avoid undefined behavior of (u0 >> 64).
42
    un64 = u1;
43
    un10 = u0;
44
  }
45

46
  // Break divisor up into two 32-bit digits.
47
  vn1 = v >> (n_udword_bits / 2);
48
  vn0 = v & 0xFFFFFFFF;
49

50
  // Break right half of dividend into two digits.
51
  un1 = un10 >> (n_udword_bits / 2);
52
  un0 = un10 & 0xFFFFFFFF;
53

54
  // Compute the first quotient digit, q1.
55
  q1 = un64 / vn1;
56
  rhat = un64 - q1 * vn1;
57

58
  // q1 has at most error 2. No more than 2 iterations.
59
  while (q1 >= b || q1 * vn0 > b * rhat + un1) {
60
    q1 = q1 - 1;
61
    rhat = rhat + vn1;
62
    if (rhat >= b)
63
      break;
64
  }
65

66
  un21 = un64 * b + un1 - q1 * v;
67

68
  // Compute the second quotient digit.
69
  q0 = un21 / vn1;
70
  rhat = un21 - q0 * vn1;
71

72
  // q0 has at most error 2. No more than 2 iterations.
73
  while (q0 >= b || q0 * vn0 > b * rhat + un0) {
74
    q0 = q0 - 1;
75
    rhat = rhat + vn1;
76
    if (rhat >= b)
77
      break;
78
  }
79

80
  *r = (un21 * b + un0 - q0 * v) >> s;
81
  return q1 * b + q0;
82
}
83

84
static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
85
                                     du_int *r) {
86
#if defined(__x86_64__)
87
  du_int result;
88
  __asm__("divq %[v]"
89
          : "=a"(result), "=d"(*r)
90
          : [ v ] "r"(v), "a"(u0), "d"(u1));
91
  return result;
92
#else
93
  return udiv128by64to64default(u1, u0, v, r);
94
#endif
95
}
96

97
// Effects: if rem != 0, *rem = a % b
98
// Returns: a / b
99

100
COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
101
  const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
102
  utwords dividend;
103
  dividend.all = a;
104
  utwords divisor;
105
  divisor.all = b;
106
  utwords quotient;
107
  utwords remainder;
108
  if (divisor.all > dividend.all) {
109
    if (rem)
110
      *rem = dividend.all;
111
    return 0;
112
  }
113
  // When the divisor fits in 64 bits, we can use an optimized path.
114
  if (divisor.s.high == 0) {
115
    remainder.s.high = 0;
116
    if (dividend.s.high < divisor.s.low) {
117
      // The result fits in 64 bits.
118
      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
119
                                       divisor.s.low, &remainder.s.low);
120
      quotient.s.high = 0;
121
    } else {
122
      // First, divide with the high part to get the remainder in dividend.s.high.
123
      // After that dividend.s.high < divisor.s.low.
124
      quotient.s.high = dividend.s.high / divisor.s.low;
125
      dividend.s.high = dividend.s.high % divisor.s.low;
126
      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
127
                                       divisor.s.low, &remainder.s.low);
128
    }
129
    if (rem)
130
      *rem = remainder.all;
131
    return quotient.all;
132
  }
133
  // 0 <= shift <= 63.
134
  si_int shift =
135
      __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
136
  divisor.all <<= shift;
137
  quotient.s.high = 0;
138
  quotient.s.low = 0;
139
  for (; shift >= 0; --shift) {
140
    quotient.s.low <<= 1;
141
    // Branch free version of.
142
    // if (dividend.all >= divisor.all)
143
    // {
144
    //    dividend.all -= divisor.all;
145
    //    carry = 1;
146
    // }
147
    const ti_int s =
148
        (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
149
    quotient.s.low |= s & 1;
150
    dividend.all -= divisor.all & s;
151
    divisor.all >>= 1;
152
  }
153
  if (rem)
154
    *rem = dividend.all;
155
  return quotient.all;
156
}
157

158
#endif // CRT_HAS_128BIT
159

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.