/Users/buildslave/jenkins/workspace/coverage/llvm-project/libcxx/src/include/ryu/d2s_intrinsics.h
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// Copyright 2018 Ulf Adams
// Copyright (c) Microsoft Corporation. All rights reserved.

// Boost Software License - Version 1.0 - August 17th, 2003

// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:

// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
#ifndef _LIBCPP_SRC_INCLUDE_RYU_D2S_INTRINSICS_H
#define _LIBCPP_SRC_INCLUDE_RYU_D2S_INTRINSICS_H

// Avoid formatting to keep the changes with the original code minimal.
// clang-format off

#include <__assert>
#include <__config>

#include "include/ryu/ryu.h"

_LIBCPP_BEGIN_NAMESPACE_STD

#if defined(_M_X64) && defined(_MSC_VER)
#define _LIBCPP_INTRINSIC128 1
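// Returns the low 64 bits of the 128-bit product __a * __b and stores the
// high 64 bits in *__productHi, matching the contract of the _umul128
// intrinsic that it wraps.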
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  return _umul128(__a, __b, __productHi);
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // For the __shiftright128 intrinsic, the shift value is always
  // modulo 64.
  // In the current implementation of the double-precision version
  // of Ryu, the shift value is always < 64.
  // (The shift value is in the range [49, 58].)
  // Check this here in case a future change requires larger shift
  // values; if so, this function will need to be adjusted.
  _LIBCPP_ASSERT(__dist < 64, "");
  return __shiftright128(__lo, __hi, static_cast<unsigned char>(__dist));
}

// ^^^ intrinsics available ^^^ / vvv __int128 available vvv
#elif defined(__SIZEOF_INT128__) && ( \
    (defined(__clang__) && !defined(_MSC_VER)) || \
    (defined(__GNUC__) && !defined(__clang__) && !defined(__CUDACC__)))
#define _LIBCPP_INTRINSIC128 1
// We have unsigned __int128 support in Clang and GCC.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  auto __temp = __a * (unsigned __int128)__b;
  *__productHi = __temp >> 64;
  return static_cast<uint64_t>(__temp);
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // In the current implementation of the double-precision version
  // of Ryu, the shift value is always < 64.
  // (The shift value is in the range [49, 58].)
  // Check this here in case a future change requires larger shift
  // values; if so, this function will need to be adjusted.
  _LIBCPP_ASSERT(__dist < 64, "");
  auto __temp = __lo | ((unsigned __int128)__hi << 64);
  // For x64 128-bit shifts using the `shrd` instruction and two 64-bit
  // registers, the shift value is modulo 64. Thus the `& 63` is free.
  return static_cast<uint64_t>(__temp >> (__dist & 63));
}
#else // ^^^ __int128 available ^^^ / vvv intrinsics unavailable vvv

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_ALWAYS_INLINE uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  // TRANSITION, VSO-634761
  // The casts here help MSVC to avoid calls to the __allmul library function.
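  // Portable schoolbook multiplication on 32-bit halves:
  //   __a * __b == (__aHi * 2^32 + __aLo) * (__bHi * 2^32 + __bLo)
  //             == __b11 * 2^64 + (__b01 + __b10) * 2^32 + __b00,
  // with the middle partial products accumulated in 64 bits so that the
  // carries into the high word are captured by __mid1Hi and __mid2Hi.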
  const uint32_t __aLo = static_cast<uint32_t>(__a);
  const uint32_t __aHi = static_cast<uint32_t>(__a >> 32);
  const uint32_t __bLo = static_cast<uint32_t>(__b);
  const uint32_t __bHi = static_cast<uint32_t>(__b >> 32);

  const uint64_t __b00 = static_cast<uint64_t>(__aLo) * __bLo;
  const uint64_t __b01 = static_cast<uint64_t>(__aLo) * __bHi;
  const uint64_t __b10 = static_cast<uint64_t>(__aHi) * __bLo;
  const uint64_t __b11 = static_cast<uint64_t>(__aHi) * __bHi;

  const uint32_t __b00Lo = static_cast<uint32_t>(__b00);
  const uint32_t __b00Hi = static_cast<uint32_t>(__b00 >> 32);

  const uint64_t __mid1 = __b10 + __b00Hi;
  const uint32_t __mid1Lo = static_cast<uint32_t>(__mid1);
  const uint32_t __mid1Hi = static_cast<uint32_t>(__mid1 >> 32);

  const uint64_t __mid2 = __b01 + __mid1Lo;
  const uint32_t __mid2Lo = static_cast<uint32_t>(__mid2);
  const uint32_t __mid2Hi = static_cast<uint32_t>(__mid2 >> 32);

  const uint64_t __pHi = __b11 + __mid1Hi + __mid2Hi;
  const uint64_t __pLo = (static_cast<uint64_t>(__mid2Lo) << 32) | __b00Lo;

  *__productHi = __pHi;
  return __pLo;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // We don't need to handle the case __dist >= 64 here (see above).
  _LIBCPP_ASSERT(__dist < 64, "");
#ifdef _LIBCPP_64_BIT
  _LIBCPP_ASSERT(__dist > 0, "");
  return (__hi << (64 - __dist)) | (__lo >> __dist);
#else // ^^^ 64-bit ^^^ / vvv 32-bit vvv
  // Avoid a 64-bit shift by taking advantage of the range of shift values.
  _LIBCPP_ASSERT(__dist >= 32, "");
  return (__hi << (64 - __dist)) | (static_cast<uint32_t>(__lo >> 32) >> (__dist - 32));
#endif // ^^^ 32-bit ^^^
}

#endif // ^^^ intrinsics unavailable ^^^

#ifndef _LIBCPP_64_BIT

// Returns the high 64 bits of the 128-bit product of __a and __b.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __umulh(const uint64_t __a, const uint64_t __b) {
  // Reuse the __ryu_umul128 implementation.
  // Optimizers will likely eliminate the instructions used to compute the
  // low part of the product.
  uint64_t __hi;
  (void) __ryu_umul128(__a, __b, &__hi);
  return __hi;
}

// On 32-bit platforms, compilers typically generate calls to library
// functions for 64-bit divisions, even if the divisor is a constant.
//
// TRANSITION, LLVM-37932
//
// The functions here perform division by a constant using multiplications
// in the same way that compilers for 64-bit targets do.
//
// NB:
// The multipliers and shift values are the ones generated by clang x64
// for expressions like x/5, x/10, etc.
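//
// The underlying identity is the usual round-up reciprocal: for a divisor
// __d and a suitable shift __k, floor(__x / __d) == __umulh(__x, __m) >> __k
// with __m == ceil(2^(64 + __k) / __d). For example, in __div5 below,
// 0xCCCCCCCCCCCCCCCD == ceil(2^66 / 5), so __umulh(__x, __m) >> 2 computes
// floor(__x * __m / 2^66) == floor(__x / 5) for every 64-bit __x.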

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
  return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
  return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 3;
}

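// 100 == 4 * 25, so shift out the power of two first and divide the
// remaining 62-bit value by 25 with a reciprocal multiplication.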
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
  return __umulh(__x >> 2, 0x28F5C28F5C28F5C3u) >> 2;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
  return __umulh(__x, 0xABCC77118461CEFDu) >> 26;
}

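// Similarly, 10^9 == 2^9 * 5^9 == 2^9 * 1953125; shifting out the power of
// two first keeps the reciprocal multiplier for 1953125 small.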
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
  return __umulh(__x >> 9, 0x44B82FA09B5A53u) >> 11;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
  // Avoid 64-bit math as much as possible.
  // Returning static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x)) would
  // perform a 32x64-bit multiplication and a 64-bit subtraction.
  // __x and 1000000000 * __div1e9(__x) are guaranteed to differ by
  // less than 10^9, so their highest 32 bits must be identical,
  // so we can truncate both sides to uint32_t before subtracting.
  // We can also simplify static_cast<uint32_t>(1000000000 * __div1e9(__x)):
  // we can truncate before multiplying instead of after, as multiplying
  // the highest 32 bits of __div1e9(__x) can't affect the lowest 32 bits.
  return static_cast<uint32_t>(__x) - 1000000000 * static_cast<uint32_t>(__div1e9(__x));
}


#else // ^^^ 32-bit ^^^ / vvv 64-bit vvv

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
  return __x / 5;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
  return __x / 10;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
  return __x / 100;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
  return __x / 100000000;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
  return __x / 1000000000;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
  return static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x));
}

#endif // ^^^ 64-bit ^^^

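// Returns the largest __count such that __value is divisible by 5^__count.
// __value must not be zero (otherwise the loop would not terminate).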
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint64_t __value) {
  uint32_t __count = 0;
  for (;;) {
    _LIBCPP_ASSERT(__value != 0, "");
    const uint64_t __q = __div5(__value);
    const uint32_t __r = static_cast<uint32_t>(__value) - 5 * static_cast<uint32_t>(__q);
    if (__r != 0) {
      break;
    }
    __value = __q;
    ++__count;
  }
  return __count;
}

// Returns true if __value is divisible by 5^__p.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf5(const uint64_t __value, const uint32_t __p) {
  // I tried a case distinction on __p, but there was no performance difference.
  return __pow5Factor(__value) >= __p;
}

// Returns true if __value is divisible by 2^__p.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
  _LIBCPP_ASSERT(__value != 0, "");
  _LIBCPP_ASSERT(__p < 64, "");
  // __builtin_ctzll doesn't appear to be faster here.
  return (__value & ((1ull << __p) - 1)) == 0;
}

_LIBCPP_END_NAMESPACE_STD

// clang-format on

#endif // _LIBCPP_SRC_INCLUDE_RYU_D2S_INTRINSICS_H