Coverage Report

Created: 2018-07-03 15:31

/home/travis/build/MoarVM/MoarVM/src/math/grisu.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Code in this file is taken from MathGeoLib by JUKKA JYLANKI,
3
 *  licensed under Apache 2.0 license.
4
 *  http://clb.demon.fi/MathGeoLib/nightly/sourcecode.html#license
5
 *  http://www.apache.org/licenses/LICENSE-2.0.html
6
 *  http://clb.demon.fi/MathGeoLib/nightly/docs/grisu3.c_code.html
7
 *
8
 *  It contains minor modifications, to match output expected by Perl 6 along
9
 *  with minor edits to arguments and formatting.
10
 *
11
 *  This file is part of an implementation of the "grisu3" double to string
12
 *  conversion algorithm described in the research paper
13
 *  "Printing Floating-Point Numbers Quickly And Accurately with Integers"
14
 *  by Florian Loitsch, available at
15
 *  http://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf
16
 *
17
 *  Grisu3 is able to handle about 99.5% of numbers, for the remaining 0.5%
18
 *  the code below fallsback to snprintf(). It's possible that using a
19
 *  different fallback (such as Dragon4 algorithm) can improve performance.
20
*/
21
22
#include "grisu.h"
23
#include <stdint.h> // uint64_t etc.
24
#include <math.h>   // ceil
25
#include <stdio.h>  // snprintf
26
27
#ifdef _MSC_VER
28
#pragma warning(disable : 4204) // nonstandard extension used : non-constant aggregate initializer
29
#endif
30
31
116k
#define D64_SIGN         0x8000000000000000ULL
32
90.1k
#define D64_EXP_MASK     0x7FF0000000000000ULL
33
57.8k
#define D64_FRACT_MASK   0x000FFFFFFFFFFFFFULL
34
28.9k
#define D64_IMPLICIT_ONE 0x0010000000000000ULL
35
28.9k
#define D64_EXP_POS      52
36
28.9k
#define D64_EXP_BIAS     1075
37
86.7k
#define DIYFP_FRACT_SIZE 64
38
28.9k
#define D_1_LOG2_10      0.30102999566398114 // 1 / lg(10)
39
28.9k
#define MIN_TARGET_EXP   -60
40
346k
#define MASK32           0xFFFFFFFFULL
41
42
86.8k
#define CAST_U64(d) (*(uint64_t*)&d)
43
#define MIN(x,y) ((x) <= (y) ? (x) : (y))
44
#define MAX(x,y) ((x) >= (y) ? (x) : (y))
45
46
28.9k
#define MIN_CACHED_EXP -348
47
28.9k
#define CACHED_EXP_STEP 8
48
49
typedef struct diy_fp
50
{
51
        uint64_t f;
52
        int e;
53
} diy_fp;
54
55
typedef struct power
56
{
57
        uint64_t fract;
58
        int16_t b_exp, d_exp;
59
} power;
60
61
static const power pow_cache[] =
62
{
63
        { 0xfa8fd5a0081c0288ULL, -1220, -348 },
64
        { 0xbaaee17fa23ebf76ULL, -1193, -340 },
65
        { 0x8b16fb203055ac76ULL, -1166, -332 },
66
        { 0xcf42894a5dce35eaULL, -1140, -324 },
67
        { 0x9a6bb0aa55653b2dULL, -1113, -316 },
68
        { 0xe61acf033d1a45dfULL, -1087, -308 },
69
        { 0xab70fe17c79ac6caULL, -1060, -300 },
70
        { 0xff77b1fcbebcdc4fULL, -1034, -292 },
71
        { 0xbe5691ef416bd60cULL, -1007, -284 },
72
        { 0x8dd01fad907ffc3cULL,  -980, -276 },
73
        { 0xd3515c2831559a83ULL,  -954, -268 },
74
        { 0x9d71ac8fada6c9b5ULL,  -927, -260 },
75
        { 0xea9c227723ee8bcbULL,  -901, -252 },
76
        { 0xaecc49914078536dULL,  -874, -244 },
77
        { 0x823c12795db6ce57ULL,  -847, -236 },
78
        { 0xc21094364dfb5637ULL,  -821, -228 },
79
        { 0x9096ea6f3848984fULL,  -794, -220 },
80
        { 0xd77485cb25823ac7ULL,  -768, -212 },
81
        { 0xa086cfcd97bf97f4ULL,  -741, -204 },
82
        { 0xef340a98172aace5ULL,  -715, -196 },
83
        { 0xb23867fb2a35b28eULL,  -688, -188 },
84
        { 0x84c8d4dfd2c63f3bULL,  -661, -180 },
85
        { 0xc5dd44271ad3cdbaULL,  -635, -172 },
86
        { 0x936b9fcebb25c996ULL,  -608, -164 },
87
        { 0xdbac6c247d62a584ULL,  -582, -156 },
88
        { 0xa3ab66580d5fdaf6ULL,  -555, -148 },
89
        { 0xf3e2f893dec3f126ULL,  -529, -140 },
90
        { 0xb5b5ada8aaff80b8ULL,  -502, -132 },
91
        { 0x87625f056c7c4a8bULL,  -475, -124 },
92
        { 0xc9bcff6034c13053ULL,  -449, -116 },
93
        { 0x964e858c91ba2655ULL,  -422, -108 },
94
        { 0xdff9772470297ebdULL,  -396, -100 },
95
        { 0xa6dfbd9fb8e5b88fULL,  -369,  -92 },
96
        { 0xf8a95fcf88747d94ULL,  -343,  -84 },
97
        { 0xb94470938fa89bcfULL,  -316,  -76 },
98
        { 0x8a08f0f8bf0f156bULL,  -289,  -68 },
99
        { 0xcdb02555653131b6ULL,  -263,  -60 },
100
        { 0x993fe2c6d07b7facULL,  -236,  -52 },
101
        { 0xe45c10c42a2b3b06ULL,  -210,  -44 },
102
        { 0xaa242499697392d3ULL,  -183,  -36 },
103
        { 0xfd87b5f28300ca0eULL,  -157,  -28 },
104
        { 0xbce5086492111aebULL,  -130,  -20 },
105
        { 0x8cbccc096f5088ccULL,  -103,  -12 },
106
        { 0xd1b71758e219652cULL,   -77,   -4 },
107
        { 0x9c40000000000000ULL,   -50,    4 },
108
        { 0xe8d4a51000000000ULL,   -24,   12 },
109
        { 0xad78ebc5ac620000ULL,     3,   20 },
110
        { 0x813f3978f8940984ULL,    30,   28 },
111
        { 0xc097ce7bc90715b3ULL,    56,   36 },
112
        { 0x8f7e32ce7bea5c70ULL,    83,   44 },
113
        { 0xd5d238a4abe98068ULL,   109,   52 },
114
        { 0x9f4f2726179a2245ULL,   136,   60 },
115
        { 0xed63a231d4c4fb27ULL,   162,   68 },
116
        { 0xb0de65388cc8ada8ULL,   189,   76 },
117
        { 0x83c7088e1aab65dbULL,   216,   84 },
118
        { 0xc45d1df942711d9aULL,   242,   92 },
119
        { 0x924d692ca61be758ULL,   269,  100 },
120
        { 0xda01ee641a708deaULL,   295,  108 },
121
        { 0xa26da3999aef774aULL,   322,  116 },
122
        { 0xf209787bb47d6b85ULL,   348,  124 },
123
        { 0xb454e4a179dd1877ULL,   375,  132 },
124
        { 0x865b86925b9bc5c2ULL,   402,  140 },
125
        { 0xc83553c5c8965d3dULL,   428,  148 },
126
        { 0x952ab45cfa97a0b3ULL,   455,  156 },
127
        { 0xde469fbd99a05fe3ULL,   481,  164 },
128
        { 0xa59bc234db398c25ULL,   508,  172 },
129
        { 0xf6c69a72a3989f5cULL,   534,  180 },
130
        { 0xb7dcbf5354e9beceULL,   561,  188 },
131
        { 0x88fcf317f22241e2ULL,   588,  196 },
132
        { 0xcc20ce9bd35c78a5ULL,   614,  204 },
133
        { 0x98165af37b2153dfULL,   641,  212 },
134
        { 0xe2a0b5dc971f303aULL,   667,  220 },
135
        { 0xa8d9d1535ce3b396ULL,   694,  228 },
136
        { 0xfb9b7cd9a4a7443cULL,   720,  236 },
137
        { 0xbb764c4ca7a44410ULL,   747,  244 },
138
        { 0x8bab8eefb6409c1aULL,   774,  252 },
139
        { 0xd01fef10a657842cULL,   800,  260 },
140
        { 0x9b10a4e5e9913129ULL,   827,  268 },
141
        { 0xe7109bfba19c0c9dULL,   853,  276 },
142
        { 0xac2820d9623bf429ULL,   880,  284 },
143
        { 0x80444b5e7aa7cf85ULL,   907,  292 },
144
        { 0xbf21e44003acdd2dULL,   933,  300 },
145
        { 0x8e679c2f5e44ff8fULL,   960,  308 },
146
        { 0xd433179d9c8cb841ULL,   986,  316 },
147
        { 0x9e19db92b4e31ba9ULL,  1013,  324 },
148
        { 0xeb96bf6ebadf77d9ULL,  1039,  332 },
149
        { 0xaf87023b9bf0ee6bULL,  1066,  340 }
150
};
151
152
static int cached_pow(int exp, diy_fp *p)
153
28.9k
{
154
28.9k
        int k = (int)ceil((exp+DIYFP_FRACT_SIZE-1) * D_1_LOG2_10);
155
28.9k
        int i = (k-MIN_CACHED_EXP-1) / CACHED_EXP_STEP + 1;
156
28.9k
        p->f = pow_cache[i].fract;
157
28.9k
        p->e = pow_cache[i].b_exp;
158
28.9k
        return pow_cache[i].d_exp;
159
28.9k
}
160
161
static diy_fp minus(diy_fp x, diy_fp y)
162
57.8k
{
163
57.8k
        diy_fp d; d.f = x.f - y.f; d.e = x.e;
164
57.8k
        return d;
165
57.8k
}
166
167
static diy_fp multiply(diy_fp x, diy_fp y)
168
86.7k
{
169
86.7k
        uint64_t a, b, c, d, ac, bc, ad, bd, tmp;
170
86.7k
        diy_fp r;
171
86.7k
        a = x.f >> 32; b = x.f & MASK32;
172
86.7k
        c = y.f >> 32; d = y.f & MASK32;
173
86.7k
        ac = a*c; bc = b*c;
174
86.7k
        ad = a*d; bd = b*d;
175
86.7k
        tmp = (bd >> 32) + (ad & MASK32) + (bc & MASK32);
176
86.7k
        tmp += 1U << 31; // round
177
86.7k
        r.f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
178
86.7k
        r.e = x.e + y.e + 64;
179
86.7k
        return r;
180
86.7k
}
181
182
static diy_fp normalize_diy_fp(diy_fp n)
183
57.8k
{
184
115k
        while(!(n.f & 0xFFC0000000000000ULL)) { n.f <<= 10; n.e -= 10; }
185
86.7k
        while(!(n.f & D64_SIGN)) { n.f <<= 1; --n.e; }
186
57.8k
        return n;
187
57.8k
}
188
189
static diy_fp double2diy_fp(double d)
190
28.9k
{
191
28.9k
        diy_fp fp;
192
28.9k
        uint64_t u64 = CAST_U64(d);
193
28.9k
        if (!(u64 & D64_EXP_MASK)) { fp.f = u64 & D64_FRACT_MASK; fp.e = 1 - D64_EXP_BIAS; }
194
28.9k
        else { fp.f = (u64 & D64_FRACT_MASK) + D64_IMPLICIT_ONE; fp.e = (int)((u64 & D64_EXP_MASK) >> D64_EXP_POS) - D64_EXP_BIAS; }
195
28.9k
        return fp;
196
28.9k
}
197
198
// pow10_cache[i] = 10^(i-1)
199
static const unsigned int pow10_cache[] = { 0, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
200
201
static int largest_pow10(uint32_t n, int n_bits, uint32_t *power)
202
28.9k
{
203
28.9k
        int guess = ((n_bits + 1) * 1233 >> 12) + 1/*skip first entry*/;
204
28.9k
        if (n < pow10_cache[guess]) --guess; // We don't have any guarantees that 2^n_bits <= n.
205
28.9k
        *power = pow10_cache[guess];
206
28.9k
        return guess;
207
28.9k
}
208
209
static int round_weed(char *buffer, int len, uint64_t wp_W, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t ulp)
210
28.9k
{
211
28.9k
        uint64_t wp_Wup = wp_W - ulp;
212
28.9k
        uint64_t wp_Wdown = wp_W + ulp;
213
29.0k
        while(rest < wp_Wup && delta - rest >= ten_kappa
214
220
                && (rest + ten_kappa < wp_Wup || wp_Wup - rest >= rest + ten_kappa - wp_Wup))
215
166
        {
216
166
                --buffer[len-1];
217
166
                rest += ten_kappa;
218
166
        }
219
28.9k
        if (rest < wp_Wdown && delta - rest >= ten_kappa
220
54
                && (rest + ten_kappa < wp_Wdown || wp_Wdown - rest > rest + ten_kappa - wp_Wdown))
221
0
                return 0;
222
28.9k
223
28.9k
        return 2*ulp <= rest && rest <= delta - 4*ulp;
224
28.9k
}
225
226
static int digit_gen(diy_fp low, diy_fp w, diy_fp high, char *buffer, int *length, int *kappa)
227
28.9k
{
228
28.9k
        uint64_t unit = 1;
229
28.9k
        diy_fp too_low = { low.f - unit, low.e };
230
28.9k
        diy_fp too_high = { high.f + unit, high.e };
231
28.9k
        diy_fp unsafe_interval = minus(too_high, too_low);
232
28.9k
        diy_fp one = { 1ULL << -w.e, w.e };
233
28.9k
        uint32_t p1 = (uint32_t)(too_high.f >> -one.e);
234
28.9k
        uint64_t p2 = too_high.f & (one.f - 1);
235
28.9k
        uint32_t div;
236
28.9k
        *kappa = largest_pow10(p1, DIYFP_FRACT_SIZE + one.e, &div);
237
28.9k
        *length = 0;
238
28.9k
239
69.1k
        while(*kappa > 0)
240
67.9k
        {
241
67.9k
                uint64_t rest;
242
67.9k
                int digit = p1 / div;
243
67.9k
                buffer[*length] = (char)('0' + digit);
244
67.9k
                ++*length;
245
67.9k
                p1 %= div;
246
67.9k
                --*kappa;
247
67.9k
                rest = ((uint64_t)p1 << -one.e) + p2;
248
67.9k
                if (rest < unsafe_interval.f) return round_weed(buffer, *length, minus(too_high, w).f, unsafe_interval.f, rest, (uint64_t)div << -one.e, unit);
249
40.2k
                div /= 10;
250
40.2k
        }
251
28.9k
252
1.24k
        for(;;)
253
8.20k
        {
254
8.20k
                int digit;
255
8.20k
                p2 *= 10;
256
8.20k
                unit *= 10;
257
8.20k
                unsafe_interval.f *= 10;
258
8.20k
                // Integer division by one.
259
8.20k
                digit = (int)(p2 >> -one.e);
260
8.20k
                buffer[*length] = (char)('0' + digit);
261
8.20k
                ++*length;
262
8.20k
                p2 &= one.f - 1;  // Modulo by one.
263
8.20k
                --*kappa;
264
8.20k
                if (p2 < unsafe_interval.f) return round_weed(buffer, *length, minus(too_high, w).f * unit, unsafe_interval.f, p2, one.f, unit);
265
8.20k
        }
266
1.24k
}
267
268
static int grisu3(double v, char *buffer, int *length, int *d_exp)
269
28.9k
{
270
28.9k
        int mk, kappa, success;
271
28.9k
        diy_fp dfp = double2diy_fp(v);
272
28.9k
        diy_fp w = normalize_diy_fp(dfp);
273
28.9k
274
28.9k
        // normalize boundaries
275
28.9k
        diy_fp t = { (dfp.f << 1) + 1, dfp.e - 1 };
276
28.9k
        diy_fp b_plus = normalize_diy_fp(t);
277
28.9k
        diy_fp b_minus;
278
28.9k
        diy_fp c_mk; // Cached power of ten: 10^-k
279
28.9k
        uint64_t u64 = CAST_U64(v);
280
28.9k
        if (!(u64 & D64_FRACT_MASK) && (u64 & D64_EXP_MASK) != 0) { b_minus.f = (dfp.f << 2) - 1; b_minus.e =  dfp.e - 2;} // lower boundary is closer?
281
25.4k
        else { b_minus.f = (dfp.f << 1) - 1; b_minus.e = dfp.e - 1; }
282
28.9k
        b_minus.f = b_minus.f << (b_minus.e - b_plus.e);
283
28.9k
        b_minus.e = b_plus.e;
284
28.9k
285
28.9k
        mk = cached_pow(MIN_TARGET_EXP - DIYFP_FRACT_SIZE - w.e, &c_mk);
286
28.9k
287
28.9k
        w = multiply(w, c_mk);
288
28.9k
        b_minus = multiply(b_minus, c_mk);
289
28.9k
        b_plus  = multiply(b_plus,  c_mk);
290
28.9k
291
28.9k
        success = digit_gen(b_minus, w, b_plus, buffer, length, &kappa);
292
28.9k
        *d_exp = kappa - mk;
293
28.9k
        return success;
294
28.9k
}
295
296
static int i_to_str(int val, char *str)
297
1.56k
{
298
1.56k
        int len, i;
299
1.56k
        char *s;
300
1.56k
        char *begin = str;
301
1.56k
        if (val < 0) {
302
830
            *str++ = '-';
303
830
            if (val > -10)
304
32
                *str++ = '0';
305
830
            val = -val;
306
830
        }
307
1.56k
        else
308
731
            *str++ = '+';
309
1.56k
        s = str;
310
1.56k
311
1.56k
        for(;;)
312
3.13k
        {
313
3.13k
                int ni = val / 10;
314
3.13k
                int digit = val - ni*10;
315
3.13k
                *s++ = (char)('0' + digit);
316
3.13k
                if (ni == 0)
317
1.56k
                        break;
318
1.57k
                val = ni;
319
1.57k
        }
320
1.56k
        *s = '\0';
321
1.56k
        len = (int)(s - str);
322
3.09k
        for(i = 0; i < len/2; ++i)
323
1.52k
        {
324
1.52k
                char ch = str[i];
325
1.52k
                str[i] = str[len-1-i];
326
1.52k
                str[len-1-i] = ch;
327
1.52k
        }
328
1.56k
329
1.56k
        return (int)(s - begin);
330
1.56k
}
331
332
28.9k
int dtoa_grisu3(double v, char *dst, int size) {
333
28.9k
        int d_exp, len, success, i, decimal_pos;
334
28.9k
        uint64_t u64 = CAST_U64(v);
335
28.9k
        char *s2 = dst;
336
28.9k
337
28.9k
        // Prehandle NaNs
338
28.9k
        if ((u64 << 1) > 0xFFE0000000000000ULL) {
339
0
            *s2++ = 'N'; *s2++ = 'a'; *s2++ = 'N'; *s2 = '\0';
340
0
            return (int)(s2 - dst);
341
0
        }
342
28.9k
        // Prehandle negative values.
343
28.9k
        if ((u64 & D64_SIGN) != 0) {
344
908
            *s2++ = '-'; v = -v; u64 ^= D64_SIGN;
345
908
        }
346
28.9k
        // Prehandle zero.
347
28.9k
        if (!u64) {
348
87
            *s2++ = '0'; *s2 = '\0';
349
87
            return (int)(s2 - dst);
350
87
        }
351
28.9k
        // Prehandle infinity.
352
28.9k
        if (u64 == D64_EXP_MASK) {
353
0
            *s2++ = 'I'; *s2++ = 'n'; *s2++ = 'f'; *s2 = '\0';
354
0
            return (int)(s2 - dst);
355
0
        }
356
28.9k
357
28.9k
        success = grisu3(v, s2, &len, &d_exp);
358
28.9k
        // If grisu3 was not able to convert the number to a string, then use old snprintf (suboptimal).
359
28.9k
        if (!success) return snprintf(s2, size, "%.17g", v) + (int)(s2 - dst);
360
28.9k
361
28.8k
        decimal_pos = len + d_exp;
362
28.8k
        if (decimal_pos > 0) {
363
27.9k
            decimal_pos -= len;
364
27.9k
            if (decimal_pos > 0) {
365
2.89k
                if (len + d_exp <= 15) {
366
4.66k
                    while (decimal_pos--) s2[len++] = '0';
367
2.16k
                }
368
731
                else {
369
731
                    if (len > 1) {
370
5.84k
                        for (i = 0; i < len-1; i++)
371
5.17k
                            s2[len-i] = s2[len-i-1];
372
671
                        d_exp += i;
373
671
                        s2[1] = '.';
374
671
                        len++;
375
671
                    }
376
731
                    s2[len++] = 'e';
377
731
                    len += i_to_str(d_exp, s2+len);
378
731
                }
379
2.89k
            }
380
25.0k
            else if (decimal_pos < 0) {
381
2.35k
                for (i = 0; i < -decimal_pos; i++)
382
2.00k
                    s2[len-i] = s2[len-i-1];
383
342
                s2[len + decimal_pos] = '.';
384
342
                len++;
385
342
            }
386
27.9k
        }
387
911
        else if (decimal_pos > -4) {
388
746
            for (i = 0; i < len; i++)
389
665
                s2[len-decimal_pos-i+1] = s2[len-i-1];
390
81
            s2[0] = '0';
391
81
            s2[1] = '.';
392
181
            for (i = 0; i < -decimal_pos; i++)
393
100
                s2[2+i] = '0';
394
81
            len -= decimal_pos-2;
395
81
        }
396
830
        else {
397
830
            if (len > 1) {
398
6.41k
                for (i = 0; i < len-1; i++)
399
5.66k
                    s2[len-i] = s2[len-i-1];
400
752
                d_exp += i;
401
752
                s2[1] = '.';
402
752
                len++;
403
752
            }
404
830
            s2[len++] = 'e';
405
830
            len += i_to_str(d_exp, s2+len);
406
830
        }
407
28.8k
408
28.8k
        s2[len] = '\0'; // grisu3 doesn't null terminate, so ensure termination.
409
28.8k
        return (int)(s2+len-dst);
410
28.9k
}