/home/travis/build/MoarVM/MoarVM/src/platform/memmem32.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*- |
2 | | * Original FreeBSD code Copyright (c) 2005-2014 Rich Felker, et al. |
3 | | * Modifications for MVM 32 bit search Copyright (c) 2018 Samantha McVey |
4 | | * |
5 | | * Permission is hereby granted, free of charge, to any person obtaining |
6 | | * a copy of this software and associated documentation files (the |
7 | | * "Software"), to deal in the Software without restriction, including |
8 | | * without limitation the rights to use, copy, modify, merge, publish, |
9 | | * distribute, sublicense, and/or sell copies of the Software, and to |
10 | | * permit persons to whom the Software is furnished to do so, subject to |
11 | | * the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be |
14 | | * included in all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
17 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
18 | | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
19 | | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
20 | | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
21 | | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
22 | | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
23 | | */ |
24 | | |
25 | | #include <string.h> |
26 | | #include <stdint.h> |
27 | | /* memmem_two_uint32 uses 64 bit integer reads extensively. Only use it on 64 |
28 | | * bit processors since it will likely be slower on 32 bit processors. */ |
29 | | #include "moar.h" |
30 | | #if 8 <= MVM_PTR_SIZE && defined(MVM_CAN_UNALIGNED_INT64) |
31 | | #define USE_MEMMEM_TWO_UINT32 1 |
32 | | #endif |
33 | | |
34 | | /* This is a modification of the memmem used in FreeBSD to allow us to quickly |
35 | | * search 32 bit strings. This is much more efficient than searching by byte |
36 | | * since we can skip every 4 bytes instead of every 1 byte, as well as the fact |
37 | | * that in a 32 bit integer, some of the bytes will be empty, making it even |
38 | | * less efficient. |
39 | | * The only caveats is the table uses a modulus so it can only jump to the next |
40 | | * codepoint of the same modulus. */ |
41 | | |
42 | | /* For memmem_one32 we just look for a single 32 bit integer in the haystack, |
43 | | * simple. */ |
44 | 182k | static uint32_t * memmem_one_uint32(const uint32_t *h0, const uint32_t *n0, const uint32_t *end_h0) { |
45 | 182k | uint32_t *h = (uint32_t*)h0; |
46 | 182k | const uint32_t n = *n0; |
47 | 182k | const uint32_t *end_h = end_h0 - 1; |
48 | 1.41M | for (; h <= end_h; h++) { |
49 | 1.36M | if (*h == n) return h; |
50 | 1.36M | } |
51 | 52.4k | return NULL; |
52 | 182k | } |
53 | | /* For memmem_two_uint32 we work similarly to memmem_one_uint32 except we read |
54 | | * the needle of length 2 as a 64 bit integer, progressing along the haystack |
55 | | * 32 bits at a time, and comparing the 64 bit needle to the haystack's pointer |
56 | | * casted as a 64 bit integer. */ |
57 | | #if defined(USE_MEMMEM_TWO_UINT32) |
58 | 1.06k | static uint32_t * memmem_two_uint32(const uint32_t *h0, const uint32_t *n0, const uint32_t *end_h0) { |
59 | 1.06k | uint32_t *h = (uint32_t*)h0; |
60 | 1.06k | const uint64_t n = *((uint64_t*)n0); |
61 | 1.06k | const uint32_t *end_h = end_h0 - 2; |
62 | 14.1k | for (; h <= end_h; h++) { |
63 | 13.1k | if (*((uint64_t*)h) == n) return h; |
64 | 13.1k | } |
65 | 998 | return NULL; |
66 | 1.06k | } |
67 | | #endif |
68 | | |
69 | 126 | #define MAX(a,b) ((a)>(b)?(a):(b)) |
70 | | #define MIN(a,b) ((a)<(b)?(a):(b)) |
71 | | |
72 | | #define BITOP(a,b,op) \ |
73 | 20.6k | ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a)))) |
74 | | |
75 | | /* Original FreeBSD comment: |
76 | | * Two Way string search algorithm, with a bad shift table applied to the last |
77 | | * byte of the window. A bit array marks which entries in the shift table are |
78 | | * initialized to avoid fully initializing a 1kb/2kb table. |
79 | | * |
80 | | * Reference: CROCHEMORE M., PERRIN D., 1991, Two-way string-matching, |
81 | | * Journal of the ACM 38(3):651-675 |
82 | | */ |
83 | | |
84 | | /* The modulus number is the length of the shift table. We use this to quickly |
85 | | * lookup a codepoint and see how much we can shift forward. Caveat: if two numbers |
86 | | * have the same modulus we can only shift as much forward as much as a codepoint |
87 | | * with that modulus. */ |
88 | 20.6k | #define MODNUMBER 256 |
89 | 20.6k | #define MOD_OP(a) ((a) % MODNUMBER) |
90 | | static char *twoway_memmem_uint32(const uint32_t *h, const uint32_t *z, const uint32_t *n, size_t l) |
91 | 65 | { |
92 | 65 | size_t i, ip, jp, k, p, ms, p0, mem, mem0; |
93 | 65 | size_t byteset[32 / sizeof(size_t)] = { 0 }; |
94 | 65 | uint32_t shift[MODNUMBER]; |
95 | 65 | |
96 | 65 | /* Computing length of needle and fill shift table */ |
97 | 657 | for (i=0; i<l; i++) { |
98 | 592 | BITOP(byteset, MOD_OP(n[i]), |=); |
99 | 592 | shift[MOD_OP(n[i])] = i+1; |
100 | 592 | } |
101 | 65 | |
102 | 65 | /* Compute maximal suffix */ |
103 | 65 | ip = -1; jp = 0; k = p = 1; |
104 | 592 | while (jp+k<l) { |
105 | 527 | if (n[ip+k] == n[jp+k]) { |
106 | 2 | if (k == p) { |
107 | 1 | jp += p; |
108 | 1 | k = 1; |
109 | 1 | } else k++; |
110 | 525 | } else if (n[ip+k] > n[jp+k]) { |
111 | 293 | jp += k; |
112 | 293 | k = 1; |
113 | 293 | p = jp - ip; |
114 | 232 | } else { |
115 | 232 | ip = jp++; |
116 | 232 | k = p = 1; |
117 | 232 | } |
118 | 527 | } |
119 | 65 | ms = ip; |
120 | 65 | p0 = p; |
121 | 65 | |
122 | 65 | /* And with the opposite comparison */ |
123 | 65 | ip = -1; jp = 0; k = p = 1; |
124 | 593 | while (jp+k<l) { |
125 | 528 | if (n[ip+k] == n[jp+k]) { |
126 | 16 | if (k == p) { |
127 | 7 | jp += p; |
128 | 7 | k = 1; |
129 | 9 | } else k++; |
130 | 512 | } else if (n[ip+k] < n[jp+k]) { |
131 | 483 | jp += k; |
132 | 483 | k = 1; |
133 | 483 | p = jp - ip; |
134 | 29 | } else { |
135 | 29 | ip = jp++; |
136 | 29 | k = p = 1; |
137 | 29 | } |
138 | 528 | } |
139 | 65 | if (ip+1 > ms+1) ms = ip; |
140 | 65 | else p = p0; |
141 | 65 | |
142 | 65 | /* Periodic needle? */ |
143 | 65 | if (memcmp(n, n+p, ms+1)) { |
144 | 65 | mem0 = 0; |
145 | 65 | p = MAX(ms, l-ms-1) + 1; |
146 | 0 | } else mem0 = l-p; |
147 | 65 | mem = 0; |
148 | 65 | |
149 | 65 | /* Search loop */ |
150 | 20.0k | for (;;) { |
151 | 20.0k | /* If remainder of haystack is shorter than needle, done */ |
152 | 20.0k | if (z-h < l) { |
153 | 5 | return 0; |
154 | 5 | } |
155 | 20.0k | |
156 | 20.0k | /* Check last byte first; advance by shift on mismatch */ |
157 | 20.0k | if (BITOP(byteset, MOD_OP(h[l-1]), &)) { |
158 | 20.0k | k = l-shift[MOD_OP(h[l-1])]; |
159 | 20.0k | if (k) { |
160 | 20.0k | if (mem0 && mem && k < p) k = l-p; |
161 | 20.0k | h += k; |
162 | 20.0k | mem = 0; |
163 | 20.0k | continue; |
164 | 20.0k | } |
165 | 10 | } else { |
166 | 10 | h += l; |
167 | 10 | mem = 0; |
168 | 10 | continue; |
169 | 10 | } |
170 | 20.0k | |
171 | 20.0k | /* Compare right half */ |
172 | 206 | for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++); |
173 | 61 | if (k < l) { |
174 | 1 | h += k-ms; |
175 | 1 | mem = 0; |
176 | 1 | continue; |
177 | 1 | } |
178 | 61 | /* Compare left half */ |
179 | 472 | for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--); |
180 | 60 | if (k <= mem) { |
181 | 60 | return (char *)h; |
182 | 60 | } |
183 | 0 | h += p; |
184 | 0 | mem = mem0; |
185 | 0 | } |
186 | 65 | } |
187 | | /* Finds the memory location of the needle in the haystack. Arguments are the |
188 | | * memory location of the start of the Haystack, the memory location of the start |
189 | | * of the needle and the needle length as well as the Haystack length. |
190 | | * H_len and n_len are measured the size of a uint32_t. */ |
191 | | void * memmem_uint32(const void *h0, size_t H_len, const void *n0, size_t n_len) |
192 | 183k | { |
193 | 183k | const uint32_t *h = (uint32_t*)h0, |
194 | 183k | *n = (uint32_t*)n0; |
195 | 183k | |
196 | 183k | /* The empty string can be found at the start of any string. */ |
197 | 183k | if (!n_len) return (void *)h; |
198 | 183k | |
199 | 183k | /* Return immediately when needle is longer than haystack. */ |
200 | 183k | if (H_len < n_len) |
201 | 93 | return NULL; |
202 | 183k | |
203 | 183k | #if defined(USE_MEMMEM_TWO_UINT32) |
204 | 183k | if (n_len == 1) { |
205 | 182k | return memmem_one_uint32(h, n, h+H_len); |
206 | 182k | } |
207 | 183k | /* Otherwise do a search for the first two uint32_t integers at the start |
208 | 183k | * of the needle. */ |
209 | 1.06k | else { |
210 | 1.06k | h = memmem_two_uint32(h, n, h+H_len); |
211 | 1.06k | } |
212 | 183k | /* If nothing found or if we have a needle of length 2, we already have |
213 | 183k | * our result. */ |
214 | 1.06k | if (!h || n_len == 2) |
215 | 1.00k | return (void *)h; |
216 | 1.06k | #else |
217 | | /* With needle length 1 use fast search for only one uint32_t. */ |
218 | | h = memmem_one_uint32(h, n, h+H_len); |
219 | | if (!h || n_len == 1) |
220 | | return (void *)h; |
221 | | #endif |
222 | 1.06k | /* Since our Haystack (may) have been moved forward to the first match |
223 | 1.06k | * of the start of the needle, also reduce the Haystack's length to match. */ |
224 | 65 | H_len -= h - (uint32_t*)h0; |
225 | 65 | /* No more work to do if the needle is longer than where we are now. */ |
226 | 65 | if (H_len < n_len) |
227 | 0 | return NULL; |
228 | 65 | |
229 | 65 | return twoway_memmem_uint32(h, h+H_len, n, n_len); |
230 | 65 | } |