/home/travis/build/MoarVM/MoarVM/src/platform/memmem32.c

Source (jump to first uncovered line)
/*-
 * Original FreeBSD code Copyright (c) 2005-2014 Rich Felker, et al.
 * Modifications for MVM 32 bit search Copyright (c) 2018 Samantha McVey
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <string.h>
#include <stdint.h>
/* memmem_two_uint32 uses 64 bit integer reads extensively. Only use it on 64
 * bit processors since it will likely be slower on 32 bit processors. */
#include "moar.h"
#if 8 <= MVM_PTR_SIZE && defined(MVM_CAN_UNALIGNED_INT64)
#define USE_MEMMEM_TWO_UINT32 1
#endif

/* This is a modification of the memmem used in FreeBSD to allow us to quickly
 * search 32 bit strings. This is much more efficient than searching by byte
 * since we can skip every 4 bytes instead of every 1 byte, as well as the fact
 * that in a 32 bit integer, some of the bytes will be empty, making it even
 * less efficient.
 * The only caveats is the table uses a modulus so it can only jump to the next
 * codepoint of the same modulus. */

/* For memmem_one32 we just look for a single 32 bit integer in the haystack,
 * simple. */
static uint32_t * memmem_one_uint32(const uint32_t *h0, const uint32_t *n0, const uint32_t *end_h0) {
  uint32_t *h           = (uint32_t*)h0;
  const uint32_t  n     = *n0;
  const uint32_t *end_h = end_h0 - 1;
  for (; h <= end_h; h++) {
    if (*h == n) return h;
  }
  return NULL;
}
/* For memmem_two_uint32 we work similarly to memmem_one_uint32 except we read
 * the needle of length 2 as a 64 bit integer, progressing along the haystack
 * 32 bits at a time, and comparing the 64 bit needle to the haystack's pointer
 * casted as a 64 bit integer. */
#if defined(USE_MEMMEM_TWO_UINT32)
static uint32_t * memmem_two_uint32(const uint32_t *h0, const uint32_t *n0, const uint32_t *end_h0) {
  uint32_t *h           = (uint32_t*)h0;
  const uint64_t  n     = *((uint64_t*)n0);
  const uint32_t *end_h = end_h0 - 2;
  for (; h <= end_h; h++) {
    if (*((uint64_t*)h) == n) return h;
  }
  return NULL;
}
#endif

#define MAX(a,b) ((a)>(b)?(a):(b))
#define MIN(a,b) ((a)<(b)?(a):(b))

#define BITOP(a,b,op) \
 ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))

/* Original FreeBSD comment:
 * Two Way string search algorithm, with a bad shift table applied to the last
 * byte of the window. A bit array marks which entries in the shift table are
 * initialized to avoid fully initializing a 1kb/2kb table.
 *
 * Reference: CROCHEMORE M., PERRIN D., 1991, Two-way string-matching,
 * Journal of the ACM 38(3):651-675
 */

/* The modulus number is the length of the shift table. We use this to quickly
 * lookup a codepoint and see how much we can shift forward. Caveat: if two numbers
 * have the same modulus we can only shift as much forward as much as a codepoint
 * with that modulus. */
#define MODNUMBER 256
#define MOD_OP(a) ((a) % MODNUMBER)
static char *twoway_memmem_uint32(const uint32_t *h, const uint32_t *z, const uint32_t *n, size_t l)
{
  size_t i, ip, jp, k, p, ms, p0, mem, mem0;
  size_t byteset[32 / sizeof(size_t)] = { 0 };
  uint32_t shift[MODNUMBER];

  /* Computing length of needle and fill shift table */
  for (i=0; i<l; i++) {
    BITOP(byteset, MOD_OP(n[i]), |=);
    shift[MOD_OP(n[i])] = i+1;
  }

  /* Compute maximal suffix */
  ip = -1; jp = 0; k = p = 1;
  while (jp+k<l) {
    if (n[ip+k] == n[jp+k]) {
      if (k == p) {
        jp += p;
        k = 1;
      } else k++;
    } else if (n[ip+k] > n[jp+k]) {
      jp += k;
      k = 1;
      p = jp - ip;
    } else {
      ip = jp++;
      k = p = 1;
    }
  }
  ms = ip;
  p0 = p;

  /* And with the opposite comparison */
  ip = -1; jp = 0; k = p = 1;
  while (jp+k<l) {
    if (n[ip+k] == n[jp+k]) {
      if (k == p) {
        jp += p;
        k = 1;
      } else k++;
    } else if (n[ip+k] < n[jp+k]) {
      jp += k;
      k = 1;
      p = jp - ip;
    } else {
      ip = jp++;
      k = p = 1;
    }
  }
  if (ip+1 > ms+1) ms = ip;
  else p = p0;

  /* Periodic needle? */
  if (memcmp(n, n+p, ms+1)) {
    mem0 = 0;
    p = MAX(ms, l-ms-1) + 1;
  } else mem0 = l-p;
  mem = 0;

  /* Search loop */
  for (;;) {
    /* If remainder of haystack is shorter than needle, done */
    if (z-h < l) {
      return 0;
    }

    /* Check last byte first; advance by shift on mismatch */
    if (BITOP(byteset, MOD_OP(h[l-1]), &)) {
      k = l-shift[MOD_OP(h[l-1])];
      if (k) {
        if (mem0 && mem && k < p) k = l-p;
        h += k;
        mem = 0;
        continue;
      }
    } else {
      h += l;
      mem = 0;
      continue;
    }

    /* Compare right half */
    for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
    if (k < l) {
      h += k-ms;
      mem = 0;
      continue;
    }
    /* Compare left half */
    for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
    if (k <= mem) {
      return (char *)h;
    }
    h += p;
    mem = mem0;
  }
}
/* Finds the memory location of the needle in the haystack. Arguments are the
 * memory location of the start of the Haystack, the memory location of the start
 * of the needle and the needle length as well as the Haystack length.
 * H_len and n_len are measured the size of a uint32_t. */
void * memmem_uint32(const void *h0, size_t H_len, const void *n0, size_t n_len)
{
  const uint32_t *h = (uint32_t*)h0,
                 *n = (uint32_t*)n0;

  /* The empty string can be found at the start of any string. */
  if (!n_len) return (void *)h;

  /* Return immediately when needle is longer than haystack. */
  if (H_len < n_len)
    return NULL;

#if defined(USE_MEMMEM_TWO_UINT32)
  if (n_len == 1) {
    return memmem_one_uint32(h, n, h+H_len);
  }
  /* Otherwise do a search for the first two uint32_t integers at the start
   * of the needle. */
  else {
    h = memmem_two_uint32(h, n, h+H_len);
  }
  /* If nothing found or if we have a needle of length 2, we already have
   * our result. */
  if (!h || n_len == 2)
    return (void *)h;
#else
  /* With needle length 1 use fast search for only one uint32_t. */
  h = memmem_one_uint32(h, n, h+H_len);
  if (!h || n_len == 1)
    return (void *)h;
#endif
  /* Since our Haystack (may) have been moved forward to the first match
   * of the start of the needle, also reduce the Haystack's length to match. */
  H_len -= h - (uint32_t*)h0;
  /* No more work to do if the needle is longer than where we are now. */
  if (H_len < n_len)
    return NULL;

  return twoway_memmem_uint32(h, h+H_len, n, n_len);
}

Coverage Report

Created: 2018-07-03 15:31

Line	Count	Source (jump to first uncovered line)
1		/*-
2		* Original FreeBSD code Copyright (c) 2005-2014 Rich Felker, et al.
3		* Modifications for MVM 32 bit search Copyright (c) 2018 Samantha McVey
4		*
5		* Permission is hereby granted, free of charge, to any person obtaining
6		* a copy of this software and associated documentation files (the
7		* "Software"), to deal in the Software without restriction, including
8		* without limitation the rights to use, copy, modify, merge, publish,
9		* distribute, sublicense, and/or sell copies of the Software, and to
10		* permit persons to whom the Software is furnished to do so, subject to
11		* the following conditions:
12		*
13		* The above copyright notice and this permission notice shall be
14		* included in all copies or substantial portions of the Software.
15		*
16		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17		* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18		* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19		* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20		* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21		* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22		* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23		*/
24
25		#include <string.h>
26		#include <stdint.h>
27		/* memmem_two_uint32 uses 64 bit integer reads extensively. Only use it on 64
28		* bit processors since it will likely be slower on 32 bit processors. */
29		#include "moar.h"
30		#if 8 <= MVM_PTR_SIZE && defined(MVM_CAN_UNALIGNED_INT64)
31		#define USE_MEMMEM_TWO_UINT32 1
32		#endif
33
34		/* This is a modification of the memmem used in FreeBSD to allow us to quickly
35		* search 32 bit strings. This is much more efficient than searching by byte
36		* since we can skip every 4 bytes instead of every 1 byte, as well as the fact
37		* that in a 32 bit integer, some of the bytes will be empty, making it even
38		* less efficient.
39		* The only caveats is the table uses a modulus so it can only jump to the next
40		* codepoint of the same modulus. */
41
42		/* For memmem_one32 we just look for a single 32 bit integer in the haystack,
43		* simple. */
44	182k	static uint32_t * memmem_one_uint32(const uint32_t h0, const uint32_t n0, const uint32_t *end_h0) {
45	182k	uint32_t h = (uint32_t)h0;
46	182k	const uint32_t n = *n0;
47	182k	const uint32_t *end_h = end_h0 - 1;
48	1.41M	for (; h <= end_h; h++) {
49	1.36M	if (*h == n) return h;
50	1.36M	}
51	52.4k	return NULL;
52	182k	}
53		/* For memmem_two_uint32 we work similarly to memmem_one_uint32 except we read
54		* the needle of length 2 as a 64 bit integer, progressing along the haystack
55		* 32 bits at a time, and comparing the 64 bit needle to the haystack's pointer
56		* casted as a 64 bit integer. */
57		#if defined(USE_MEMMEM_TWO_UINT32)
58	1.06k	static uint32_t * memmem_two_uint32(const uint32_t h0, const uint32_t n0, const uint32_t *end_h0) {
59	1.06k	uint32_t h = (uint32_t)h0;
60	1.06k	const uint64_t n = ((uint64_t)n0);
61	1.06k	const uint32_t *end_h = end_h0 - 2;
62	14.1k	for (; h <= end_h; h++) {
63	13.1k	if (((uint64_t)h) == n) return h;
64	13.1k	}
65	998	return NULL;
66	1.06k	}
67		#endif
68
69	126	#define MAX(a,b) ((a)>(b)?(a):(b))
70		#define MIN(a,b) ((a)<(b)?(a):(b))
71
72		#define BITOP(a,b,op) \
73	20.6k	((a)[(size_t)(b)/(8sizeof (a))] op (size_t)1<<((size_t)(b)%(8sizeof (a))))
74
75		/* Original FreeBSD comment:
76		* Two Way string search algorithm, with a bad shift table applied to the last
77		* byte of the window. A bit array marks which entries in the shift table are
78		* initialized to avoid fully initializing a 1kb/2kb table.
79		*
80		* Reference: CROCHEMORE M., PERRIN D., 1991, Two-way string-matching,
81		* Journal of the ACM 38(3):651-675
82		*/
83
84		/* The modulus number is the length of the shift table. We use this to quickly
85		* lookup a codepoint and see how much we can shift forward. Caveat: if two numbers
86		* have the same modulus we can only shift as much forward as much as a codepoint
87		* with that modulus. */
88	20.6k	#define MODNUMBER 256
89	20.6k	#define MOD_OP(a) ((a) % MODNUMBER)
90		static char twoway_memmem_uint32(const uint32_t h, const uint32_t z, const uint32_t n, size_t l)
91	65	{
92	65	size_t i, ip, jp, k, p, ms, p0, mem, mem0;
93	65	size_t byteset[32 / sizeof(size_t)] = { 0 };
94	65	uint32_t shift[MODNUMBER];
95	65
96	65	/* Computing length of needle and fill shift table */
97	657	for (i=0; i<l; i++) {
98	592	BITOP(byteset, MOD_OP(n[i]), \|=);
99	592	shift[MOD_OP(n[i])] = i+1;
100	592	}
101	65
102	65	/* Compute maximal suffix */
103	65	ip = -1; jp = 0; k = p = 1;
104	592	while (jp+k<l) {
105	527	if (n[ip+k] == n[jp+k]) {
106	2	if (k == p) {
107	1	jp += p;
108	1	k = 1;
109	1	} else k++;
110	525	} else if (n[ip+k] > n[jp+k]) {
111	293	jp += k;
112	293	k = 1;
113	293	p = jp - ip;
114	232	} else {
115	232	ip = jp++;
116	232	k = p = 1;
117	232	}
118	527	}
119	65	ms = ip;
120	65	p0 = p;
121	65
122	65	/* And with the opposite comparison */
123	65	ip = -1; jp = 0; k = p = 1;
124	593	while (jp+k<l) {
125	528	if (n[ip+k] == n[jp+k]) {
126	16	if (k == p) {
127	7	jp += p;
128	7	k = 1;
129	9	} else k++;
130	512	} else if (n[ip+k] < n[jp+k]) {
131	483	jp += k;
132	483	k = 1;
133	483	p = jp - ip;
134	29	} else {
135	29	ip = jp++;
136	29	k = p = 1;
137	29	}
138	528	}
139	65	if (ip+1 > ms+1) ms = ip;
140	65	else p = p0;
141	65
142	65	/* Periodic needle? */
143	65	if (memcmp(n, n+p, ms+1)) {
144	65	mem0 = 0;
145	65	p = MAX(ms, l-ms-1) + 1;
146	0	} else mem0 = l-p;
147	65	mem = 0;
148	65
149	65	/* Search loop */
150	20.0k	for (;;) {
151	20.0k	/* If remainder of haystack is shorter than needle, done */
152	20.0k	if (z-h < l) {
153	5	return 0;
154	5	}
155	20.0k
156	20.0k	/* Check last byte first; advance by shift on mismatch */
157	20.0k	if (BITOP(byteset, MOD_OP(h[l-1]), &)) {
158	20.0k	k = l-shift[MOD_OP(h[l-1])];
159	20.0k	if (k) {
160	20.0k	if (mem0 && mem && k < p) k = l-p;
161	20.0k	h += k;
162	20.0k	mem = 0;
163	20.0k	continue;
164	20.0k	}
165	10	} else {
166	10	h += l;
167	10	mem = 0;
168	10	continue;
169	10	}
170	20.0k
171	20.0k	/* Compare right half */
172	206	for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
173	61	if (k < l) {
174	1	h += k-ms;
175	1	mem = 0;
176	1	continue;
177	1	}
178	61	/* Compare left half */
179	472	for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
180	60	if (k <= mem) {
181	60	return (char *)h;
182	60	}
183	0	h += p;
184	0	mem = mem0;
185	0	}
186	65	}
187		/* Finds the memory location of the needle in the haystack. Arguments are the
188		* memory location of the start of the Haystack, the memory location of the start
189		* of the needle and the needle length as well as the Haystack length.
190		* H_len and n_len are measured the size of a uint32_t. */
191		void * memmem_uint32(const void h0, size_t H_len, const void n0, size_t n_len)
192	183k	{
193	183k	const uint32_t h = (uint32_t)h0,
194	183k	n = (uint32_t)n0;
195	183k
196	183k	/* The empty string can be found at the start of any string. */
197	183k	if (!n_len) return (void *)h;
198	183k
199	183k	/* Return immediately when needle is longer than haystack. */
200	183k	if (H_len < n_len)
201	93	return NULL;
202	183k
203	183k	#if defined(USE_MEMMEM_TWO_UINT32)
204	183k	if (n_len == 1) {
205	182k	return memmem_one_uint32(h, n, h+H_len);
206	182k	}
207	183k	/* Otherwise do a search for the first two uint32_t integers at the start
208	183k	* of the needle. */
209	1.06k	else {
210	1.06k	h = memmem_two_uint32(h, n, h+H_len);
211	1.06k	}
212	183k	/* If nothing found or if we have a needle of length 2, we already have
213	183k	* our result. */
214	1.06k	if (!h \|\| n_len == 2)
215	1.00k	return (void *)h;
216	1.06k	#else
217		/* With needle length 1 use fast search for only one uint32_t. */
218		h = memmem_one_uint32(h, n, h+H_len);
219		if (!h \|\| n_len == 1)
220		return (void *)h;
221		#endif
222	1.06k	/* Since our Haystack (may) have been moved forward to the first match
223	1.06k	* of the start of the needle, also reduce the Haystack's length to match. */
224	65	H_len -= h - (uint32_t*)h0;
225	65	/* No more work to do if the needle is longer than where we are now. */
226	65	if (H_len < n_len)
227	0	return NULL;
228	65
229	65	return twoway_memmem_uint32(h, h+H_len, n, n_len);
230	65	}