/home/travis/build/MoarVM/MoarVM/src/strings/ops.h
Line | Count | Source (jump to first uncovered line) |
/* Encoding type identifiers and the validity check for encoding flags.
 * MVM_encoding_type_MIN and MVM_encoding_type_MAX are the inclusive bounds
 * of the identifiers that ENCODING_VALID accepts. */
#define MVM_encoding_type_MIN           1
#define MVM_encoding_type_utf8          1
#define MVM_encoding_type_ascii         2
#define MVM_encoding_type_latin1        3
#define MVM_encoding_type_utf16         4
#define MVM_encoding_type_windows1252   5
#define MVM_encoding_type_utf8_c8       6
#define MVM_encoding_type_windows1251   7
#define MVM_encoding_type_shiftjis      8
#define MVM_encoding_type_MAX           8

/* Evaluates to 1 when enc lies within [MVM_encoding_type_MIN,
 * MVM_encoding_type_MAX]. Otherwise it calls MVM_exception_throw_adhoc; the
 * trailing ",1" only exists to give that branch an int value.
 *
 * The macro relies on a variable named tc being in scope where it is used,
 * and it may evaluate enc more than once, so enc must not have side effects.
 * The value handed to the message is cast to int so that it always matches
 * the %d conversion, whatever integer type the caller passes (encoding flags
 * are carried as 64-bit integers in several declarations of this header). */
#define ENCODING_VALID(enc) \
    (((enc) >= MVM_encoding_type_MIN && (enc) <= MVM_encoding_type_MAX) \
    || (MVM_exception_throw_adhoc(tc, "invalid encoding type flag: %d", (int)(enc)),1))
15 | | |
/* Character class constants (map to nqp::const::CCLASS_* values).
 * Each named class is a single distinct bit, so classes can be OR-ed
 * together; MVM_CCLASS_ANY has the low 16 bits set. Bit 0x0080 is not
 * assigned to any class here. */
#define MVM_CCLASS_ANY          0xFFFF
#define MVM_CCLASS_UPPERCASE    0x0001
#define MVM_CCLASS_LOWERCASE    0x0002
#define MVM_CCLASS_ALPHABETIC   0x0004
#define MVM_CCLASS_NUMERIC      0x0008
#define MVM_CCLASS_HEXADECIMAL  0x0010
#define MVM_CCLASS_WHITESPACE   0x0020
#define MVM_CCLASS_PRINTING     0x0040
#define MVM_CCLASS_BLANK        0x0100
#define MVM_CCLASS_CONTROL      0x0200
#define MVM_CCLASS_PUNCTUATION  0x0400
#define MVM_CCLASS_ALPHANUMERIC 0x0800
#define MVM_CCLASS_NEWLINE      0x1000
#define MVM_CCLASS_WORD         0x2000
31 | | |
32 | | /* Checks a string is not null or non-concrete and throws if so. */ |
33 | 0 | MVM_STATIC_INLINE void MVM_string_check_arg(MVMThreadContext *tc, const MVMString *s, const char *operation) { |
34 | 0 | if (!s || !IS_CONCRETE(s)) |
35 | 0 | MVM_exception_throw_adhoc(tc, "%s requires a concrete string, but got %s", |
36 | 0 | operation, s ? "a type object" : "null"); |
37 | 0 | } |
38 | | |
39 | 0 | MVM_STATIC_INLINE MVMuint32 MVM_string_graphs(MVMThreadContext *tc, MVMString *s) { |
40 | 0 | MVM_string_check_arg(tc, s, "chars"); |
41 | 0 | return s->body.num_graphs; |
42 | 0 | } |
43 | 0 | MVM_STATIC_INLINE MVMuint32 MVM_string_graphs_nocheck(MVMThreadContext *tc, MVMString *s) { |
44 | 0 | return s->body.num_graphs; |
45 | 0 | } |
46 | 0 | MVM_STATIC_INLINE MVMuint32 MVM_string_codes(MVMThreadContext *tc, MVMString *s) { |
47 | 0 | MVMGraphemeIter gi; |
48 | 0 | MVMint64 codes = 0; |
49 | 0 | MVM_string_check_arg(tc, s, "codes"); |
50 | 0 | if (MVM_string_graphs_nocheck(tc, s) == 0) |
51 | 0 | return 0; |
52 | 0 | MVM_string_gi_init(tc, &gi, s); |
53 | 0 |
|
54 | 0 | while(MVM_string_gi_has_more(tc, &gi)) { |
55 | 0 | MVMGrapheme32 g = MVM_string_gi_get_grapheme(tc, &gi); |
56 | 0 | codes += g < 0 ? |
57 | 0 | MVM_nfg_get_synthetic_info(tc, g)->num_codes |
58 | 0 | : 1; |
59 | 0 | } |
60 | 0 | return codes; |
61 | 0 | } |
62 | 0 | MVM_STATIC_INLINE int MVM_string_buf32_can_fit_into_8bit(MVMGrapheme32 *active_blob, MVMStringIndex blob_len) { |
63 | 0 | MVMStringIndex i; |
64 | 0 | MVMGrapheme32 val = 0; |
65 | 0 | MVM_VECTORIZE_LOOP |
66 | 0 | for (i = 0; i < blob_len; i++) { |
67 | 0 | /* This could be written val |= ..., but GCC 7 doesn't recognize the |
68 | 0 | * operation as ossociative unless we use a temp variable (clang has no issue). */ |
69 | 0 | MVMGrapheme32 val2 = ((active_blob[i] & 0xffffff80) + 0x80) & (0xffffff80-1); |
70 | 0 | val |= val2; |
71 | 0 | } |
72 | 0 | return val ? 0 : 1; |
73 | 0 | } |
74 | | /* Function declarations only; no definitions for the functions below appear in this header. */ MVMGrapheme32 MVM_string_get_grapheme_at_nocheck(MVMThreadContext *tc, MVMString *a, MVMint64 index); |
75 | | MVMint64 MVM_string_equal(MVMThreadContext *tc, MVMString *a, MVMString *b); |
76 | | MVMint64 MVM_string_index(MVMThreadContext *tc, MVMString *haystack, MVMString *needle, MVMint64 start); |
77 | | MVMint64 MVM_string_index_ignore_case(MVMThreadContext *tc, MVMString *haystack, MVMString *needle, MVMint64 start); |
78 | | MVMint64 MVM_string_index_ignore_mark(MVMThreadContext *tc, MVMString *Haystack, MVMString *needle, MVMint64 start); /* NOTE(review): the first string parameter is spelled Haystack here but haystack in the neighbouring index declarations. */ |
79 | | MVMint64 MVM_string_index_ignore_case_ignore_mark(MVMThreadContext *tc, MVMString *haystack, MVMString *needle, MVMint64 start); |
80 | | MVMint64 MVM_string_index_from_end(MVMThreadContext *tc, MVMString *haystack, MVMString *needle, MVMint64 start); |
81 | | MVMString * MVM_string_concatenate(MVMThreadContext *tc, MVMString *a, MVMString *b); |
82 | | MVMString * MVM_string_repeat(MVMThreadContext *tc, MVMString *a, MVMint64 count); |
83 | | MVMString * MVM_string_substring(MVMThreadContext *tc, MVMString *a, MVMint64 start, MVMint64 length); |
84 | | MVMString * MVM_string_replace(MVMThreadContext *tc, MVMString *a, MVMint64 start, MVMint64 length, MVMString *replacement); |
85 | | void MVM_string_say(MVMThreadContext *tc, MVMString *a); |
86 | | void MVM_string_print(MVMThreadContext *tc, MVMString *a); |
87 | | MVMint64 MVM_string_equal_at(MVMThreadContext *tc, MVMString *a, MVMString *b, MVMint64 offset); |
88 | | MVMint64 MVM_string_equal_at_ignore_case(MVMThreadContext *tc, MVMString *a, MVMString *b, MVMint64 offset); |
89 | | MVMint64 MVM_string_equal_at_ignore_mark(MVMThreadContext *tc, MVMString *Haystack, MVMString *needle, MVMint64 H_offset); /* NOTE(review): parameters are named Haystack/needle/H_offset here, while the other equal_at declarations use a/b/offset. */ |
90 | | MVMint64 MVM_string_equal_at_ignore_case_ignore_mark(MVMThreadContext *tc, MVMString *a, MVMString *b, MVMint64 offset); |
91 | | MVMGrapheme32 MVM_string_ord_basechar_at(MVMThreadContext *tc, MVMString *s, MVMint64 offset); |
92 | | MVMGrapheme32 MVM_string_ord_at(MVMThreadContext *tc, MVMString *s, MVMint64 offset); |
93 | | MVMint64 MVM_string_have_at(MVMThreadContext *tc, MVMString *a, MVMint64 starta, MVMint64 length, MVMString *b, MVMint64 startb); |
94 | | MVMint64 MVM_string_get_grapheme_at(MVMThreadContext *tc, MVMString *a, MVMint64 index); /* NOTE(review): declared to return MVMint64, whereas MVM_string_get_grapheme_at_nocheck returns MVMGrapheme32. */ |
95 | | MVMint64 MVM_string_index_of_grapheme(MVMThreadContext *tc, MVMString *a, MVMGrapheme32 codepoint); |
96 | | MVMString * MVM_string_uc(MVMThreadContext *tc, MVMString *s); |
97 | | MVMString * MVM_string_lc(MVMThreadContext *tc, MVMString *s); |
98 | | MVMString * MVM_string_tc(MVMThreadContext *tc, MVMString *s); |
99 | | MVMString * MVM_string_fc(MVMThreadContext *tc, MVMString *s); |
100 | | /* encoding_flag is presumably one of the MVM_encoding_type_* values -- confirm against the implementation. */ MVMString * MVM_string_decode(MVMThreadContext *tc, const MVMObject *type_object, char *Cbuf, MVMint64 byte_length, MVMint64 encoding_flag); |
101 | | char * MVM_string_encode(MVMThreadContext *tc, MVMString *s, MVMint64 start, MVMint64 length, MVMuint64 *output_size, MVMint64 encoding_flag, MVMString *replacement, MVMint32 translate_newlines); |
102 | | MVMObject * MVM_string_encode_to_buf(MVMThreadContext *tc, MVMString *s, MVMString *enc_name, MVMObject *buf, MVMString *replacement); |
103 | | MVMObject * MVM_string_encode_to_buf_config(MVMThreadContext *tc, MVMString *s, MVMString *enc_name, MVMObject *buf, MVMString *replacement, MVMint64 bitmap); |
104 | | MVMString * MVM_string_decode_from_buf(MVMThreadContext *tc, MVMObject *buf, MVMString *enc_name); |
105 | | MVMString * MVM_string_decode_from_buf_config(MVMThreadContext *tc, MVMObject *buf, |
106 | | MVMString *enc_name, MVMString *replacement, MVMint64 bitmap); |
107 | | MVMObject * MVM_string_split(MVMThreadContext *tc, MVMString *separator, MVMString *input); |
108 | | MVMString * MVM_string_join(MVMThreadContext *tc, MVMString *separator, MVMObject *input); |
109 | | MVMint64 MVM_string_char_at_in_string(MVMThreadContext *tc, MVMString *a, MVMint64 offset, MVMString *b); |
110 | | MVMint64 MVM_string_offset_has_unicode_property_value(MVMThreadContext *tc, MVMString *s, MVMint64 offset, MVMint64 property_code, MVMint64 property_value_code); |
111 | | MVMint64 MVM_unicode_codepoint_has_property_value(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code, MVMint64 property_value_code); |
112 | | MVMString * MVM_unicode_codepoint_get_property_str(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code); |
113 | | const char * MVM_unicode_codepoint_get_property_cstr(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code); |
114 | | MVMint64 MVM_unicode_codepoint_get_property_int(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code); |
115 | | MVMint64 MVM_unicode_codepoint_get_property_bool(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code); |
116 | | MVMString * MVM_unicode_get_name(MVMThreadContext *tc, MVMint64 grapheme); |
117 | | MVMString * MVM_string_indexing_optimized(MVMThreadContext *tc, MVMString *s); |
118 | | MVMString * MVM_string_escape(MVMThreadContext *tc, MVMString *s); |
119 | | MVMString * MVM_string_flip(MVMThreadContext *tc, MVMString *s); |
120 | | MVMint64 MVM_string_compare(MVMThreadContext *tc, MVMString *a, MVMString *b); |
121 | | MVMString * MVM_string_bitand(MVMThreadContext *tc, MVMString *a, MVMString *b); |
122 | | MVMString * MVM_string_bitor(MVMThreadContext *tc, MVMString *a, MVMString *b); |
123 | | MVMString * MVM_string_bitxor(MVMThreadContext *tc, MVMString *a, MVMString *b); |
124 | | /* cclass is presumably one or more MVM_CCLASS_* values -- confirm against the implementation. */ MVMint64 MVM_string_is_cclass(MVMThreadContext *tc, MVMint64 cclass, MVMString *s, MVMint64 offset); |
125 | | MVMint64 MVM_string_find_cclass(MVMThreadContext *tc, MVMint64 cclass, MVMString *s, MVMint64 offset, MVMint64 count); |
126 | | MVMint64 MVM_string_find_not_cclass(MVMThreadContext *tc, MVMint64 cclass, MVMString *s, MVMint64 offset, MVMint64 count); |
127 | | MVMuint8 MVM_string_find_encoding(MVMThreadContext *tc, MVMString *name); |
128 | | MVMString * MVM_string_chr(MVMThreadContext *tc, MVMint64 cp); |
129 | | MVMint64 MVM_string_grapheme_is_cclass(MVMThreadContext *tc, MVMint64 cclass, MVMGrapheme32 g); |
130 | | void MVM_string_compute_hash_code(MVMThreadContext *tc, MVMString *s); |
/* NFG debugging switch. When MVM_DEBUG_NFG is 1, NFG_CHECK and
 * NFG_CHECK_CONCAT expand to calls to NFG_check / NFG_check_concat, which
 * re-normalize the given string and compare num_graphs before and after the
 * normalization, printing debug information if the two differ. When it is
 * 0, both macros expand to nothing. */
#define MVM_DEBUG_NFG 0
/* MVM_DEBUG_NFG_STRICT does the same, but does not rely on num_graphs
 * alone: every grapheme is always checked manually. Slower. Requires
 * MVM_DEBUG_NFG. */
#define MVM_DEBUG_NFG_STRICT 0

#if !MVM_DEBUG_NFG
/* Checks disabled: the macros vanish at their use sites. */
#define NFG_CHECK(tc, s, varname)
#define NFG_CHECK_CONCAT(tc, s, a, b, varname)
#else
void NFG_check (MVMThreadContext *tc, MVMString *orig, char *varname);
void NFG_check_concat (MVMThreadContext *tc, MVMString *result, MVMString *a, MVMString *b, char *varname);
/* Note: both expansions already end with a semicolon. */
#define NFG_CHECK(tc, s, varname) NFG_check(tc, s, varname);
#define NFG_CHECK_CONCAT(tc, s, a, b, varname) NFG_check_concat(tc, s, a, b, varname);
#endif