/home/travis/build/MoarVM/MoarVM/src/strings/nfg.h
Line | Count | Source |
1 | | /* State kept around for implementing Normal Form Grapheme (NFG). The design is |
2 | | * such that lookups never need to acquire a lock. When we add new synthetics, |
3 | | * we must acquire update_mutex before doing so, and then validate that |
4 | | * nothing changed in the meantime. We must also do sufficient copying to |
5 | | * ensure that we never break another thread doing a read. Memory to be |
6 | | * released is therefore freed at a global safe point, which means we never |
7 | | * have one thread reading memory freed by another. */ |
8 | | struct MVMNFGState { |
9 | | /* Table of information about synthetic graphemes. Given some (negative) |
10 | | * synthetic S, its entry in this table is at index (-S - 1). */ |
11 | | MVMNFGSynthetic *synthetics; |
12 | | |
13 | | /* Trie used to look up a sequence of codepoints (already in NFC) and get |
14 | | * the corresponding (NFG) grapheme. */ |
15 | | MVMNFGTrieNode *grapheme_lookup; |
16 | | |
17 | | /* Mutex acquired when we wish to do updates to the grapheme table. */ |
18 | | uv_mutex_t update_mutex; |
19 | | |
20 | | /* Number of synthetics we currently have. */ |
21 | | MVMint32 num_synthetics; |
22 | | |
23 | | /* Cached CRLF grapheme index, since we need it so often. */ |
24 | | MVMGrapheme32 crlf_grapheme; |
25 | | }; |
26 | | |
27 | | /* State held about a single synthetic grapheme. */ |
28 | | struct MVMNFGSynthetic { |
29 | | /* The index of the base (non-combining) grapheme, or -1 if there is no |
30 | | * base. NOTE(review): presumably an index into `codes` -- confirm against |
31 | | * the code that populates this field. */ |
32 | | MVMint32 base_index; |
33 | | |
34 | | /* The number of codepoints we have. */ |
35 | | MVMint32 num_codes; |
36 | | |
37 | | /* Array of the codepoints held for this synthetic. */ |
38 | | MVMCodepoint *codes; |
39 | | |
40 | | /* Cached case transforms, one per kind (uc, lc, tc, fc); NULL if not calculated. */ |
41 | | MVMGrapheme32 *case_uc; |
42 | | MVMGrapheme32 *case_lc; |
43 | | MVMGrapheme32 *case_tc; |
44 | | MVMGrapheme32 *case_fc; |
45 | | |
46 | | /* Grapheme counts of the corresponding cached case transforms above. */ |
47 | | MVMint32 case_uc_graphs; |
48 | | MVMint32 case_lc_graphs; |
49 | | MVMint32 case_tc_graphs; |
50 | | MVMint32 case_fc_graphs; |
51 | | |
52 | | /* Is this a UTF-8 C-8 synthetic? */ |
53 | | MVMint32 is_utf8_c8; |
54 | | }; |
55 | | |
56 | | /* A node in the NFG trie. */ |
57 | | struct MVMNFGTrieNode { |
58 | | /* Set of entries for further traversal, sorted ascending on codepoint |
59 | | * so we can find an entry using binary search. */ |
60 | | MVMNFGTrieNodeEntry *next_codes; |
61 | | |
62 | | /* Number of entries in next_codes. */ |
63 | | MVMint32 num_entries; |
64 | | |
65 | | /* The grapheme reached at this node, or zero if this node is not a result. |
66 | | * A result is always negative, since it's an NFG synthetic. */ |
67 | | MVMGrapheme32 graph; |
68 | | }; |
69 | | |
70 | | /* An entry in the list of next possible codepoints in the NFG trie. */ |
71 | | struct MVMNFGTrieNodeEntry { |
72 | | /* The codepoint this entry is keyed on. */ |
73 | | MVMCodepoint code; |
74 | | |
75 | | /* Trie node to traverse to if we find this entry's codepoint. */ |
76 | | MVMNFGTrieNode *node; |
77 | | }; |
78 | | |
79 | | /* The maximum number of codepoints we will allow in a synthetic grapheme. |
80 | | * This is a good bit higher than any real-world use case is going to run |
81 | | * into. */ |
82 | 373 | #define MVM_GRAPHEME_MAX_CODEPOINTS 1024 |
83 | | |
84 | | /* Functions related to grapheme handling (declarations only; the definitions are not in this header). */ |
85 | | MVMGrapheme32 MVM_nfg_codes_to_grapheme(MVMThreadContext *tc, MVMCodepoint *codes, MVMint32 num_codes); |
86 | | MVMGrapheme32 MVM_nfg_codes_to_grapheme_utf8_c8(MVMThreadContext *tc, MVMCodepoint *codes, MVMint32 num_codes); |
87 | | MVMGrapheme32 MVM_nfg_crlf_grapheme(MVMThreadContext *tc); |
88 | | MVMNFGSynthetic * MVM_nfg_get_synthetic_info(MVMThreadContext *tc, MVMGrapheme32 synth); |
89 | | MVMuint32 MVM_nfg_get_case_change(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint32 case_, MVMGrapheme32 **result); |
90 | | MVMint32 MVM_nfg_is_concat_stable(MVMThreadContext *tc, MVMString *a, MVMString *b); |
91 | | |
92 | | /* NFG subsystem initialization and cleanup. */ |
93 | | void MVM_nfg_init(MVMThreadContext *tc); |
94 | | void MVM_nfg_destroy(MVMThreadContext *tc); |