/home/travis/build/MoarVM/MoarVM/src/strings/nfg.h
Line | Count | Source |
1 | | /* State kept around for implementing Normal Form Grapheme. The design is such |
2 | | * that we can always do lookups without needing to acquire a lock. When we |
3 | | * do additions of new synthetics, we must acquire the lock before doing so, |
4 | | * and be sure to validate nothing changed. We also must do sufficient copying |
5 | | * to ensure that we never break another thread doing a read. Memory to be |
6 | | * freed is thus only released at a global safe point, which means we never have one |
7 | | * thread reading memory freed by another. */ |
8 | | struct MVMNFGState { |
9 | | /* Table of information about synthetic graphemes. A (negative) synthetic |
10 | | * S is looked up at index (-S - 1), so synthetic -1 is at index 0. */ |
11 | | MVMNFGSynthetic *synthetics; |
12 | | |
13 | | /* Trie used to do lookups by codepoints (already in NFC) to an (NFG) |
14 | | * grapheme. */ |
15 | | MVMNFGTrieNode *grapheme_lookup; |
16 | | |
17 | | /* Mutex to hold when updating the grapheme table (e.g. adding a new synthetic); lookups do not take it. */ |
18 | | uv_mutex_t update_mutex; |
19 | | |
20 | | /* Number of synthetics we have. */ |
21 | | MVMint32 num_synthetics; |
22 | | }; |
23 | | |
24 | | /* State held about a synthetic. */ |
25 | | struct MVMNFGSynthetic { |
26 | | /* The base (non-combining) codepoint. */ |
27 | | MVMCodepoint base; |
28 | | |
29 | | /* The number of combiners in the combs array. */ |
30 | | MVMint32 num_combs; |
31 | | |
32 | | /* Array of combiners. */ |
33 | | MVMCodepoint *combs; |
34 | | |
35 | | /* Cached case transforms, NULL if not calculated. */ |
36 | | MVMGrapheme32 *case_uc; |
37 | | MVMGrapheme32 *case_lc; |
38 | | MVMGrapheme32 *case_tc; |
39 | | MVMGrapheme32 *case_fc; |
40 | | |
41 | | /* Number of graphemes in each of the cached case transforms above. */ |
42 | | MVMint32 case_uc_graphs; |
43 | | MVMint32 case_lc_graphs; |
44 | | MVMint32 case_tc_graphs; |
45 | | MVMint32 case_fc_graphs; |
46 | | |
47 | | /* Is this a UTF-8 C-8 synthetic? */ |
48 | | MVMint32 is_utf8_c8; |
49 | | }; |
50 | | |
51 | | /* A node in the NFG trie. */ |
52 | | struct MVMNFGTrieNode { |
53 | | /* Set of entries for further traversal, sorted ascending on codepoint |
54 | | * so we can find an entry using binary search. */ |
55 | | MVMNFGTrieNodeEntry *next_codes; |
56 | | |
57 | | /* Number of entries in next_codes. */ |
58 | | MVMint32 num_entries; |
59 | | |
60 | | /* The grapheme reached at this node: zero if there is no result here, |
61 | | * otherwise always negative since it's an NFG synthetic. */ |
62 | | MVMGrapheme32 graph; |
63 | | }; |
64 | | |
65 | | /* An entry in the list of next possible codepoints in the NFG trie. */ |
66 | | struct MVMNFGTrieNodeEntry { |
67 | | /* The codepoint this entry matches. */ |
68 | | MVMCodepoint code; |
69 | | |
70 | | /* Trie node to traverse to if we find this codepoint. */ |
71 | | MVMNFGTrieNode *node; |
72 | | }; |
73 | | |
74 | | /* The maximum number of codepoints we will allow in a synthetic grapheme. |
75 | | * This is a good bit higher than any real-world use case is going to run |
76 | | * into. */ |
77 | 179 | #define MVM_GRAPHEME_MAX_CODEPOINTS 1024 |
78 | | |
79 | | /* Functions related to grapheme handling. The codes_to_grapheme variants take a codepoint array plus a count (num_codes) and return one MVMGrapheme32. */ |
80 | | MVMGrapheme32 MVM_nfg_codes_to_grapheme(MVMThreadContext *tc, MVMCodepoint *codes, MVMint32 num_codes); |
81 | | MVMGrapheme32 MVM_nfg_codes_to_grapheme_utf8_c8(MVMThreadContext *tc, MVMCodepoint *codes, MVMint32 num_codes); |
82 | | MVMGrapheme32 MVM_nfg_crlf_grapheme(MVMThreadContext *tc); |
83 | | MVMNFGSynthetic * MVM_nfg_get_synthetic_info(MVMThreadContext *tc, MVMGrapheme32 synth); |
84 | | MVMuint32 MVM_nfg_get_case_change(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint32 case_, MVMGrapheme32 **result); |
85 | | MVMint32 MVM_nfg_is_concat_stable(MVMThreadContext *tc, MVMString *a, MVMString *b); |
86 | | |
87 | | /* NFG subsystem cleanup. */ |
88 | | void MVM_nfg_destroy(MVMThreadContext *tc); |