summary refs log tree commit diff
path: root/unicode-data.h
diff options
context:
space:
mode:
author Jared Van Bortel <jared@nomic.ai> 2024-03-26 17:46:21 -0400
committer GitHub <noreply@github.com> 2024-03-26 17:46:21 -0400
commit 32c8486e1f0297393cb22ac0a0d26a6b17ad4d54 (patch)
tree aae4f945bf3355c009cb9643376439fcba5558d0 /unicode-data.h
parent 557410b8f06380560155ac7fcb8316d71ddc9837 (diff)
wpm : portable unicode tolower (#6305)
Also use C locale for ispunct/isspace, and split unicode-data.cpp from unicode.cpp.
Diffstat (limited to 'unicode-data.h')
-rw-r--r-- unicode-data.h 16
1 files changed, 16 insertions, 0 deletions
diff --git a/unicode-data.h b/unicode-data.h
new file mode 100644
index 00000000..b99500b8
--- /dev/null
+++ b/unicode-data.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <utility>
+#include <vector>
+
+extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_digit;  // digit table; each pair is presumably a (first, last) code point range -- confirm bounds in unicode-data.cpp
+extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_letter;  // letter table; same pair layout as the other unicode_ranges_* vectors
+extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_whitespace;  // whitespace table; same pair layout
+extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_accent_mark;  // accent-mark table; same pair layout
+extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_punctuation;  // punctuation table; same pair layout
+extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_symbol;  // symbol table; same pair layout
+extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_control;  // control table; same pair layout
+extern const std::multimap<uint32_t, uint32_t> unicode_map_nfd;  // multimap, so one key may carry several values; presumably code point -> NFD decomposition -- confirm
+extern const std::map<char32_t, char32_t> unicode_map_lowercase;  // presumably code point -> lowercase code point -- confirm; NOTE(review): uses char32_t while the tables above use uint32_t