blob: 8c3aea61bd65fe81929d7954e32ba35e3ac175a8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
|
#include "html.h"
#include "document_container.h"
// Walks `text` one code point at a time and reports it to the caller in pieces:
//   - each whitespace character (space, tab, LF, CR, FF) is passed on its own
//     to `on_space`;
//   - each code point in U+4E00..U+9FCC (the range the code treats as CJK) is
//     passed on its own to `on_word`;
//   - every other maximal run of consecutive code points is passed as a single
//     string to `on_word`.
// Pieces are delivered in input order. The pointer handed to a callback comes
// from a temporary conversion object, so callbacks should copy the string if
// they need to keep it (NOTE(review): lifetime inferred from the call shape —
// confirm against utf32_to_utf8).
void litehtml::document_container::split_text(const char* text, const std::function<void(const char*)>& on_word, const std::function<void(const char*)>& on_space)
{
    // Work on 32-bit code units so the range checks below see whole code points
    // (presumably utf8_to_utf32 performs the UTF-8 decoding — defined elsewhere).
    std::u32string decoded = static_cast<const char32_t*>(utf8_to_utf32(text));

    // Code points collected for the word currently being built.
    std::u32string pending;

    // Hands the collected word (if any) to on_word and starts a fresh one.
    auto flush_pending = [&]()
    {
        if (pending.empty())
        {
            return;
        }
        on_word(utf32_to_utf8(pending));
        pending.clear();
    };

    for (char32_t ch : decoded)
    {
        const bool is_space = (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f');
        const bool is_cjk   = (ch >= 0x4E00 && ch <= 0x9FCC);

        if (!is_space && !is_cjk)
        {
            // Ordinary character: keep extending the current word.
            pending += ch;
            continue;
        }

        // A separator or CJK character ends whatever word preceded it...
        flush_pending();

        // ...and is then reported as a one-character piece of its own.
        std::u32string single(1, ch);
        if (is_space)
        {
            on_space(utf32_to_utf8(single));
        }
        else
        {
            on_word(utf32_to_utf8(single));
        }
    }

    // Emit the trailing word when the input does not end in a separator.
    flush_pending();
}
|