From efc336e60cf1331bf5f3213d296981b87b8b2a6c Mon Sep 17 00:00:00 2001
From: George Hazan
Date: Sun, 4 Jun 2023 19:24:05 +0300
Subject: =?UTF-8?q?fixes=20#3537=20(Telegram:=2032-=D1=80=D0=B0=D0=B7?=
 =?UTF-8?q?=D1=80=D1=8F=D0=B4=D0=BD=D0=B0=D1=8F=20=D0=B2=D0=B5=D1=80=D1=81?=
 =?UTF-8?q?=D0=B8=D1=8F=20=D0=BF=D0=B0=D0=B4=D0=B0=D0=B5=D1=82=20=D0=B2=20?=
 =?UTF-8?q?64-=D1=80=D0=B0=D0=B7=D1=80=D1=8F=D0=B4=D0=BD=D0=BE=D0=B9=20Win?=
 =?UTF-8?q?dows)=20+=20update=20to=20the=20fresh=20TDLIB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Telegram/tdlib/td/tdutils/td/utils/utf8.h | 24 ++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'protocols/Telegram/tdlib/td/tdutils/td/utils/utf8.h')

diff --git a/protocols/Telegram/tdlib/td/tdutils/td/utils/utf8.h b/protocols/Telegram/tdlib/td/tdutils/td/utils/utf8.h
index 27c8b5bd5d..21a02eca1c 100644
--- a/protocols/Telegram/tdlib/td/tdutils/td/utils/utf8.h
+++ b/protocols/Telegram/tdlib/td/tdutils/td/utils/utf8.h
@@ -1,5 +1,5 @@
 //
-// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
+// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2023
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -32,7 +32,24 @@ inline size_t utf8_length(Slice str) {
 size_t utf8_utf16_length(Slice str);
 
 /// appends a Unicode character using UTF-8 encoding
-void append_utf8_character(string &str, uint32 ch);
+template <class T>
+void append_utf8_character(T &str, uint32 code) {
+  if (code <= 0x7f) {
+    str.push_back(static_cast<char>(code));
+  } else if (code <= 0x7ff) {
+    str.push_back(static_cast<char>(0xc0 | (code >> 6)));  // implementation-defined
+    str.push_back(static_cast<char>(0x80 | (code & 0x3f)));
+  } else if (code <= 0xffff) {
+    str.push_back(static_cast<char>(0xe0 | (code >> 12)));  // implementation-defined
+    str.push_back(static_cast<char>(0x80 | ((code >> 6) & 0x3f)));
+    str.push_back(static_cast<char>(0x80 | (code & 0x3f)));
+  } else {
+    str.push_back(static_cast<char>(0xf0 | (code >> 18)));  // implementation-defined
+    str.push_back(static_cast<char>(0x80 | ((code >> 12) & 0x3f)));
+    str.push_back(static_cast<char>(0x80 | ((code >> 6) & 0x3f)));
+    str.push_back(static_cast<char>(0x80 | (code & 0x3f)));
+  }
+}
 
 /// moves pointer one UTF-8 character back
 inline const unsigned char *prev_utf8_unsafe(const unsigned char *ptr) {
@@ -45,6 +62,9 @@ inline const unsigned char *prev_utf8_unsafe(const unsigned char *ptr) {
 /// moves pointer one UTF-8 character forward and saves code of the skipped character in *code
 const unsigned char *next_utf8_unsafe(const unsigned char *ptr, uint32 *code);
 
+/// appends a Unicode character using UTF-8 encoding and returns updated pointer
+unsigned char *append_utf8_character_unsafe(unsigned char *ptr, uint32 code);
+
 /// truncates UTF-8 string to the given length in Unicode characters
 template <class T>
 T utf8_truncate(T str, size_t length) {
-- 
cgit v1.2.3