diff options
author | George Hazan <ghazan@miranda.im> | 2022-08-30 17:13:21 +0300 |
---|---|---|
committer | George Hazan <ghazan@miranda.im> | 2022-08-30 17:13:21 +0300 |
commit | 3ad2f2b7c2bfb3166363239d67a6645692ffb2b6 (patch) | |
tree | 0201fd31d0c0e5c193752f7b80cdc69096b563cf /libs/hunspell/src/csutil.c++ | |
parent | d82b809f6af58a1d10fa503138b912d336dca75e (diff) |
fixes #3183 (Update hunspell to 1.7.1)
Diffstat (limited to 'libs/hunspell/src/csutil.c++')
-rw-r--r-- | libs/hunspell/src/csutil.c++ | 98 |
1 files changed, 47 insertions, 51 deletions
diff --git a/libs/hunspell/src/csutil.c++ b/libs/hunspell/src/csutil.c++ index 59a9d28353..fbaa768b40 100644 --- a/libs/hunspell/src/csutil.c++ +++ b/libs/hunspell/src/csutil.c++ @@ -1,7 +1,7 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * - * Copyright (C) 2002-2017 Németh László + * Copyright (C) 2002-2022 Németh László * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with @@ -69,6 +69,7 @@ */ #include <algorithm> +#include <assert.h> #include <stdlib.h> #include <string.h> #include <stdio.h> @@ -79,13 +80,6 @@ #include "atypes.hxx" #include "langnum.hxx" -// Unicode character encoding information -struct unicode_info { - unsigned short c; - unsigned short cupper; - unsigned short clower; -}; - #ifdef _WIN32 #include <windows.h> #include <wchar.h> @@ -102,12 +96,10 @@ struct unicode_info { #ifdef MOZILLA_CLIENT #include "nsCOMPtr.h" -#include "nsIUnicodeEncoder.h" -#include "nsIUnicodeDecoder.h" #include "nsUnicharUtils.h" -#include "mozilla/dom/EncodingUtils.h" +#include "mozilla/Encoding.h" -using mozilla::dom::EncodingUtils; +using namespace mozilla; #endif struct unicode_info2 { @@ -495,20 +487,17 @@ void uniqlist(std::vector<std::string>& list) { namespace { unsigned char cupper(const struct cs_info* csconv, int nIndex) { - if (nIndex < 0 || nIndex > 255) - return nIndex; + assert(nIndex >= 0 && nIndex <= 255); return csconv[nIndex].cupper; } unsigned char clower(const struct cs_info* csconv, int nIndex) { - if (nIndex < 0 || nIndex > 255) - return nIndex; + assert(nIndex >= 0 && nIndex <= 255); return csconv[nIndex].clower; } unsigned char ccase(const struct cs_info* csconv, int nIndex) { - if (nIndex < 0 || nIndex > 255) - return nIndex; + assert(nIndex >= 0 && nIndex <= 255); return csconv[nIndex].ccase; } } @@ -2306,20 +2295,12 @@ struct cs_info* get_current_cs(const std::string& es) { ccs[i].cupper = i; } - nsCOMPtr<nsIUnicodeEncoder> encoder; - nsCOMPtr<nsIUnicodeDecoder> decoder; - - nsresult rv; - - nsAutoCString label(es.c_str()); - nsAutoCString encoding; - if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) { + auto encoding = Encoding::ForLabelNoReplacement(es); + if (!encoding) { return ccs; } - encoder = EncodingUtils::EncoderForEncoding(encoding); - decoder = EncodingUtils::DecoderForEncoding(encoding); - encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nullptr, '?'); - decoder->SetInputErrorBehavior(decoder->kOnError_Signal); + auto encoder = encoding->NewEncoder(); + auto decoder = encoding->NewDecoderWithoutBOMHandling(); for (unsigned int i = 0; i <= 0xff; ++i) { bool success = false; @@ -2327,36 +2308,50 @@ struct cs_info* get_current_cs(const std::string& es) { // in this 1-byte character encoding. Call our encoding/decoding // APIs separately for each byte since they may reject some of the // bytes, and we want to handle errors separately for each byte. - char lower, upper; + uint8_t lower, upper; do { if (i == 0) break; - const char source = char(i); - char16_t uni, uniCased; - int32_t charLength = 1, uniLength = 1; - - rv = decoder->Convert(&source, &charLength, &uni, &uniLength); - // Explicitly check NS_OK because we don't want to allow - // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT. - if (rv != NS_OK || charLength != 1 || uniLength != 1) + uint8_t source = uint8_t(i); + char16_t uni[2]; + char16_t uniCased; + uint8_t destination[4]; + auto src1 = MakeSpan(&source, 1); + auto dst1 = MakeSpan(uni); + auto src2 = MakeSpan(&uniCased, 1); + auto dst2 = MakeSpan(destination); + + uint32_t result; + size_t read; + size_t written; + Tie(result, read, written) = + decoder->DecodeToUTF16WithoutReplacement(src1, dst1, true); + if (result != kInputEmpty || read != 1 || written != 1) { break; - uniCased = ToLowerCase(uni); - rv = encoder->Convert(&uniCased, &uniLength, &lower, &charLength); - // Explicitly check NS_OK because we don't want to allow - // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT. - if (rv != NS_OK || charLength != 1 || uniLength != 1) + } + + uniCased = ToLowerCase(uni[0]); + Tie(result, read, written) = + encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true); + if (result != kInputEmpty || read != 1 || written != 1) { break; + } + lower = destination[0]; - uniCased = ToUpperCase(uni); - rv = encoder->Convert(&uniCased, &uniLength, &upper, &charLength); - // Explicitly check NS_OK because we don't want to allow - // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT. - if (rv != NS_OK || charLength != 1 || uniLength != 1) + uniCased = ToUpperCase(uni[0]); + Tie(result, read, written) = + encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true); + if (result != kInputEmpty || read != 1 || written != 1) { break; + } + upper = destination[0]; success = true; } while (0); + encoding->NewEncoderInto(*encoder); + encoding->NewDecoderWithoutBOMHandlingInto(*decoder); + if (success) { ccs[i].cupper = upper; ccs[i].clower = lower; @@ -2401,6 +2396,7 @@ static struct lang_map lang2enc[] = {{"ar", LANG_ar}, {"az", LANG_az}, {"az_AZ", LANG_az}, // for back-compatibility {"bg", LANG_bg}, {"ca", LANG_ca}, + {"crh", LANG_crh}, {"cs", LANG_cs}, {"da", LANG_da}, {"de", LANG_de}, {"el", LANG_el}, {"en", LANG_en}, {"es", LANG_es}, @@ -2458,7 +2454,7 @@ unsigned short unicodetoupper(unsigned short c, int langnum) { // In Azeri and Turkish, I and i dictinct letters: // There are a dotless lower case i pair of upper `I', // and an upper I with dot pair of lower `i'. - if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr))) + if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh))) return 0x0130; #ifdef OPENOFFICEORG return static_cast<unsigned short>(u_toupper(c)); @@ -2475,7 +2471,7 @@ unsigned short unicodetolower(unsigned short c, int langnum) { // In Azeri and Turkish, I and i dictinct letters: // There are a dotless lower case i pair of upper `I', // and an upper I with dot pair of lower `i'. - if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr))) + if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh))) return 0x0131; #ifdef OPENOFFICEORG return static_cast<unsigned short>(u_tolower(c)); |