summaryrefslogtreecommitdiff
path: root/libs/hunspell/src/csutil.c++
diff options
context:
space:
mode:
author George Hazan <ghazan@miranda.im> 2022-08-30 17:13:21 +0300
committer George Hazan <ghazan@miranda.im> 2022-08-30 17:13:21 +0300
commit 3ad2f2b7c2bfb3166363239d67a6645692ffb2b6 (patch)
tree 0201fd31d0c0e5c193752f7b80cdc69096b563cf /libs/hunspell/src/csutil.c++
parent d82b809f6af58a1d10fa503138b912d336dca75e (diff)
fixes #3183 (Update hunspell to 1.7.1)
Diffstat (limited to 'libs/hunspell/src/csutil.c++')
-rw-r--r-- libs/hunspell/src/csutil.c++ 98
1 files changed, 47 insertions, 51 deletions
diff --git a/libs/hunspell/src/csutil.c++ b/libs/hunspell/src/csutil.c++
index 59a9d28353..fbaa768b40 100644
--- a/libs/hunspell/src/csutil.c++
+++ b/libs/hunspell/src/csutil.c++
@@ -1,7 +1,7 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
- * Copyright (C) 2002-2017 Németh László
+ * Copyright (C) 2002-2022 Németh László
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
@@ -69,6 +69,7 @@
*/
#include <algorithm>
+#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
@@ -79,13 +80,6 @@
#include "atypes.hxx"
#include "langnum.hxx"
-// Unicode character encoding information
-struct unicode_info {
- unsigned short c;
- unsigned short cupper;
- unsigned short clower;
-};
-
#ifdef _WIN32
#include <windows.h>
#include <wchar.h>
@@ -102,12 +96,10 @@ struct unicode_info {
#ifdef MOZILLA_CLIENT
#include "nsCOMPtr.h"
-#include "nsIUnicodeEncoder.h"
-#include "nsIUnicodeDecoder.h"
#include "nsUnicharUtils.h"
-#include "mozilla/dom/EncodingUtils.h"
+#include "mozilla/Encoding.h"
-using mozilla::dom::EncodingUtils;
+using namespace mozilla;
#endif
struct unicode_info2 {
@@ -495,20 +487,17 @@ void uniqlist(std::vector<std::string>& list) {
namespace {
unsigned char cupper(const struct cs_info* csconv, int nIndex) {
- if (nIndex < 0 || nIndex > 255)
- return nIndex;
+ assert(nIndex >= 0 && nIndex <= 255);
return csconv[nIndex].cupper;
}
unsigned char clower(const struct cs_info* csconv, int nIndex) {
- if (nIndex < 0 || nIndex > 255)
- return nIndex;
+ assert(nIndex >= 0 && nIndex <= 255);
return csconv[nIndex].clower;
}
unsigned char ccase(const struct cs_info* csconv, int nIndex) {
- if (nIndex < 0 || nIndex > 255)
- return nIndex;
+ assert(nIndex >= 0 && nIndex <= 255);
return csconv[nIndex].ccase;
}
}
@@ -2306,20 +2295,12 @@ struct cs_info* get_current_cs(const std::string& es) {
ccs[i].cupper = i;
}
- nsCOMPtr<nsIUnicodeEncoder> encoder;
- nsCOMPtr<nsIUnicodeDecoder> decoder;
-
- nsresult rv;
-
- nsAutoCString label(es.c_str());
- nsAutoCString encoding;
- if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
+ auto encoding = Encoding::ForLabelNoReplacement(es);
+ if (!encoding) {
return ccs;
}
- encoder = EncodingUtils::EncoderForEncoding(encoding);
- decoder = EncodingUtils::DecoderForEncoding(encoding);
- encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nullptr, '?');
- decoder->SetInputErrorBehavior(decoder->kOnError_Signal);
+ auto encoder = encoding->NewEncoder();
+ auto decoder = encoding->NewDecoderWithoutBOMHandling();
for (unsigned int i = 0; i <= 0xff; ++i) {
bool success = false;
@@ -2327,36 +2308,50 @@ struct cs_info* get_current_cs(const std::string& es) {
// in this 1-byte character encoding. Call our encoding/decoding
// APIs separately for each byte since they may reject some of the
// bytes, and we want to handle errors separately for each byte.
- char lower, upper;
+ uint8_t lower, upper;
do {
if (i == 0)
break;
- const char source = char(i);
- char16_t uni, uniCased;
- int32_t charLength = 1, uniLength = 1;
-
- rv = decoder->Convert(&source, &charLength, &uni, &uniLength);
- // Explicitly check NS_OK because we don't want to allow
- // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
- if (rv != NS_OK || charLength != 1 || uniLength != 1)
+ uint8_t source = uint8_t(i);
+ char16_t uni[2];
+ char16_t uniCased;
+ uint8_t destination[4];
+ auto src1 = MakeSpan(&source, 1);
+ auto dst1 = MakeSpan(uni);
+ auto src2 = MakeSpan(&uniCased, 1);
+ auto dst2 = MakeSpan(destination);
+
+ uint32_t result;
+ size_t read;
+ size_t written;
+ Tie(result, read, written) =
+ decoder->DecodeToUTF16WithoutReplacement(src1, dst1, true);
+ if (result != kInputEmpty || read != 1 || written != 1) {
break;
- uniCased = ToLowerCase(uni);
- rv = encoder->Convert(&uniCased, &uniLength, &lower, &charLength);
- // Explicitly check NS_OK because we don't want to allow
- // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
- if (rv != NS_OK || charLength != 1 || uniLength != 1)
+ }
+
+ uniCased = ToLowerCase(uni[0]);
+ Tie(result, read, written) =
+ encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true);
+ if (result != kInputEmpty || read != 1 || written != 1) {
break;
+ }
+ lower = destination[0];
- uniCased = ToUpperCase(uni);
- rv = encoder->Convert(&uniCased, &uniLength, &upper, &charLength);
- // Explicitly check NS_OK because we don't want to allow
- // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
- if (rv != NS_OK || charLength != 1 || uniLength != 1)
+ uniCased = ToUpperCase(uni[0]);
+ Tie(result, read, written) =
+ encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true);
+ if (result != kInputEmpty || read != 1 || written != 1) {
break;
+ }
+ upper = destination[0];
success = true;
} while (0);
+ encoding->NewEncoderInto(*encoder);
+ encoding->NewDecoderWithoutBOMHandlingInto(*decoder);
+
if (success) {
ccs[i].cupper = upper;
ccs[i].clower = lower;
@@ -2401,6 +2396,7 @@ static struct lang_map lang2enc[] =
{{"ar", LANG_ar}, {"az", LANG_az},
{"az_AZ", LANG_az}, // for back-compatibility
{"bg", LANG_bg}, {"ca", LANG_ca},
+ {"crh", LANG_crh},
{"cs", LANG_cs}, {"da", LANG_da},
{"de", LANG_de}, {"el", LANG_el},
{"en", LANG_en}, {"es", LANG_es},
@@ -2458,7 +2454,7 @@ unsigned short unicodetoupper(unsigned short c, int langnum) {
// In Azeri, Turkish and Crimean Tatar, I and i are distinct letters:
// the lower-case pair of upper `I' is a dotless i,
// and the upper-case pair of lower `i' is an I with dot.
- if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr)))
+ if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh)))
return 0x0130;
#ifdef OPENOFFICEORG
return static_cast<unsigned short>(u_toupper(c));
@@ -2475,7 +2471,7 @@ unsigned short unicodetolower(unsigned short c, int langnum) {
// In Azeri, Turkish and Crimean Tatar, I and i are distinct letters:
// the lower-case pair of upper `I' is a dotless i,
// and the upper-case pair of lower `i' is an I with dot.
- if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr)))
+ if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh)))
return 0x0131;
#ifdef OPENOFFICEORG
return static_cast<unsigned short>(u_tolower(c));