fixes #3183 (Update hunspell to 1.7.1)

author: George Hazan <ghazan@miranda.im> 2022-08-30 17:13:21 +0300
committer: George Hazan <ghazan@miranda.im> 2022-08-30 17:13:21 +0300
commit: 3ad2f2b7c2bfb3166363239d67a6645692ffb2b6 (patch)
tree: 0201fd31d0c0e5c193752f7b80cdc69096b563cf /libs/hunspell/src/csutil.c++
parent: d82b809f6af58a1d10fa503138b912d336dca75e (diff)
1 files changed, 47 insertions, 51 deletions
diff --git a/libs/hunspell/src/csutil.c++ b/libs/hunspell/src/csutil.c++
index 59a9d28353..fbaa768b40 100644
--- a/libs/hunspell/src/csutil.c++
+++ b/libs/hunspell/src/csutil.c++
@@ -1,7 +1,7 @@
 /* ***** BEGIN LICENSE BLOCK *****
  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  *
- * Copyright (C) 2002-2017 Németh László
+ * Copyright (C) 2002-2022 Németh László
  *
  * The contents of this file are subject to the Mozilla Public License Version
  * 1.1 (the "License"); you may not use this file except in compliance with
@@ -69,6 +69,7 @@
  */
 
 #include <algorithm>
+#include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
@@ -79,13 +80,6 @@
 #include "atypes.hxx"
 #include "langnum.hxx"
 
-// Unicode character encoding information
-struct unicode_info {
-  unsigned short c;
-  unsigned short cupper;
-  unsigned short clower;
-};
-
 #ifdef _WIN32
 #include <windows.h>
 #include <wchar.h>
@@ -102,12 +96,10 @@ struct unicode_info {
 
 #ifdef MOZILLA_CLIENT
 #include "nsCOMPtr.h"
-#include "nsIUnicodeEncoder.h"
-#include "nsIUnicodeDecoder.h"
 #include "nsUnicharUtils.h"
-#include "mozilla/dom/EncodingUtils.h"
+#include "mozilla/Encoding.h"
 
-using mozilla::dom::EncodingUtils;
+using namespace mozilla;
 #endif
 
 struct unicode_info2 {
@@ -495,20 +487,17 @@ void uniqlist(std::vector<std::string>& list) {
 
 namespace {
 unsigned char cupper(const struct cs_info* csconv, int nIndex) {
-  if (nIndex < 0 || nIndex > 255)
-    return nIndex;
+  assert(nIndex >= 0 && nIndex <= 255);
   return csconv[nIndex].cupper;
 }
 
 unsigned char clower(const struct cs_info* csconv, int nIndex) {
-  if (nIndex < 0 || nIndex > 255)
-    return nIndex;
+  assert(nIndex >= 0 && nIndex <= 255);
   return csconv[nIndex].clower;
 }
 
 unsigned char ccase(const struct cs_info* csconv, int nIndex) {
-  if (nIndex < 0 || nIndex > 255)
-    return nIndex;
+  assert(nIndex >= 0 && nIndex <= 255);
   return csconv[nIndex].ccase;
 }
 }
@@ -2306,20 +2295,12 @@ struct cs_info* get_current_cs(const std::string& es) {
     ccs[i].cupper = i;
   }
 
-  nsCOMPtr<nsIUnicodeEncoder> encoder;
-  nsCOMPtr<nsIUnicodeDecoder> decoder;
-
-  nsresult rv;
-
-  nsAutoCString label(es.c_str());
-  nsAutoCString encoding;
-  if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
+  auto encoding = Encoding::ForLabelNoReplacement(es);
+  if (!encoding) {
     return ccs;
   }
-  encoder = EncodingUtils::EncoderForEncoding(encoding);
-  decoder = EncodingUtils::DecoderForEncoding(encoding);
-  encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nullptr, '?');
-  decoder->SetInputErrorBehavior(decoder->kOnError_Signal);
+  auto encoder = encoding->NewEncoder();
+  auto decoder = encoding->NewDecoderWithoutBOMHandling();
 
   for (unsigned int i = 0; i <= 0xff; ++i) {
     bool success = false;
@@ -2327,36 +2308,50 @@ struct cs_info* get_current_cs(const std::string& es) {
     // in this 1-byte character encoding.  Call our encoding/decoding
     // APIs separately for each byte since they may reject some of the
     // bytes, and we want to handle errors separately for each byte.
-    char lower, upper;
+    uint8_t lower, upper;
     do {
       if (i == 0)
         break;
-      const char source = char(i);
-      char16_t uni, uniCased;
-      int32_t charLength = 1, uniLength = 1;
-
-      rv = decoder->Convert(&source, &charLength, &uni, &uniLength);
-      // Explicitly check NS_OK because we don't want to allow
-      // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
-      if (rv != NS_OK || charLength != 1 || uniLength != 1)
+      uint8_t source = uint8_t(i);
+      char16_t uni[2];
+      char16_t uniCased;
+      uint8_t destination[4];
+      auto src1 = MakeSpan(&source, 1);
+      auto dst1 = MakeSpan(uni);
+      auto src2 = MakeSpan(&uniCased, 1);
+      auto dst2 = MakeSpan(destination);
+
+      uint32_t result;
+      size_t read;
+      size_t written;
+      Tie(result, read, written) =
+        decoder->DecodeToUTF16WithoutReplacement(src1, dst1, true);
+      if (result != kInputEmpty || read != 1 || written != 1) {
         break;
-      uniCased = ToLowerCase(uni);
-      rv = encoder->Convert(&uniCased, &uniLength, &lower, &charLength);
-      // Explicitly check NS_OK because we don't want to allow
-      // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
-      if (rv != NS_OK || charLength != 1 || uniLength != 1)
+      }
+
+      uniCased = ToLowerCase(uni[0]);
+      Tie(result, read, written) =
+        encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true);
+      if (result != kInputEmpty || read != 1 || written != 1) {
         break;
+      }
+      lower = destination[0];
 
-      uniCased = ToUpperCase(uni);
-      rv = encoder->Convert(&uniCased, &uniLength, &upper, &charLength);
-      // Explicitly check NS_OK because we don't want to allow
-      // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
-      if (rv != NS_OK || charLength != 1 || uniLength != 1)
+      uniCased = ToUpperCase(uni[0]);
+      Tie(result, read, written) =
+        encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true);
+      if (result != kInputEmpty || read != 1 || written != 1) {
         break;
+      }
+      upper = destination[0];
 
       success = true;
     } while (0);
 
+    encoding->NewEncoderInto(*encoder);
+    encoding->NewDecoderWithoutBOMHandlingInto(*decoder);
+
     if (success) {
       ccs[i].cupper = upper;
       ccs[i].clower = lower;
@@ -2401,6 +2396,7 @@ static struct lang_map lang2enc[] =
     {{"ar", LANG_ar},    {"az", LANG_az},
      {"az_AZ", LANG_az},  // for back-compatibility
      {"bg", LANG_bg},    {"ca", LANG_ca},
+     {"crh", LANG_crh},
      {"cs", LANG_cs},    {"da", LANG_da},
      {"de", LANG_de},    {"el", LANG_el},
      {"en", LANG_en},    {"es", LANG_es},
@@ -2458,7 +2454,7 @@ unsigned short unicodetoupper(unsigned short c, int langnum) {
   // In Azeri and Turkish, I and i dictinct letters:
   // There are a dotless lower case i pair of upper `I',
   // and an upper I with dot pair of lower `i'.
-  if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr)))
+  if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh)))
     return 0x0130;
 #ifdef OPENOFFICEORG
   return static_cast<unsigned short>(u_toupper(c));
@@ -2475,7 +2471,7 @@ unsigned short unicodetolower(unsigned short c, int langnum) {
   // In Azeri and Turkish, I and i dictinct letters:
   // There are a dotless lower case i pair of upper `I',
   // and an upper I with dot pair of lower `i'.
-  if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr)))
+  if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh)))
     return 0x0131;
 #ifdef OPENOFFICEORG
   return static_cast<unsigned short>(u_tolower(c));
author	George Hazan <ghazan@miranda.im>	2022-08-30 17:13:21 +0300
committer	George Hazan <ghazan@miranda.im>	2022-08-30 17:13:21 +0300
commit	3ad2f2b7c2bfb3166363239d67a6645692ffb2b6 (patch)
tree	0201fd31d0c0e5c193752f7b80cdc69096b563cf /libs/hunspell/src/csutil.c++
parent	d82b809f6af58a1d10fa503138b912d336dca75e (diff)