1 files changed, 541 insertions, 1013 deletions
diff --git a/libs/hunspell/src/hunspell.c++ b/libs/hunspell/src/hunspell.c++
index 726c72931a..f7c1581087 100644
--- a/libs/hunspell/src/hunspell.c++
+++ b/libs/hunspell/src/hunspell.c++
@@ -85,6 +85,9 @@
 #include <limits>
 #include <string>
 
+#define MAXWORDLEN 176
+#define MAXWORDUTF8LEN (MAXWORDLEN * 3)
+
 Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key) {
   encoding = NULL;
   csconv = NULL;
@@ -158,14 +161,16 @@ int Hunspell::add_dic(const char* dpath, const char* key) {
 // set the capitalization type
 // return the length of the "cleaned" (and UTF-8 encoded) word
 
-int Hunspell::cleanword2(char* dest,
+size_t Hunspell::cleanword2(std::string& dest,
+                         std::vector<w_char>& dest_utf,
                          const char* src,
-                         w_char* dest_utf,
                          int* nc,
                          int* pcaptype,
-                         int* pabbrev) {
-  unsigned char* p = (unsigned char*)dest;
-  const unsigned char* q = (const unsigned char*)src;
+                         size_t* pabbrev) {
+  dest.clear();
+  dest_utf.clear();
+
+  const char* q = src;
 
   // first skip over any leading blanks
   while ((*q != '\0') && (*q == ' '))
@@ -173,7 +178,7 @@ int Hunspell::cleanword2(char* dest,
 
   // now strip off any trailing periods (recording their presence)
   *pabbrev = 0;
-  int nl = strlen((const char*)q);
+  int nl = strlen(q);
   while ((nl > 0) && (*(q + nl - 1) == '.')) {
     nl--;
     (*pabbrev)++;
@@ -182,35 +187,26 @@ int Hunspell::cleanword2(char* dest,
   // if no characters are left it can't be capitalized
   if (nl <= 0) {
     *pcaptype = NOCAP;
-    *p = '\0';
     return 0;
   }
 
-  strncpy(dest, (char*)q, nl);
-  *(dest + nl) = '\0';
-  nl = strlen(dest);
+  dest.append(q, nl);
+  nl = dest.size();
   if (utf8) {
-    *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
-    // don't check too long words
-    if (*nc >= MAXWORDLEN)
-      return 0;
-    if (*nc == -1) {  // big Unicode character (non BMP area)
-      *pcaptype = NOCAP;
-      return nl;
-    }
-    *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
+    *nc = u8_u16(dest_utf, dest);
+    *pcaptype = get_captype_utf8(dest_utf, langnum);
   } else {
-    *pcaptype = get_captype(dest, nl, csconv);
+    *pcaptype = get_captype(dest, csconv);
     *nc = nl;
   }
   return nl;
 }
 
-int Hunspell::cleanword(char* dest,
+void Hunspell::cleanword(std::string& dest,
                         const char* src,
                         int* pcaptype,
                         int* pabbrev) {
-  unsigned char* p = (unsigned char*)dest;
+  dest.clear();
   const unsigned char* q = (const unsigned char*)src;
   int firstcap = 0;
 
@@ -229,8 +225,7 @@ int Hunspell::cleanword(char* dest,
   // if no characters are left it can't be capitalized
   if (nl <= 0) {
     *pcaptype = NOCAP;
-    *p = '\0';
-    return 0;
+    return;
   }
 
   // now determine the capitalization type of the first nl letters
@@ -245,27 +240,25 @@ int Hunspell::cleanword(char* dest,
         ncap++;
       if (csconv[(*q)].cupper == csconv[(*q)].clower)
         nneutral++;
-      *p++ = *q++;
+      dest.push_back(*q++);
       nl--;
     }
     // remember to terminate the destination string
-    *p = '\0';
-    firstcap = csconv[(unsigned char)(*dest)].ccase;
+    firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
   } else {
-    unsigned short idx;
-    w_char t[MAXWORDLEN];
-    nc = u8_u16(t, MAXWORDLEN, src);
-    for (int i = 0; i < nc; i++) {
-      idx = (t[i].h << 8) + t[i].l;
+    std::vector<w_char> t;
+    u8_u16(t, src);
+    for (size_t i = 0; i < t.size(); ++i) {
+      unsigned short idx = (t[i].h << 8) + t[i].l;
       unsigned short low = unicodetolower(idx, langnum);
       if (idx != low)
         ncap++;
       if (unicodetoupper(idx, langnum) == low)
         nneutral++;
     }
-    u16_u8(dest, MAXWORDUTF8LEN, t, nc);
+    u16_u8(dest, t);
     if (ncap) {
-      idx = (t[0].h << 8) + t[0].l;
+      unsigned short idx = (t[0].h << 8) + t[0].l;
       firstcap = (idx != unicodetolower(idx, langnum));
     }
   }
@@ -282,117 +275,60 @@ int Hunspell::cleanword(char* dest,
   } else {
     *pcaptype = HUHCAP;
   }
-  return strlen(dest);
 }
 
-void Hunspell::mkallcap(char* p) {
+void Hunspell::mkallcap(std::string& u8) {
   if (utf8) {
-    w_char u[MAXWORDLEN];
-    int nc = u8_u16(u, MAXWORDLEN, p);
-    unsigned short idx;
-    for (int i = 0; i < nc; i++) {
-      idx = (u[i].h << 8) + u[i].l;
-      if (idx != unicodetoupper(idx, langnum)) {
-        u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
-        u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
-      }
-    }
-    u16_u8(p, MAXWORDUTF8LEN, u, nc);
+    std::vector<w_char> u16;
+    u8_u16(u16, u8);
+    ::mkallcap_utf(u16, langnum);
+    u16_u8(u8, u16);
   } else {
-    while (*p != '\0') {
-      *p = csconv[((unsigned char)*p)].cupper;
-      p++;
-    }
-  }
-}
-
-int Hunspell::mkallcap2(char* p, w_char* u, int nc) {
-  if (utf8) {
-    unsigned short idx;
-    for (int i = 0; i < nc; i++) {
-      idx = (u[i].h << 8) + u[i].l;
-      unsigned short up = unicodetoupper(idx, langnum);
-      if (idx != up) {
-        u[i].h = (unsigned char)(up >> 8);
-        u[i].l = (unsigned char)(up & 0x00FF);
-      }
-    }
-    u16_u8(p, MAXWORDUTF8LEN, u, nc);
-    return strlen(p);
-  } else {
-    while (*p != '\0') {
-      *p = csconv[((unsigned char)*p)].cupper;
-      p++;
-    }
-  }
-  return nc;
-}
-
-void Hunspell::mkallsmall(char* p) {
-  while (*p != '\0') {
-    *p = csconv[((unsigned char)*p)].clower;
-    p++;
+    ::mkallcap(u8, csconv);
   }
 }
 
-int Hunspell::mkallsmall2(char* p, w_char* u, int nc) {
+int Hunspell::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
   if (utf8) {
-    unsigned short idx;
-    for (int i = 0; i < nc; i++) {
-      idx = (u[i].h << 8) + u[i].l;
-      unsigned short low = unicodetolower(idx, langnum);
-      if (idx != low) {
-        u[i].h = (unsigned char)(low >> 8);
-        u[i].l = (unsigned char)(low & 0x00FF);
-      }
-    }
-    u16_u8(p, MAXWORDUTF8LEN, u, nc);
-    return strlen(p);
+    ::mkallsmall_utf(u16, langnum);
+    u16_u8(u8, u16);
   } else {
-    while (*p != '\0') {
-      *p = csconv[((unsigned char)*p)].clower;
-      p++;
-    }
+    ::mkallsmall(u8, csconv);
   }
-  return nc;
+  return u8.size();
 }
 
 // convert UTF-8 sharp S codes to latin 1
-char* Hunspell::sharps_u8_l1(char* dest, char* source) {
-  char* p = dest;
-  *p = *source;
-  for (p++, source++; *(source - 1); p++, source++) {
-    *p = *source;
-    if (*source == '\x9F')
-      *--p = '\xDF';
-  }
+std::string Hunspell::sharps_u8_l1(const std::string& source) {
+  std::string dest(source);
+  mystrrep(dest, "\xC3\x9F", "\xDF");
   return dest;
 }
 
 // recursive search for right ss - sharp s permutations
-hentry* Hunspell::spellsharps(char* base,
-                              char* pos,
+hentry* Hunspell::spellsharps(std::string& base,
+                              size_t n_pos,
                               int n,
                               int repnum,
-                              char* tmp,
                               int* info,
                               char** root) {
-  pos = strstr(pos, "ss");
-  if (pos && (n < MAXSHARPS)) {
-    *pos = '\xC3';
-    *(pos + 1) = '\x9F';
-    hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
+  size_t pos = base.find("ss", n_pos);
+  if (pos != std::string::npos && (n < MAXSHARPS)) {
+    base[pos] = '\xC3';
+    base[pos + 1] = '\x9F';
+    hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root);
     if (h)
       return h;
-    *pos = 's';
-    *(pos + 1) = 's';
-    h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
+    base[pos] = 's';
+    base[pos + 1] = 's';
+    h = spellsharps(base, pos + 2, n + 1, repnum, info, root);
     if (h)
       return h;
   } else if (repnum > 0) {
     if (utf8)
-      return checkword(base, info, root);
-    return checkword(sharps_u8_l1(tmp, base), info, root);
+      return checkword(base.c_str(), info, root);
+    std::string tmp(sharps_u8_l1(base));
+    return checkword(tmp.c_str(), info, root);
   }
   return NULL;
 }
@@ -403,7 +339,7 @@ int Hunspell::is_keepcase(const hentry* rv) {
 }
 
 /* insert a word to the beginning of the suggestion array and return ns */
-int Hunspell::insert_sug(char*** slst, char* word, int ns) {
+int Hunspell::insert_sug(char*** slst, const char* word, int ns) {
   if (!*slst)
     return ns;
   char* dup = mystrdup(word);
@@ -421,11 +357,6 @@ int Hunspell::insert_sug(char*** slst, char* word, int ns) {
 
 int Hunspell::spell(const char* word, int* info, char** root) {
   struct hentry* rv = NULL;
-  // need larger vector. For example, Turkish capital letter I converted a
-  // 2-byte UTF-8 character (dotless i) by mkallsmall.
-  char cw[MAXWORDUTF8LEN];
-  char wspace[MAXWORDUTF8LEN];
-  w_char unicw[MAXWORDLEN];
 
   int info2 = 0;
   if (!info)
@@ -437,7 +368,6 @@ int Hunspell::spell(const char* word, int* info, char** root) {
   if (strcmp(word, SPELL_XML) == 0)
     return 1;
   int nc = strlen(word);
-  int wl2 = 0;
   if (utf8) {
     if (nc >= MAXWORDUTF8LEN)
       return 0;
@@ -445,19 +375,26 @@ int Hunspell::spell(const char* word, int* info, char** root) {
     if (nc >= MAXWORDLEN)
       return 0;
   }
-  int captype = 0;
-  int abbv = 0;
-  int wl = 0;
+  int captype = NOCAP;
+  size_t abbv = 0;
+  size_t wl = 0;
+
+  std::string scw;
+  std::vector<w_char> sunicw;
 
   // input conversion
   RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
-  int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0;
-  if (convstatus < 0)
-    return 0;
-  else if (convstatus > 0)
-    wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
-  else
-    wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
+  {
+    std::string wspace;
+
+    int convstatus = rl ? rl->conv(word, wspace) : 0;
+    if (convstatus < 0)
+      return 0;
+    else if (convstatus > 0)
+      wl = cleanword2(scw, sunicw, wspace.c_str(), &nc, &captype, &abbv);
+    else
+      wl = cleanword2(scw, sunicw, word, &nc, &captype, &abbv);
+  }
 
 #ifdef MOZILLA_CLIENT
   // accept the abbreviated words without dots
@@ -474,12 +411,12 @@ int Hunspell::spell(const char* word, int* info, char** root) {
   // "..", "--" etc.)
   enum { NBEGIN, NNUM, NSEP };
   int nstate = NBEGIN;
-  int i;
+  size_t i;
 
   for (i = 0; (i < wl); i++) {
-    if ((cw[i] <= '9') && (cw[i] >= '0')) {
+    if ((scw[i] <= '9') && (scw[i] >= '0')) {
       nstate = NNUM;
-    } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
+    } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
       if ((nstate == NSEP) || (i == 0))
         break;
       nstate = NSEP;
@@ -496,75 +433,75 @@ int Hunspell::spell(const char* word, int* info, char** root) {
       *info += SPELL_ORIGCAP;
     /* FALLTHROUGH */
     case NOCAP:
-      rv = checkword(cw, info, root);
+      rv = checkword(scw.c_str(), info, root);
       if ((abbv) && !(rv)) {
-        memcpy(wspace, cw, wl);
-        *(wspace + wl) = '.';
-        *(wspace + wl + 1) = '\0';
-        rv = checkword(wspace, info, root);
+        std::string u8buffer(scw);
+        u8buffer.push_back('.');
+        rv = checkword(u8buffer.c_str(), info, root);
       }
       break;
     case ALLCAP: {
       *info += SPELL_ORIGCAP;
-      rv = checkword(cw, info, root);
+      rv = checkword(scw.c_str(), info, root);
       if (rv)
         break;
       if (abbv) {
-        memcpy(wspace, cw, wl);
-        *(wspace + wl) = '.';
-        *(wspace + wl + 1) = '\0';
-        rv = checkword(wspace, info, root);
+        std::string u8buffer(scw);
+        u8buffer.push_back('.');
+        rv = checkword(u8buffer.c_str(), info, root);
         if (rv)
           break;
       }
       // Spec. prefix handling for Catalan, French, Italian:
       // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
-      if (pAMgr && strchr(cw, '\'')) {
-        mkallsmall2(cw, unicw, nc);
-        // There are no really sane circumstances where this could fail,
-        // but anyway...
-        if (char* apostrophe = strchr(cw, '\'')) {
+      size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
+      if (apos != std::string::npos) {
+        mkallsmall2(scw, sunicw);
+        // lowercasing may change scw's byte length, so re-scan for the apostrophe
+        apos = scw.find('\'');
+        if (apos != std::string::npos && apos < scw.size() - 1) {
+          std::string part1 = scw.substr(0, apos+1);
+          std::string part2 = scw.substr(apos+1);
           if (utf8) {
-            w_char tmpword[MAXWORDLEN];
-            *apostrophe = '\0';
-            wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
-            *apostrophe = '\'';
-            if (wl2 >= 0 && wl2 < nc) {
-              mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
-              rv = checkword(cw, info, root);
-              if (rv)
-                break;
-            }
+            std::vector<w_char> part1u, part2u;
+            u8_u16(part1u, part1);
+            u8_u16(part2u, part2);
+            mkinitcap2(part2, part2u);
+            scw = part1 + part2;
+            sunicw = part1u;
+            sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
+            rv = checkword(scw.c_str(), info, root);
+            if (rv)
+              break;
           } else {
-            mkinitcap2(apostrophe + 1, unicw, nc);
-            rv = checkword(cw, info, root);
+            mkinitcap2(part2, sunicw);
+            scw = part1 + part2;
+            rv = checkword(scw.c_str(), info, root);
             if (rv)
               break;
           }
+          mkinitcap2(scw, sunicw);
+          rv = checkword(scw.c_str(), info, root);
+          if (rv)
+            break;
         }
-        mkinitcap2(cw, unicw, nc);
-        rv = checkword(cw, info, root);
-        if (rv)
-          break;
       }
-      if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
-        char tmpword[MAXWORDUTF8LEN];
-        wl = mkallsmall2(cw, unicw, nc);
-        memcpy(wspace, cw, (wl + 1));
-        rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
+      if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
+
+        mkallsmall2(scw, sunicw);
+        std::string u8buffer(scw);
+        rv = spellsharps(u8buffer, 0, 0, 0, info, root);
         if (!rv) {
-          wl2 = mkinitcap2(cw, unicw, nc);
-          rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
+          mkinitcap2(scw, sunicw);
+          rv = spellsharps(scw, 0, 0, 0, info, root);
         }
         if ((abbv) && !(rv)) {
-          *(wspace + wl) = '.';
-          *(wspace + wl + 1) = '\0';
-          rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
+          u8buffer.push_back('.');
+          rv = spellsharps(u8buffer, 0, 0, 0, info, root);
           if (!rv) {
-            memcpy(wspace, cw, wl2);
-            *(wspace + wl2) = '.';
-            *(wspace + wl2 + 1) = '\0';
-            rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
+            u8buffer = std::string(scw);
+            u8buffer.push_back('.');
+            rv = spellsharps(u8buffer, 0, 0, 0, info, root);
           }
         }
         if (rv)
@@ -572,13 +509,14 @@ int Hunspell::spell(const char* word, int* info, char** root) {
       }
     }
     case INITCAP: {
+
       *info += SPELL_ORIGCAP;
-      wl = mkallsmall2(cw, unicw, nc);
-      memcpy(wspace, cw, (wl + 1));
-      wl2 = mkinitcap2(cw, unicw, nc);
+      mkallsmall2(scw, sunicw);
+      std::string u8buffer(scw);
+      mkinitcap2(scw, sunicw);
       if (captype == INITCAP)
         *info += SPELL_INITCAP;
-      rv = checkword(cw, info, root);
+      rv = checkword(scw.c_str(), info, root);
       if (captype == INITCAP)
         *info -= SPELL_INITCAP;
       // forbid bad capitalization
@@ -593,18 +531,16 @@ int Hunspell::spell(const char* word, int* info, char** root) {
       if (rv)
         break;
 
-      rv = checkword(wspace, info, root);
+      rv = checkword(u8buffer.c_str(), info, root);
       if (abbv && !rv) {
-        *(wspace + wl) = '.';
-        *(wspace + wl + 1) = '\0';
-        rv = checkword(wspace, info, root);
+        u8buffer.push_back('.');
+        rv = checkword(u8buffer.c_str(), info, root);
         if (!rv) {
-          memcpy(wspace, cw, wl2);
-          *(wspace + wl2) = '.';
-          *(wspace + wl2 + 1) = '\0';
+          u8buffer = scw;
+          u8buffer.push_back('.');
           if (captype == INITCAP)
             *info += SPELL_INITCAP;
-          rv = checkword(wspace, info, root);
+          rv = checkword(u8buffer.c_str(), info, root);
           if (captype == INITCAP)
             *info -= SPELL_INITCAP;
           if (rv && is_keepcase(rv) && (captype == ALLCAP))
@@ -617,8 +553,8 @@ int Hunspell::spell(const char* word, int* info, char** root) {
            // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
            // in INITCAP form, too.
            !(pAMgr->get_checksharps() &&
-             ((utf8 && strstr(wspace, "\xC3\x9F")) ||
-              (!utf8 && strchr(wspace, '\xDF'))))))
+             ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
+              (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
         rv = NULL;
       break;
     }
@@ -637,67 +573,66 @@ int Hunspell::spell(const char* word, int* info, char** root) {
 
   // recursive breaking at break points
   if (wordbreak) {
-    char* s;
-    char r;
+
     int nbr = 0;
-    wl = strlen(cw);
+    wl = scw.size();
     int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
 
     // calculate break points for recursion limit
     for (int j = 0; j < numbreak; j++) {
-      s = cw;
-      do {
-        s = (char*)strstr(s, wordbreak[j]);
-        if (s) {
-          nbr++;
-          s++;
-        }
-      } while (s);
+      size_t len = strlen(wordbreak[j]);
+      size_t pos = 0;
+      while ((pos = scw.find(wordbreak[j], pos, len)) != std::string::npos) {
+        ++nbr;
+        pos += len;
+      }
     }
     if (nbr >= 10)
       return 0;
 
     // check boundary patterns (^begin and end$)
     for (int j = 0; j < numbreak; j++) {
-      int plen = strlen(wordbreak[j]);
+      size_t plen = strlen(wordbreak[j]);
       if (plen == 1 || plen > wl)
         continue;
+
       if (wordbreak[j][0] == '^' &&
-          strncmp(cw, wordbreak[j] + 1, plen - 1) == 0 && spell(cw + plen - 1))
+          scw.compare(0, plen - 1, wordbreak[j] + 1, plen -1) == 0 && spell(scw.c_str() + plen - 1))
         return 1;
+
       if (wordbreak[j][plen - 1] == '$' &&
-          strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
-        r = cw[wl - plen + 1];
-        cw[wl - plen + 1] = '\0';
-        if (spell(cw))
+          scw.compare(wl - plen + 1, plen - 1, wordbreak[j], plen - 1) == 0) {
+        char r = scw[wl - plen + 1];
+        scw[wl - plen + 1] = '\0';
+        if (spell(scw.c_str()))
           return 1;
-        cw[wl - plen + 1] = r;
+        scw[wl - plen + 1] = r;
       }
     }
 
     // other patterns
     for (int j = 0; j < numbreak; j++) {
-      int plen = strlen(wordbreak[j]);
-      s = (char*)strstr(cw, wordbreak[j]);
-      if (s && (s > cw) && (s < cw + wl - plen)) {
-        if (!spell(s + plen))
+      size_t plen = strlen(wordbreak[j]);
+      size_t found = scw.find(wordbreak[j]);
+      if ((found > 0) && (found < wl - plen)) {
+        if (!spell(scw.c_str() + found + plen))
           continue;
-        r = *s;
-        *s = '\0';
+        char r = scw[found];
+        scw[found] = '\0';
         // examine 2 sides of the break point
-        if (spell(cw))
+        if (spell(scw.c_str()))
           return 1;
-        *s = r;
+        scw[found] = r;
 
         // LANG_hu: spec. dash rule
         if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
-          r = s[1];
-          s[1] = '\0';
-          if (spell(cw))
+          r = scw[found + 1];
+          scw[found + 1] = '\0';
+          if (spell(scw.c_str()))
             return 1;  // check the first part with dash
-          s[1] = r;
+          scw[found + 1] = r;
         }
-        // end of LANG speficic region
+        // end of LANG specific region
       }
     }
   }
@@ -716,10 +651,9 @@ struct hentry* Hunspell::checkword(const char* w, int* info, char** root) {
   if (ignoredchars != NULL) {
     w2.assign(w);
     if (utf8) {
-      int ignoredchars_utf16_len;
-      unsigned short* ignoredchars_utf16 =
-          pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
-      remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
+      const std::vector<w_char>& ignoredchars_utf16 =
+          pAMgr->get_ignore_utf16();
+      remove_ignored_chars_utf(w2, ignoredchars_utf16);
     } else {
       remove_ignored_chars(w2, ignoredchars);
     }
@@ -802,37 +736,40 @@ struct hentry* Hunspell::checkword(const char* w, int* info, char** root) {
         return NULL;
       }
       if (root) {
-        *root = mystrdup(he->word);
-        if (*root && complexprefixes) {
+        std::string word_root(he->word);
+        if (complexprefixes) {
           if (utf8)
-            reverseword_utf(*root);
+            reverseword_utf(word_root);
           else
-            reverseword(*root);
+            reverseword(word_root);
         }
+        *root = mystrdup(word_root.c_str());
       }
       // try check compound word
     } else if (pAMgr->get_compound()) {
-      he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);
+      struct hentry* rwords[100];  // buffer for COMPOUND pattern checking
+      he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);
       // LANG_hu section: `moving rule' with last dash
       if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
         char* dup = mystrdup(word);
         if (!dup)
           return NULL;
         dup[len - 1] = '\0';
-        he = pAMgr->compound_check(dup, len - 1, -5, 0, 100, 0, NULL, 1, 0,
+        he = pAMgr->compound_check(dup, len - 1, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0,
                                    info);
         free(dup);
       }
-      // end of LANG speficic region
+      // end of LANG specific region
       if (he) {
         if (root) {
-          *root = mystrdup(he->word);
-          if (*root && complexprefixes) {
+          std::string word_root(he->word);
+          if (complexprefixes) {
             if (utf8)
-              reverseword_utf(*root);
+              reverseword_utf(word_root);
             else
-              reverseword(*root);
+              reverseword(word_root);
           }
+          *root = mystrdup(word_root.c_str());
         }
         if (info)
           *info += SPELL_COMPOUND;
@@ -845,11 +782,8 @@ struct hentry* Hunspell::checkword(const char* w, int* info, char** root) {
 
 int Hunspell::suggest(char*** slst, const char* word) {
   int onlycmpdsug = 0;
-  char cw[MAXWORDUTF8LEN];
-  char wspace[MAXWORDUTF8LEN];
   if (!pSMgr || maxdic == 0)
     return 0;
-  w_char unicw[MAXWORDLEN];
   *slst = NULL;
   // process XML input of the simplified API (see manual)
   if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
@@ -863,130 +797,132 @@ int Hunspell::suggest(char*** slst, const char* word) {
     if (nc >= MAXWORDLEN)
       return 0;
   }
-  int captype = 0;
-  int abbv = 0;
-  int wl = 0;
+  int captype = NOCAP;
+  size_t abbv = 0;
+  size_t wl = 0;
+
+  std::string scw;
+  std::vector<w_char> sunicw;
 
   // input conversion
   RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
-  int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0;
-  if (convstatus < 0)
-    return 0;
-  else if (convstatus > 0)
-    wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
-  else
-    wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
+  {
+    std::string wspace;
+
+    int convstatus = rl ? rl->conv(word, wspace) : 0;
+    if (convstatus < 0)
+      return 0;
+    else if (convstatus > 0)
+      wl = cleanword2(scw, sunicw, wspace.c_str(), &nc, &captype, &abbv);
+    else
+      wl = cleanword2(scw, sunicw, word, &nc, &captype, &abbv);
+
+    if (wl == 0)
+      return 0;
+  }
 
-  if (wl == 0)
-    return 0;
   int ns = 0;
   int capwords = 0;
 
   // check capitalized form for FORCEUCASE
   if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
     int info = SPELL_ORIGCAP;
-    char** wlst;
-    if (checkword(cw, &info, NULL)) {
-      if (*slst) {
-        wlst = *slst;
-      } else {
-        wlst = (char**)malloc(MAXSUGGESTION * sizeof(char*));
-        if (wlst == NULL)
-          return -1;
-        *slst = wlst;
-        for (int i = 0; i < MAXSUGGESTION; i++) {
-          wlst[i] = NULL;
-        }
+    if (checkword(scw.c_str(), &info, NULL)) {
+      std::string form(scw);
+      mkinitcap(form);
+
+      char** wlst = (char**)malloc(MAXSUGGESTION * sizeof(char*));
+      if (wlst == NULL)
+        return -1;
+      *slst = wlst;
+      wlst[0] = mystrdup(form.c_str());
+      for (int i = 1; i < MAXSUGGESTION; ++i) {
+        wlst[i] = NULL;
       }
-      wlst[0] = mystrdup(cw);
-      mkinitcap(wlst[0]);
+
       return 1;
     }
   }
 
   switch (captype) {
     case NOCAP: {
-      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
+      ns = pSMgr->suggest(slst, scw.c_str(), ns, &onlycmpdsug);
       break;
     }
 
     case INITCAP: {
       capwords = 1;
-      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
+      ns = pSMgr->suggest(slst, scw.c_str(), ns, &onlycmpdsug);
       if (ns == -1)
         break;
-      memcpy(wspace, cw, (wl + 1));
-      mkallsmall2(wspace, unicw, nc);
-      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
+      std::string wspace(scw);
+      mkallsmall2(wspace, sunicw);
+      ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);
       break;
     }
     case HUHINITCAP:
       capwords = 1;
     case HUHCAP: {
-      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
+      ns = pSMgr->suggest(slst, scw.c_str(), ns, &onlycmpdsug);
       if (ns != -1) {
-        int prevns;
         // something.The -> something. The
-        char* dot = strchr(cw, '.');
-        if (dot && (dot > cw)) {
+        size_t dot_pos = scw.find('.');
+        if (dot_pos != std::string::npos && dot_pos > 0) {  // ignore a leading '.', as the old (dot > cw) test did
+          std::string postdot = scw.substr(dot_pos + 1);
           int captype_;
           if (utf8) {
-            w_char w_[MAXWORDLEN];
-            int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
-            captype_ = get_captype_utf8(w_, wl_, langnum);
-          } else
-            captype_ = get_captype(dot + 1, strlen(dot + 1), csconv);
+            std::vector<w_char> postdotu;
+            u8_u16(postdotu, postdot);
+            captype_ = get_captype_utf8(postdotu, langnum);
+          } else {
+            captype_ = get_captype(postdot, csconv);
+          }
           if (captype_ == INITCAP) {
-            char* st = mystrdup(cw);
-            if (st) {
-              char* newst = (char*)realloc(st, wl + 2);
-              if (newst == NULL)
-                free(st);
-              st = newst;
-            }
-            if (st) {
-              st[(dot - cw) + 1] = ' ';
-              strcpy(st + (dot - cw) + 2, dot + 1);
-              ns = insert_sug(slst, st, ns);
-              free(st);
-            }
+            std::string str(scw);
+            str.insert(dot_pos + 1, 1, ' ');
+            ns = insert_sug(slst, str.c_str(), ns);
           }
         }
+
+        std::string wspace;
+
         if (captype == HUHINITCAP) {
           // TheOpenOffice.org -> The OpenOffice.org
-          memcpy(wspace, cw, (wl + 1));
-          mkinitsmall2(wspace, unicw, nc);
-          ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
+          wspace = scw;
+          mkinitsmall2(wspace, sunicw);
+          ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);
         }
-        memcpy(wspace, cw, (wl + 1));
-        mkallsmall2(wspace, unicw, nc);
-        if (spell(wspace))
-          ns = insert_sug(slst, wspace, ns);
-        prevns = ns;
-        ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
+        wspace = scw;
+        mkallsmall2(wspace, sunicw);
+        if (spell(wspace.c_str()))
+          ns = insert_sug(slst, wspace.c_str(), ns);
+        int prevns = ns;
+        ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);
         if (captype == HUHINITCAP) {
-          mkinitcap2(wspace, unicw, nc);
-          if (spell(wspace))
-            ns = insert_sug(slst, wspace, ns);
-          ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
+          mkinitcap2(wspace, sunicw);
+          if (spell(wspace.c_str()))
+            ns = insert_sug(slst, wspace.c_str(), ns);
+          ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);
         }
         // aNew -> "a New" (instead of "a new")
         for (int j = prevns; j < ns; j++) {
           char* space = strchr((*slst)[j], ' ');
           if (space) {
-            int slen = strlen(space + 1);
+            size_t slen = strlen(space + 1);
             // different case after space (need capitalisation)
-            if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
-              w_char w[MAXWORDLEN];
-              int wc = 0;
-              char* r = (*slst)[j];
+            if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
+              std::string first((*slst)[j], space + 1);
+              std::string second(space + 1);
+              std::vector<w_char> w;
               if (utf8)
-                wc = u8_u16(w, MAXWORDLEN, space + 1);
-              mkinitcap2(space + 1, w, wc);
+                u8_u16(w, second);
+              mkinitcap2(second, w);
               // set as first suggestion
+              char* r = (*slst)[j];
               for (int k = j; k > 0; k--)
                 (*slst)[k] = (*slst)[k - 1];
-              (*slst)[0] = r;
+              free(r);
+              (*slst)[0] = mystrdup((first + second).c_str());
             }
           }
         }
@@ -995,35 +931,30 @@ int Hunspell::suggest(char*** slst, const char* word) {
     }
 
     case ALLCAP: {
-      memcpy(wspace, cw, (wl + 1));
-      mkallsmall2(wspace, unicw, nc);
-      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
+      std::string wspace(scw);
+      mkallsmall2(wspace, sunicw);
+      ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);
       if (ns == -1)
         break;
-      if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
-        ns = insert_sug(slst, wspace, ns);
-      mkinitcap2(wspace, unicw, nc);
-      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
+      if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
+        ns = insert_sug(slst, wspace.c_str(), ns);
+      mkinitcap2(wspace, sunicw);
+      ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);
       for (int j = 0; j < ns; j++) {
-        mkallcap((*slst)[j]);
+        std::string form((*slst)[j]);
+        mkallcap(form);
+
         if (pAMgr && pAMgr->get_checksharps()) {
-          char* pos;
           if (utf8) {
-            pos = strstr((*slst)[j], "\xC3\x9F");
-            while (pos) {
-              *pos = 'S';
-              *(pos + 1) = 'S';
-              pos = strstr(pos + 2, "\xC3\x9F");
-            }
+            mystrrep(form, "\xC3\x9F", "SS");
           } else {
-            pos = strchr((*slst)[j], '\xDF');
-            while (pos) {
-              (*slst)[j] = (char*)realloc((*slst)[j], strlen((*slst)[j]) + 2);
-              mystrrep((*slst)[j], "\xDF", "SS");
-              pos = strchr((*slst)[j], '\xDF');
-            }
+            mystrrep(form, "\xDF", "SS");
           }
         }
+
+        free((*slst)[j]);
+        (*slst)[j] = mystrdup(form.c_str());
+
       }
       break;
     }
@@ -1035,11 +966,10 @@ int Hunspell::suggest(char*** slst, const char* word) {
       char* pos = strchr((*slst)[j], '-');
       if (pos) {
         int info;
-        char w[MAXWORDUTF8LEN];
         *pos = '\0';
-        strcpy(w, (*slst)[j]);
-        strcat(w, pos + 1);
-        (void)spell(w, &info, NULL);
+        std::string w((*slst)[j]);
+        w.append(pos + 1);
+        (void)spell(w.c_str(), &info, NULL);
         if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
           *pos = ' ';
         } else
@@ -1054,64 +984,67 @@ int Hunspell::suggest(char*** slst, const char* word) {
       (*slst)) {
     switch (captype) {
       case NOCAP: {
-        ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
+        ns = pSMgr->ngsuggest(*slst, scw.c_str(), ns, pHMgr, maxdic);
         break;
       }
       case HUHINITCAP:
         capwords = 1;
       case HUHCAP: {
-        memcpy(wspace, cw, (wl + 1));
-        mkallsmall2(wspace, unicw, nc);
-        ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
+        std::string wspace(scw);
+        mkallsmall2(wspace, sunicw);
+        ns = pSMgr->ngsuggest(*slst, wspace.c_str(), ns, pHMgr, maxdic);
         break;
       }
       case INITCAP: {
         capwords = 1;
-        memcpy(wspace, cw, (wl + 1));
-        mkallsmall2(wspace, unicw, nc);
-        ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
+        std::string wspace(scw);
+        mkallsmall2(wspace, sunicw);
+        ns = pSMgr->ngsuggest(*slst, wspace.c_str(), ns, pHMgr, maxdic);
         break;
       }
       case ALLCAP: {
-        memcpy(wspace, cw, (wl + 1));
-        mkallsmall2(wspace, unicw, nc);
+        std::string wspace(scw);
+        mkallsmall2(wspace, sunicw);
         int oldns = ns;
-        ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
-        for (int j = oldns; j < ns; j++)
-          mkallcap((*slst)[j]);
+        ns = pSMgr->ngsuggest(*slst, wspace.c_str(), ns, pHMgr, maxdic);
+        for (int j = oldns; j < ns; j++) {
+          std::string form((*slst)[j]);
+          mkallcap(form);
+          free((*slst)[j]);
+          (*slst)[j] = mystrdup(form.c_str());
+        }
         break;
       }
     }
   }
 
   // try dash suggestion (Afo-American -> Afro-American)
-  if (char* pos = strchr(cw, '-')) {
-    char* ppos = cw;
+  size_t dash_pos = scw.find('-');
+  if (dash_pos != std::string::npos) {
     int nodashsug = 1;
-    char** nlst = NULL;
-    int nn = 0;
-    int last = 0;
-    if (*slst) {
-      for (int j = 0; j < ns && nodashsug == 1; j++) {
-        if (strchr((*slst)[j], '-'))
-          nodashsug = 0;
-      }
+    for (int j = 0; j < ns && nodashsug == 1; j++) {
+      if (strchr((*slst)[j], '-'))
+        nodashsug = 0;
     }
+
+    size_t prev_pos = 0;
+    bool last = false;
+
     while (nodashsug && !last) {
-      if (*pos == '\0')
+      if (dash_pos == scw.size())
         last = 1;
-      else
-        *pos = '\0';
-      if (!spell(ppos)) {
-        nn = suggest(&nlst, ppos);
+      std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
+      if (!spell(chunk.c_str())) {
+        char** nlst = NULL;
+        int nn = suggest(&nlst, chunk.c_str());
         for (int j = nn - 1; j >= 0; j--) {
-          strncpy(wspace, cw, ppos - cw);
-          strcpy(wspace + (ppos - cw), nlst[j]);
+          std::string wspace = scw.substr(0, prev_pos);
+          wspace.append(nlst[j]);
           if (!last) {
-            strcat(wspace, "-");
-            strcat(wspace, pos + 1);
+            wspace.append("-");
+            wspace.append(scw.substr(dash_pos + 1));
           }
-          ns = insert_sug(slst, wspace, ns);
+          ns = insert_sug(slst, wspace.c_str(), ns);
           free(nlst[j]);
         }
         if (nlst != NULL)
@@ -1119,29 +1052,34 @@ int Hunspell::suggest(char*** slst, const char* word) {
         nodashsug = 0;
       }
       if (!last) {
-        *pos = '-';
-        ppos = pos + 1;
-        pos = strchr(ppos, '-');
+        prev_pos = dash_pos + 1;
+        dash_pos = scw.find('-', prev_pos);
       }
-      if (!pos)
-        pos = cw + strlen(cw);
+      if (dash_pos == std::string::npos)
+        dash_pos = scw.size();
     }
   }
 
   // word reversing wrapper for complex prefixes
   if (complexprefixes) {
     for (int j = 0; j < ns; j++) {
+      std::string root((*slst)[j]);
+      free((*slst)[j]);
       if (utf8)
-        reverseword_utf((*slst)[j]);
+        reverseword_utf(root);
       else
-        reverseword((*slst)[j]);
+        reverseword(root);
+      (*slst)[j] = mystrdup(root.c_str());
     }
   }
 
   // capitalize
   if (capwords)
     for (int j = 0; j < ns; j++) {
-      mkinitcap((*slst)[j]);
+      std::string form((*slst)[j]);
+      free((*slst)[j]);
+      mkinitcap(form);
+      (*slst)[j] = mystrdup(form.c_str());
     }
 
   // expand suggestions with dot(s)
@@ -1160,25 +1098,23 @@ int Hunspell::suggest(char*** slst, const char* word) {
         int l = 0;
         for (int j = 0; j < ns; j++) {
           if (!strchr((*slst)[j], ' ') && !spell((*slst)[j])) {
-            char s[MAXSWUTF8L];
-            w_char w[MAXSWL];
-            int len;
+            std::string s;
+            std::vector<w_char> w;
             if (utf8) {
-              len = u8_u16(w, MAXSWL, (*slst)[j]);
+              u8_u16(w, (*slst)[j]);
             } else {
-              strcpy(s, (*slst)[j]);
-              len = strlen(s);
+              s = (*slst)[j];
             }
-            mkallsmall2(s, w, len);
+            mkallsmall2(s, w);
             free((*slst)[j]);
-            if (spell(s)) {
-              (*slst)[l] = mystrdup(s);
+            if (spell(s.c_str())) {
+              (*slst)[l] = mystrdup(s.c_str());
               if ((*slst)[l])
                 l++;
             } else {
-              mkinitcap2(s, w, len);
-              if (spell(s)) {
-                (*slst)[l] = mystrdup(s);
+              mkinitcap2(s, w);
+              if (spell(s.c_str())) {
+                (*slst)[l] = mystrdup(s.c_str());
                 if ((*slst)[l])
                   l++;
               }
@@ -1211,9 +1147,10 @@ int Hunspell::suggest(char*** slst, const char* word) {
   // output conversion
   rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
   for (int j = 0; rl && j < ns; j++) {
-    if (rl->conv((*slst)[j], wspace, MAXWORDUTF8LEN) > 0) {
+    std::string wspace;
+    if (rl->conv((*slst)[j], wspace) > 0) {
       free((*slst)[j]);
-      (*slst)[j] = mystrdup(wspace);
+      (*slst)[j] = mystrdup(wspace.c_str());
     }
   }
 
@@ -1233,151 +1170,25 @@ char* Hunspell::get_dic_encoding() {
   return encoding;
 }
 
-#ifdef HUNSPELL_EXPERIMENTAL
-// XXX UTF-8 support is OK?
-int Hunspell::suggest_auto(char*** slst, const char* word) {
-  char cw[MAXWORDUTF8LEN];
-  char wspace[MAXWORDUTF8LEN];
-  if (!pSMgr || maxdic == 0)
-    return 0;
-  w_char unicw[MAXWORDLEN];
-  int nc = strlen(word);
-  if (utf8) {
-    if (nc >= MAXWORDUTF8LEN)
-      return 0;
-  } else {
-    if (nc >= MAXWORDLEN)
-      return 0;
-  }
-  int captype = 0;
-  int abbv = 0;
-  int wl = 0;
-
-  // input conversion
-  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
-  int convstatus = rl ? rl->conv(word, wspace) : 0;
-  if (convstatus < 0)
-    return 0;
-  else if (convstatus > 0)
-    wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
-  else
-    wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
-
-  if (wl == 0)
-    return 0;
-  int ns = 0;
-  *slst = NULL;  // HU, nsug in pSMgr->suggest
-
-  switch (captype) {
-    case NOCAP: {
-      ns = pSMgr->suggest_auto(slst, cw, ns);
-      if (ns > 0)
-        break;
-      break;
-    }
-
-    case INITCAP: {
-      memcpy(wspace, cw, (wl + 1));
-      mkallsmall2(wspace, unicw, nc);
-      ns = pSMgr->suggest_auto(slst, wspace, ns);
-      for (int j = 0; j < ns; j++)
-        mkinitcap((*slst)[j]);
-      ns = pSMgr->suggest_auto(slst, cw, ns);
-      break;
-    }
-
-    case HUHINITCAP:
-    case HUHCAP: {
-      ns = pSMgr->suggest_auto(slst, cw, ns);
-      if (ns == 0) {
-        memcpy(wspace, cw, (wl + 1));
-        mkallsmall2(wspace, unicw, nc);
-        ns = pSMgr->suggest_auto(slst, wspace, ns);
-      }
-      break;
-    }
-
-    case ALLCAP: {
-      memcpy(wspace, cw, (wl + 1));
-      mkallsmall2(wspace, unicw, nc);
-      ns = pSMgr->suggest_auto(slst, wspace, ns);
-
-      mkinitcap(wspace);
-      ns = pSMgr->suggest_auto(slst, wspace, ns);
-
-      for (int j = 0; j < ns; j++)
-        mkallcap((*slst)[j]);
-      break;
-    }
-  }
-
-  // word reversing wrapper for complex prefixes
-  if (complexprefixes) {
-    for (int j = 0; j < ns; j++) {
-      if (utf8)
-        reverseword_utf((*slst)[j]);
-      else
-        reverseword((*slst)[j]);
-    }
-  }
-
-  // expand suggestions with dot(s)
-  if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
-    for (int j = 0; j < ns; j++) {
-      (*slst)[j] = (char*)realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
-      strcat((*slst)[j], word + strlen(word) - abbv);
-    }
-  }
-
-  // LANG_hu section: replace '-' with ' ' in Hungarian
-  if (langnum == LANG_hu) {
-    for (int j = 0; j < ns; j++) {
-      char* pos = strchr((*slst)[j], '-');
-      if (pos) {
-        int info;
-        char w[MAXWORDUTF8LEN];
-        *pos = '\0';
-        strcpy(w, (*slst)[j]);
-        strcat(w, pos + 1);
-        spell(w, &info, NULL);
-        if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
-          *pos = ' ';
-        } else
-          *pos = '-';
-      }
-    }
-  }
-
-  // output conversion
-  rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
-  for (int j = 0; rl && j < ns; j++) {
-    if (rl->conv((*slst)[j], wspace) > 0) {
-      free((*slst)[j]);
-      (*slst)[j] = mystrdup(wspace);
-    }
-  }
-
-  // END OF LANG_hu section
-  return ns;
-}
-#endif
-
 int Hunspell::stem(char*** slst, char** desc, int n) {
-  char result[MAXLNLEN];
-  char result2[MAXLNLEN];
+
+  std::string result2;
   *slst = NULL;
   if (n == 0)
     return 0;
-  *result2 = '\0';
   for (int i = 0; i < n; i++) {
-    *result = '\0';
+
+    std::string result;
+
     // add compound word parts (except the last one)
     char* s = (char*)desc[i];
     char* part = strstr(s, MORPH_PART);
     if (part) {
       char* nextpart = strstr(part + 1, MORPH_PART);
       while (nextpart) {
-        copy_field(result + strlen(result), part, MORPH_PART);
+        std::string field;
+        copy_field(field, part, MORPH_PART);
+        result.append(field);
         part = nextpart;
         nextpart = strstr(part + 1, MORPH_PART);
       }
@@ -1404,22 +1215,28 @@ int Hunspell::stem(char*** slst, char** desc, int n) {
           int genl = line_tok(sg, &gen, MSEP_REC);
           free(sg);
           for (int j = 0; j < genl; j++) {
-            sprintf(result2 + strlen(result2), "%c%s%s", MSEP_REC, result,
-                    gen[j]);
+            result2.push_back(MSEP_REC);
+            result2.append(result);
+            result2.append(gen[j]);
           }
           freelist(&gen, genl);
         }
       } else {
-        sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
+        result2.push_back(MSEP_REC);
+        result2.append(result);
         if (strstr(pl[k], MORPH_SURF_PFX)) {
-          copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
+          std::string field;
+          copy_field(field, pl[k], MORPH_SURF_PFX);
+          result2.append(field);
         }
-        copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
+        std::string field;
+        copy_field(field, pl[k], MORPH_STEM);
+        result2.append(field);
       }
     }
     freelist(&pl, pln);
   }
-  int sln = line_tok(result2, slst, MSEP_REC);
+  int sln = line_tok(result2.c_str(), slst, MSEP_REC);
   return uniqlist(*slst, sln);
 }
 
@@ -1431,148 +1248,43 @@ int Hunspell::stem(char*** slst, const char* word) {
   return pln2;
 }
 
-#ifdef HUNSPELL_EXPERIMENTAL
-int Hunspell::suggest_pos_stems(char*** slst, const char* word) {
-  char cw[MAXWORDUTF8LEN];
-  char wspace[MAXWORDUTF8LEN];
-  if (!pSMgr || maxdic == 0)
-    return 0;
-  w_char unicw[MAXWORDLEN];
-  int nc = strlen(word);
-  if (utf8) {
-    if (nc >= MAXWORDUTF8LEN)
-      return 0;
-  } else {
-    if (nc >= MAXWORDLEN)
-      return 0;
-  }
-  int captype = 0;
-  int abbv = 0;
-  int wl = 0;
-
-  // input conversion
-  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
-  int convstatus = rl ? rl->conv(word, wspace) : 0;
-  if (convstatus < 0)
-    return 0;
-  else if (convstatus > 0)
-    wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
-  else
-    wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
-
-  if (wl == 0)
-    return 0;
-
-  int ns = 0;  // ns=0 = normalized input
-
-  *slst = NULL;  // HU, nsug in pSMgr->suggest
-
-  switch (captype) {
-    case HUHCAP:
-    case NOCAP: {
-      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
-
-      if ((abbv) && (ns == 0)) {
-        memcpy(wspace, cw, wl);
-        *(wspace + wl) = '.';
-        *(wspace + wl + 1) = '\0';
-        ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
-      }
-
-      break;
-    }
-
-    case INITCAP: {
-      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
-
-      if (ns == 0 || ((*slst)[0][0] == '#')) {
-        memcpy(wspace, cw, (wl + 1));
-        mkallsmall2(wspace, unicw, nc);
-        ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
-      }
-
-      break;
-    }
-
-    case ALLCAP: {
-      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
-      if (ns != 0)
-        break;
-
-      memcpy(wspace, cw, (wl + 1));
-      mkallsmall2(wspace, unicw, nc);
-      ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
-
-      if (ns == 0) {
-        mkinitcap(wspace);
-        ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
-      }
-      break;
-    }
-  }
-
-  // output conversion
-  rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
-  for (int j = 0; rl && j < ns; j++) {
-    if (rl->conv((*slst)[j], wspace) > 0) {
-      free((*slst)[j]);
-      (*slst)[j] = mystrdup(wspace);
-    }
-  }
-
-  return ns;
-}
-#endif  // END OF HUNSPELL_EXPERIMENTAL CODE
-
 const char* Hunspell::get_wordchars() {
   return pAMgr->get_wordchars();
 }
 
-unsigned short* Hunspell::get_wordchars_utf16(int* len) {
-  return pAMgr->get_wordchars_utf16(len);
+const std::vector<w_char>& Hunspell::get_wordchars_utf16() {
+  return pAMgr->get_wordchars_utf16();
 }
 
-void Hunspell::mkinitcap(char* p) {
-  if (!utf8) {
-    if (*p != '\0')
-      *p = csconv[((unsigned char)*p)].cupper;
+void Hunspell::mkinitcap(std::string& u8) {
+  if (utf8) {
+    std::vector<w_char> u16;
+    u8_u16(u16, u8);
+    ::mkinitcap_utf(u16, langnum);
+    u16_u8(u8, u16);
   } else {
-    int len;
-    w_char u[MAXWORDLEN];
-    len = u8_u16(u, MAXWORDLEN, p);
-    unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
-    u[0].h = (unsigned char)(i >> 8);
-    u[0].l = (unsigned char)(i & 0x00FF);
-    u16_u8(p, MAXWORDUTF8LEN, u, len);
+    ::mkinitcap(u8, csconv);
   }
 }
 
-int Hunspell::mkinitcap2(char* p, w_char* u, int nc) {
-  if (!utf8) {
-    if (*p != '\0')
-      *p = csconv[((unsigned char)*p)].cupper;
-  } else if (nc > 0) {
-    unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
-    u[0].h = (unsigned char)(i >> 8);
-    u[0].l = (unsigned char)(i & 0x00FF);
-    u16_u8(p, MAXWORDUTF8LEN, u, nc);
-    return strlen(p);
+int Hunspell::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
+  if (utf8) {
+    ::mkinitcap_utf(u16, langnum);
+    u16_u8(u8, u16);
+  } else {
+    ::mkinitcap(u8, csconv);
   }
-  return nc;
+  return u8.size();
 }
 
-int Hunspell::mkinitsmall2(char* p, w_char* u, int nc) {
-  if (!utf8) {
-    if (*p != '\0')
-      *p = csconv[((unsigned char)*p)].clower;
-  } else if (nc > 0) {
-    unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
-    u[0].h = (unsigned char)(i >> 8);
-    u[0].l = (unsigned char)(i & 0x00FF);
-    u16_u8(p, MAXWORDUTF8LEN, u, nc);
-    return strlen(p);
+int Hunspell::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
+  if (utf8) {
+    ::mkinitsmall_utf(u16, langnum);
+    u16_u8(u8, u16);
+  } else {
+    ::mkinitsmall(u8, csconv);
   }
-  return nc;
+  return u8.size();
 }
 
 int Hunspell::add(const char* word) {
@@ -1601,20 +1313,16 @@ struct cs_info* Hunspell::get_csconv() {
   return csconv;
 }
 
-void Hunspell::cat_result(char* result, char* st) {
+void Hunspell::cat_result(std::string& result, char* st) {
   if (st) {
-    if (*result)
-      mystrcat(result, "\n", MAXLNLEN);
-    mystrcat(result, st, MAXLNLEN);
+    if (!result.empty())
+      result.append("\n");
+    result.append(st);
     free(st);
   }
 }
 
 int Hunspell::analyze(char*** slst, const char* word) {
-  char cw[MAXWORDUTF8LEN];
-  char wspace[MAXWORDUTF8LEN];
-  w_char unicw[MAXWORDLEN];
-  int wl2 = 0;
   *slst = NULL;
   if (!pSMgr || maxdic == 0)
     return 0;
@@ -1626,48 +1334,52 @@ int Hunspell::analyze(char*** slst, const char* word) {
     if (nc >= MAXWORDLEN)
       return 0;
   }
-  int captype = 0;
-  int abbv = 0;
-  int wl = 0;
+  int captype = NOCAP;
+  size_t abbv = 0;
+  size_t wl = 0;
+
+  std::string scw;
+  std::vector<w_char> sunicw;
 
   // input conversion
   RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
-  int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0;
-  if (convstatus < 0)
-    return 0;
-  else if (convstatus > 0)
-    wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
-  else
-    wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
+  {
+    std::string wspace;
+
+    int convstatus = rl ? rl->conv(word, wspace) : 0;
+    if (convstatus < 0)
+      return 0;
+    else if (convstatus > 0)
+      wl = cleanword2(scw, sunicw, wspace.c_str(), &nc, &captype, &abbv);
+    else
+      wl = cleanword2(scw, sunicw, word, &nc, &captype, &abbv);
+  }
 
   if (wl == 0) {
     if (abbv) {
+      scw.clear();
       for (wl = 0; wl < abbv; wl++)
-        cw[wl] = '.';
-      cw[wl] = '\0';
+        scw.push_back('.');
       abbv = 0;
     } else
       return 0;
   }
 
-  char result[MAXLNLEN];
-  char* st = NULL;
-
-  *result = '\0';
+  std::string result;
 
-  int n = 0;
-  int n2 = 0;
-  int n3 = 0;
+  size_t n = 0;
+  size_t n2 = 0;
+  size_t n3 = 0;
 
   // test numbers
   // LANG_hu section: set dash information for suggestions
   if (langnum == LANG_hu) {
-    while ((n < wl) && (((cw[n] <= '9') && (cw[n] >= '0')) ||
-                        (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
+    while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
+                        (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
       n++;
-      if ((cw[n] == '.') || (cw[n] == ',')) {
+      if ((scw[n] == '.') || (scw[n] == ',')) {
         if (((n2 == 0) && (n > 3)) ||
-            ((n2 > 0) && ((cw[n - 1] == '.') || (cw[n - 1] == ','))))
+            ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
           break;
         n2++;
         n3 = n;
@@ -1676,21 +1388,21 @@ int Hunspell::analyze(char*** slst, const char* word) {
 
     if ((n == wl) && (n3 > 0) && (n - n3 > 3))
       return 0;
-    if ((n == wl) || ((n > 0) && ((cw[n] == '%') || (cw[n] == '\xB0')) &&
-                      checkword(cw + n, NULL, NULL))) {
-      mystrcat(result, cw, MAXLNLEN);
-      result[n - 1] = '\0';
+    if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
+                      checkword(scw.c_str() + n, NULL, NULL))) {
+      result.append(scw);
+      result.resize(n - 1);
       if (n == wl)
-        cat_result(result, pSMgr->suggest_morph(cw + n - 1));
+        cat_result(result, pSMgr->suggest_morph(scw.c_str() + n - 1));
       else {
-        char sign = cw[n];
-        cw[n] = '\0';
-        cat_result(result, pSMgr->suggest_morph(cw + n - 1));
-        mystrcat(result, "+", MAXLNLEN);  // XXX SPEC. MORPHCODE
-        cw[n] = sign;
-        cat_result(result, pSMgr->suggest_morph(cw + n));
+        char sign = scw[n];
+        scw[n] = '\0';
+        cat_result(result, pSMgr->suggest_morph(scw.c_str() + n - 1));
+        result.push_back('+');  // XXX SPEC. MORPHCODE
+        scw[n] = sign;
+        cat_result(result, pSMgr->suggest_morph(scw.c_str() + n));
       }
-      return line_tok(result, slst, MSEP_REC);
+      return line_tok(result.c_str(), slst, MSEP_REC);
     }
   }
   // END OF LANG_hu section
@@ -1699,64 +1411,58 @@ int Hunspell::analyze(char*** slst, const char* word) {
     case HUHCAP:
     case HUHINITCAP:
     case NOCAP: {
-      cat_result(result, pSMgr->suggest_morph(cw));
+      cat_result(result, pSMgr->suggest_morph(scw.c_str()));
       if (abbv) {
-        memcpy(wspace, cw, wl);
-        *(wspace + wl) = '.';
-        *(wspace + wl + 1) = '\0';
-        cat_result(result, pSMgr->suggest_morph(wspace));
+        std::string u8buffer(scw);
+        u8buffer.push_back('.');
+        cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
       }
       break;
     }
     case INITCAP: {
-      wl = mkallsmall2(cw, unicw, nc);
-      memcpy(wspace, cw, (wl + 1));
-      wl2 = mkinitcap2(cw, unicw, nc);
-      cat_result(result, pSMgr->suggest_morph(wspace));
-      cat_result(result, pSMgr->suggest_morph(cw));
+      wl = mkallsmall2(scw, sunicw);
+      std::string u8buffer(scw);
+      mkinitcap2(scw, sunicw);
+      cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
+      cat_result(result, pSMgr->suggest_morph(scw.c_str()));
       if (abbv) {
-        *(wspace + wl) = '.';
-        *(wspace + wl + 1) = '\0';
-        cat_result(result, pSMgr->suggest_morph(wspace));
+        u8buffer.push_back('.');
+        cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
 
-        memcpy(wspace, cw, wl2);
-        *(wspace + wl2) = '.';
-        *(wspace + wl2 + 1) = '\0';
+        u8buffer = scw;
+        u8buffer.push_back('.');
 
-        cat_result(result, pSMgr->suggest_morph(wspace));
+        cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
       }
       break;
     }
     case ALLCAP: {
-      cat_result(result, pSMgr->suggest_morph(cw));
+      cat_result(result, pSMgr->suggest_morph(scw.c_str()));
       if (abbv) {
-        memcpy(wspace, cw, wl);
-        *(wspace + wl) = '.';
-        *(wspace + wl + 1) = '\0';
-        cat_result(result, pSMgr->suggest_morph(cw));
+        std::string u8buffer(scw);
+        u8buffer.push_back('.');
+        cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
       }
-      wl = mkallsmall2(cw, unicw, nc);
-      memcpy(wspace, cw, (wl + 1));
-      wl2 = mkinitcap2(cw, unicw, nc);
+      mkallsmall2(scw, sunicw);
+      std::string u8buffer(scw);
+      mkinitcap2(scw, sunicw);
 
-      cat_result(result, pSMgr->suggest_morph(wspace));
-      cat_result(result, pSMgr->suggest_morph(cw));
+      cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
+      cat_result(result, pSMgr->suggest_morph(scw.c_str()));
       if (abbv) {
-        *(wspace + wl) = '.';
-        *(wspace + wl + 1) = '\0';
-        cat_result(result, pSMgr->suggest_morph(wspace));
+        u8buffer.push_back('.');
+        cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
 
-        memcpy(wspace, cw, wl2);
-        *(wspace + wl2) = '.';
-        *(wspace + wl2 + 1) = '\0';
+        u8buffer = scw;
+        u8buffer.push_back('.');
 
-        cat_result(result, pSMgr->suggest_morph(wspace));
+        cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
       }
       break;
     }
   }
 
-  if (*result) {
+  if (!result.empty()) {
     // word reversing wrapper for complex prefixes
     if (complexprefixes) {
       if (utf8)
@@ -1764,95 +1470,94 @@ int Hunspell::analyze(char*** slst, const char* word) {
       else
         reverseword(result);
     }
-    return line_tok(result, slst, MSEP_REC);
+    return line_tok(result.c_str(), slst, MSEP_REC);
   }
 
   // compound word with dash (HU) I18n
-  char* dash = NULL;
-  int nresult = 0;
   // LANG_hu section: set dash information for suggestions
-  if (langnum == LANG_hu)
-    dash = (char*)strchr(cw, '-');
-  if ((langnum == LANG_hu) && dash) {
-    *dash = '\0';
+
+  size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
+  int nresult = 0;
+  if (dash_pos != std::string::npos) {
+    std::string part1 = scw.substr(0, dash_pos);
+    std::string part2 = scw.substr(dash_pos+1);
+
     // examine 2 sides of the dash
-    if (dash[1] == '\0') {  // base word ending with dash
-      if (spell(cw)) {
-        char* p = pSMgr->suggest_morph(cw);
+    if (part2.empty()) {  // base word ending with dash
+      if (spell(part1.c_str())) {
+        char* p = pSMgr->suggest_morph(part1.c_str());
         if (p) {
           int ret = line_tok(p, slst, MSEP_REC);
           free(p);
           return ret;
         }
       }
-    } else if ((dash[1] == 'e') && (dash[2] == '\0')) {  // XXX (HU) -e hat.
-      if (spell(cw) && (spell("-e"))) {
-        st = pSMgr->suggest_morph(cw);
+    } else if (part2.size() == 1 && part2[0] == 'e') {  // XXX (HU) -e hat.
+      if (spell(part1.c_str()) && (spell("-e"))) {
+        char* st = pSMgr->suggest_morph(part1.c_str());
         if (st) {
-          mystrcat(result, st, MAXLNLEN);
+          result.append(st);
           free(st);
         }
-        mystrcat(result, "+", MAXLNLEN);  // XXX spec. separator in MORPHCODE
+        result.push_back('+');  // XXX spec. separator in MORPHCODE
         st = pSMgr->suggest_morph("-e");
         if (st) {
-          mystrcat(result, st, MAXLNLEN);
+          result.append(st);
           free(st);
         }
-        return line_tok(result, slst, MSEP_REC);
+        return line_tok(result.c_str(), slst, MSEP_REC);
       }
     } else {
       // first word ending with dash: word- XXX ???
-      char r2 = *(dash + 1);
-      dash[0] = '-';
-      dash[1] = '\0';
-      nresult = spell(cw);
-      dash[1] = r2;
-      dash[0] = '\0';
-      if (nresult && spell(dash + 1) &&
-          ((strlen(dash + 1) > 1) || ((dash[1] > '0') && (dash[1] < '9')))) {
-        st = pSMgr->suggest_morph(cw);
+      part1.push_back('-');  // probe "word-", exactly as the replaced code did
+      nresult = spell(part1.c_str());
+      part1.erase(part1.size() - 1);
+      if (nresult && spell(part2.c_str()) &&
+          ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
+        char* st = pSMgr->suggest_morph(part1.c_str());
         if (st) {
-          mystrcat(result, st, MAXLNLEN);
+          result.append(st);
           free(st);
-          mystrcat(result, "+", MAXLNLEN);  // XXX spec. separator in MORPHCODE
+          result.push_back('+');  // XXX spec. separator in MORPHCODE
         }
-        st = pSMgr->suggest_morph(dash + 1);
+        st = pSMgr->suggest_morph(part2.c_str());
         if (st) {
-          mystrcat(result, st, MAXLNLEN);
+          result.append(st);
           free(st);
         }
-        return line_tok(result, slst, MSEP_REC);
+        return line_tok(result.c_str(), slst, MSEP_REC);
       }
     }
     // affixed number in correct word
-    if (nresult && (dash > cw) &&
-        (((*(dash - 1) <= '9') && (*(dash - 1) >= '0')) ||
-         (*(dash - 1) == '.'))) {
-      *dash = '-';
+    if (nresult && (dash_pos > 0) &&
+        (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
+         (scw[dash_pos - 1] == '.'))) {
       n = 1;
-      if (*(dash - n) == '.')
+      if (scw[dash_pos - n] == '.')
         n++;
       // search first not a number character to left from dash
-      while (((dash - n) >= cw) && ((*(dash - n) == '0') || (n < 3)) &&
+      while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
              (n < 6)) {
         n++;
       }
-      if ((dash - n) < cw)
+      if (dash_pos < n)
         n--;
       // numbers: valami1000000-hoz
       // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
       // 56-hoz, 6-hoz
       for (; n >= 1; n--) {
-        if ((*(dash - n) >= '0') && (*(dash - n) <= '9') &&
-            checkword(dash - n, NULL, NULL)) {
-          mystrcat(result, cw, MAXLNLEN);
-          result[dash - cw - n] = '\0';
-          st = pSMgr->suggest_morph(dash - n);
+        if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
+            continue;
+        }
+        std::string chunk = scw.substr(dash_pos - n);
+        if (checkword(chunk.c_str(), NULL, NULL)) {
+          result.append(scw, 0, dash_pos - n);  // text before the number, as the replaced code kept
+          char* st = pSMgr->suggest_morph(chunk.c_str());
           if (st) {
-            mystrcat(result, st, MAXLNLEN);
+            result.append(st);
             free(st);
           }
-          return line_tok(result, slst, MSEP_REC);
+          return line_tok(result.c_str(), slst, MSEP_REC);
         }
       }
     }
@@ -1866,30 +1571,33 @@ int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
     return 0;
   char** pl2;
   int pl2n = analyze(&pl2, word);
-  int captype = 0;
+  int captype = NOCAP;
   int abbv = 0;
-  char cw[MAXWORDUTF8LEN];
+  std::string cw;
   cleanword(cw, word, &captype, &abbv);
-  char result[MAXLNLEN];
-  *result = '\0';
+  std::string result;
 
   for (int i = 0; i < pln; i++) {
     cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
   }
   freelist(&pl2, pl2n);
 
-  if (*result) {
+  if (!result.empty()) {
     // allcap
     if (captype == ALLCAP)
       mkallcap(result);
 
     // line split
-    int linenum = line_tok(result, slst, MSEP_REC);
+    int linenum = line_tok(result.c_str(), slst, MSEP_REC);
 
     // capitalize
     if (captype == INITCAP || captype == HUHINITCAP) {
-      for (int j = 0; j < linenum; j++)
-        mkinitcap((*slst)[j]);
+      for (int j = 0; j < linenum; j++) {
+        std::string form((*slst)[j]);
+        free((*slst)[j]);
+        mkinitcap(form);
+        (*slst)[j] = mystrdup(form.c_str());
+      }
     }
 
     // temporary filtering of prefix related errors (eg.
@@ -1923,22 +1631,21 @@ int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
 }
 
 // minimal XML parser functions
-int Hunspell::get_xml_par(char* dest, const char* par, int max) {
-  char* d = dest;
+std::string Hunspell::get_xml_par(const char* par) {
+  std::string dest;
   if (!par)
-    return 0;
+    return dest;
   char end = *par;
-  char* dmax = dest + max;
   if (end == '>')
     end = '<';
   else if (end != '\'' && end != '"')
-    return 0;  // bad XML
+    return dest;  // bad XML: empty result (a std::string must never be built from 0)
-  for (par++; d < dmax && *par != '\0' && *par != end; par++, d++)
-    *d = *par;
-  *d = '\0';
+  for (par++; *par != '\0' && *par != end; ++par) {
+    dest.push_back(*par);
+  }
   mystrrep(dest, "&lt;", "<");
   mystrrep(dest, "&amp;", "&");
-  return (int)(d - dest);
+  return dest;
 }
 
 int Hunspell::get_langnum() const {
@@ -1967,18 +1674,17 @@ const char* Hunspell::get_xml_pos(const char* s, const char* attr) {
 int Hunspell::check_xml_par(const char* q,
                             const char* attr,
                             const char* value) {
-  char cw[MAXWORDUTF8LEN];
-  if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&
-      strcmp(cw, value) == 0)
+  std::string cw = get_xml_par(get_xml_pos(q, attr));
+  if (cw == value)
     return 1;
   return 0;
 }
 
-int Hunspell::get_xml_list(char*** slst, char* list, const char* tag) {
-  int n = 0;
-  char* p;
+int Hunspell::get_xml_list(char*** slst, const char* list, const char* tag) {
   if (!list)
     return 0;
+  int n = 0;
+  const char* p;
   for (p = list; ((p = strstr(p, tag)) != NULL); p++)
     n++;
   if (n == 0)
@@ -1987,25 +1693,20 @@ int Hunspell::get_xml_list(char*** slst, char* list, const char* tag) {
   if (!*slst)
     return 0;
   for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) {
-    int l = strlen(p);
-    (*slst)[n] = (char*)malloc(l + 1);
-    if (!(*slst)[n])
-      return n;
-    if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
-      free((*slst)[n]);
+    std::string cw = get_xml_par(p + strlen(tag) - 1);
+    if (cw.empty()) {
       break;
     }
+    (*slst)[n] = mystrdup(cw.c_str());
   }
   return n;
 }
 
 int Hunspell::spellml(char*** slst, const char* word) {
-  char *q, *q2;
-  char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
-  q = (char*)strstr(word, "<query");
+  const char* q = strstr(word, "<query");
   if (!q)
     return 0;  // bad XML input
-  q2 = strchr(q, '>');
+  const char* q2 = strchr(q, '>');
   if (!q2)
     return 0;  // bad XML input
   q2 = strstr(q2, "<word");
@@ -2013,8 +1714,9 @@ int Hunspell::spellml(char*** slst, const char* word) {
     return 0;  // bad XML input
   if (check_xml_par(q, "type=", "analyze")) {
     int n = 0;
-    if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10))
-      n = analyze(slst, cw);
+    std::string cw = get_xml_par(strchr(q2, '>'));
+    if (!cw.empty())
+      n = analyze(slst, cw.c_str());
     if (n == 0)
       return 0;
     // convert the result to <code><a>ana1</a><a>ana2</a></code> format
@@ -2036,22 +1738,25 @@ int Hunspell::spellml(char*** slst, const char* word) {
     (*slst)[0] = mystrdup(r.c_str());
     return 1;
   } else if (check_xml_par(q, "type=", "stem")) {
-    if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1))
-      return stem(slst, cw);
+    std::string cw = get_xml_par(strchr(q2, '>'));
+    if (!cw.empty())
+      return stem(slst, cw.c_str());
   } else if (check_xml_par(q, "type=", "generate")) {
-    int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
-    if (n == 0)
+    std::string cw = get_xml_par(strchr(q2, '>'));
+    if (cw.empty())
       return 0;
-    char* q3 = strstr(q2 + 1, "<word");
+    const char* q3 = strstr(q2 + 1, "<word");
     if (q3) {
-      if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
-        return generate(slst, cw, cw2);
+      std::string cw2 = get_xml_par(strchr(q3, '>'));
+      if (!cw2.empty()) {
+        return generate(slst, cw.c_str(), cw2.c_str());
       }
     } else {
       if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
         char** slst2;
-        if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>")) != 0) {
-          int n2 = generate(slst, cw, slst2, n);
+        int n = get_xml_list(&slst2, strchr(q2, '>'), "<a>");
+        if (n != 0) {
+          int n2 = generate(slst, cw.c_str(), slst2, n);
           freelist(&slst2, n);
           return uniqlist(*slst, n2);
         }
@@ -2062,182 +1767,6 @@ int Hunspell::spellml(char*** slst, const char* word) {
   return 0;
 }
 
-#ifdef HUNSPELL_EXPERIMENTAL
-// XXX is UTF-8 support OK?
-char* Hunspell::morph_with_correction(const char* word) {
-  char cw[MAXWORDUTF8LEN];
-  char wspace[MAXWORDUTF8LEN];
-  if (!pSMgr || maxdic == 0)
-    return NULL;
-  w_char unicw[MAXWORDLEN];
-  int nc = strlen(word);
-  if (utf8) {
-    if (nc >= MAXWORDUTF8LEN)
-      return NULL;
-  } else {
-    if (nc >= MAXWORDLEN)
-      return NULL;
-  }
-  int captype = 0;
-  int abbv = 0;
-  int wl = 0;
-
-  // input conversion
-  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
-  int convstatus = rl ? rl->conv(word, wspace) : 0;
-  if (convstatus < 0)
-    return 0;
-  else if (convstatus > 0)
-    wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
-  else
-    wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
-
-  if (wl == 0)
-    return NULL;
-
-  char result[MAXLNLEN];
-  char* st = NULL;
-
-  *result = '\0';
-
-  switch (captype) {
-    case NOCAP: {
-      st = pSMgr->suggest_morph_for_spelling_error(cw);
-      if (st) {
-        mystrcat(result, st, MAXLNLEN);
-        free(st);
-      }
-      if (abbv) {
-        memcpy(wspace, cw, wl);
-        *(wspace + wl) = '.';
-        *(wspace + wl + 1) = '\0';
-        st = pSMgr->suggest_morph_for_spelling_error(wspace);
-        if (st) {
-          if (*result)
-            mystrcat(result, "\n", MAXLNLEN);
-          mystrcat(result, st, MAXLNLEN);
-          free(st);
-        }
-      }
-      break;
-    }
-    case INITCAP: {
-      memcpy(wspace, cw, (wl + 1));
-      mkallsmall2(wspace, unicw, nc);
-      st = pSMgr->suggest_morph_for_spelling_error(wspace);
-      if (st) {
-        mystrcat(result, st, MAXLNLEN);
-        free(st);
-      }
-      st = pSMgr->suggest_morph_for_spelling_error(cw);
-      if (st) {
-        if (*result)
-          mystrcat(result, "\n", MAXLNLEN);
-        mystrcat(result, st, MAXLNLEN);
-        free(st);
-      }
-      if (abbv) {
-        memcpy(wspace, cw, wl);
-        *(wspace + wl) = '.';
-        *(wspace + wl + 1) = '\0';
-        mkallsmall2(wspace, unicw, nc);
-        st = pSMgr->suggest_morph_for_spelling_error(wspace);
-        if (st) {
-          if (*result)
-            mystrcat(result, "\n", MAXLNLEN);
-          mystrcat(result, st, MAXLNLEN);
-          free(st);
-        }
-        mkinitcap(wspace);
-        st = pSMgr->suggest_morph_for_spelling_error(wspace);
-        if (st) {
-          if (*result)
-            mystrcat(result, "\n", MAXLNLEN);
-          mystrcat(result, st, MAXLNLEN);
-          free(st);
-        }
-      }
-      break;
-    }
-    case HUHCAP: {
-      st = pSMgr->suggest_morph_for_spelling_error(cw);
-      if (st) {
-        mystrcat(result, st, MAXLNLEN);
-        free(st);
-      }
-      memcpy(wspace, cw, (wl + 1));
-      mkallsmall2(wspace, unicw, nc);
-      st = pSMgr->suggest_morph_for_spelling_error(wspace);
-      if (st) {
-        if (*result)
-          mystrcat(result, "\n", MAXLNLEN);
-        mystrcat(result, st, MAXLNLEN);
-        free(st);
-      }
-      break;
-    }
-    case ALLCAP: {
-      memcpy(wspace, cw, (wl + 1));
-      st = pSMgr->suggest_morph_for_spelling_error(wspace);
-      if (st) {
-        mystrcat(result, st, MAXLNLEN);
-        free(st);
-      }
-      mkallsmall2(wspace, unicw, nc);
-      st = pSMgr->suggest_morph_for_spelling_error(wspace);
-      if (st) {
-        if (*result)
-          mystrcat(result, "\n", MAXLNLEN);
-        mystrcat(result, st, MAXLNLEN);
-        free(st);
-      }
-      mkinitcap(wspace);
-      st = pSMgr->suggest_morph_for_spelling_error(wspace);
-      if (st) {
-        if (*result)
-          mystrcat(result, "\n", MAXLNLEN);
-        mystrcat(result, st, MAXLNLEN);
-        free(st);
-      }
-      if (abbv) {
-        memcpy(wspace, cw, (wl + 1));
-        *(wspace + wl) = '.';
-        *(wspace + wl + 1) = '\0';
-        if (*result)
-          mystrcat(result, "\n", MAXLNLEN);
-        st = pSMgr->suggest_morph_for_spelling_error(wspace);
-        if (st) {
-          mystrcat(result, st, MAXLNLEN);
-          free(st);
-        }
-        mkallsmall2(wspace, unicw, nc);
-        st = pSMgr->suggest_morph_for_spelling_error(wspace);
-        if (st) {
-          if (*result)
-            mystrcat(result, "\n", MAXLNLEN);
-          mystrcat(result, st, MAXLNLEN);
-          free(st);
-        }
-        mkinitcap(wspace);
-        st = pSMgr->suggest_morph_for_spelling_error(wspace);
-        if (st) {
-          if (*result)
-            mystrcat(result, "\n", MAXLNLEN);
-          mystrcat(result, st, MAXLNLEN);
-          free(st);
-        }
-      }
-      break;
-    }
-  }
-
-  if (*result)
-    return mystrdup(result);
-  return NULL;
-}
-
-#endif  // END OF HUNSPELL_EXPERIMENTAL CODE
-
 Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
   return (Hunhandle*)(new Hunspell(affpath, dpath));
 }
@@ -2333,10 +1862,9 @@ int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
   if (ignoredchars != NULL) {
     w2.assign(root_word);
     if (utf8) {
-      int ignoredchars_utf16_len;
-      unsigned short* ignoredchars_utf16 =
-          pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
-      remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
+      const std::vector<w_char>& ignoredchars_utf16 =
+          pAMgr->get_ignore_utf16();
+      remove_ignored_chars_utf(w2, ignoredchars_utf16);
     } else {
       remove_ignored_chars(w2, ignoredchars);
     }