fix for loading hunspell project

author: George Hazan <ghazan@miranda.im> 2018-03-10 13:56:24 +0300
committer: George Hazan <ghazan@miranda.im> 2018-03-10 13:56:24 +0300
commit: 97a16a6e09df80ffa3429e23a0174bd8daaa29a1 (patch)
tree: bc9ec915bfbcdfef2b655aacd8b4d02a80731196 /libs/hunspell/src/hunspell.cxx
parent: cb2caccb52c4044937c0d9e8eda7ddeb1d115e85 (diff)
1 files changed, 0 insertions, 2017 deletions
diff --git a/libs/hunspell/src/hunspell.cxx b/libs/hunspell/src/hunspell.cxx
deleted file mode 100644
index b1535013fe..0000000000
--- a/libs/hunspell/src/hunspell.cxx
+++ /dev/null
@@ -1,2017 +0,0 @@
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-/*
- * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
- * And Contributors.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * 3. All modifications to the source code must be clearly marked as
- *    such.  Binary redistributions based on modified source code
- *    must be clearly marked as modified versions in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
- * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-
-#include "affixmgr.hxx"
-#include "hunspell.hxx"
-#include "suggestmgr.hxx"
-#include "hunspell.h"
-#include "csutil.hxx"
-
-#include <limits>
-#include <string>
-
-#define MAXWORDUTF8LEN (MAXWORDLEN * 3)
-
-class HunspellImpl  // implementation object; the Hunspell methods visible in this file forward to it through m_Impl
-{
-public:
-  HunspellImpl(const char* affpath, const char* dpath, const char* key);  // builds the hash, affix and suggestion managers
-  ~HunspellImpl();
-  int add_dic(const char* dpath, const char* key);  // adds one more HashMgr; returns 1 when no affix path is stored, else 0
-  std::vector<std::string> suffix_suggest(const std::string& root_word);
-  std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
-  std::vector<std::string> generate(const std::string& word, const std::string& pattern);
-  std::vector<std::string> stem(const std::string& word);
-  std::vector<std::string> stem(const std::vector<std::string>& morph);
-  std::vector<std::string> analyze(const std::string& word);
-  bool input_conv(const std::string& word, std::string& dest);
-  bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);  // info and root are optional outputs
-  std::vector<std::string> suggest(const std::string& word);
-  const std::string& get_wordchars() const;
-  const std::vector<w_char>& get_wordchars_utf16() const;
-  const std::string& get_dict_encoding() const;
-  int add(const std::string& word);
-  int add_with_affix(const std::string& word, const std::string& example);
-  int remove(const std::string& word);
-  struct cs_info* get_csconv();
-  std::vector<char> dic_encoding_vec;  // NUL-terminated copy of `encoding`, filled in by the constructor
-
-  int get_langnum() const { return langnum; }
-  const char* get_try_string() const { return pAMgr->get_try_string(); }  // forwards to the affix manager
-  const std::string& get_version() const { return pAMgr->get_version(); }  // forwards to the affix manager
-
-private:
-  AffixMgr* pAMgr;  // created in the constructor, deleted in the destructor
-  std::vector<HashMgr*> m_HMgrs;  // one entry per loaded dictionary (constructor + add_dic); entries are deleted in the destructor
-  SuggestMgr* pSMgr;  // created in the constructor, deleted in the destructor
-  char* affixpath;  // mystrdup'ed affix path; reused by add_dic and freed in the destructor
-  std::string encoding;  // taken from pAMgr->get_encoding()
-  struct cs_info* csconv;  // set from get_current_cs(encoding) only when the dictionary is not UTF-8, otherwise NULL
-  int langnum;  // taken from pAMgr->get_langnum()
-  int utf8;  // taken from pAMgr->get_utf8(); non-zero selects the UTF-8 code paths
-  int complexprefixes;  // taken from pAMgr->get_complexprefixes(); non-zero makes checkword reverse the word
-  std::vector<std::string> wordbreak;  // taken from pAMgr->get_breaktable(); used by spell for recursive breaking
-
-private:
-  void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);  // trims blanks/periods and classifies capitalization
-  size_t cleanword2(std::string& dest,  // like cleanword, but also fills the w_char vector in UTF-8 mode and returns dest's length
-                    std::vector<w_char>& dest_u,
-                    const std::string& src,
-                    int* pcaptype,
-                    size_t* pabbrev);
-  void mkinitcap(std::string& u8);
-  int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
-  int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
-  void mkallcap(std::string& u8);
-  int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
-  struct hentry* checkword(const std::string& source, int* info, std::string* root);  // dictionary, affix and compound lookup of one form
-  std::string sharps_u8_l1(const std::string& source);  // replaces the bytes C3 9F with the single byte DF
-  hentry*
-  spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);  // recursive "ss" / sharp-s permutation search
-  int is_keepcase(const hentry* rv);
-  void insert_sug(std::vector<std::string>& slst, const std::string& word);  // puts word at the front of slst
-  void cat_result(std::string& result, const std::string& st);
-  std::vector<std::string> spellml(const std::string& word);
-  std::string get_xml_par(const char* par);
-  const char* get_xml_pos(const char* s, const char* attr);
-  std::vector<std::string> get_xml_list(const char* list, const char* tag);
-  int check_xml_par(const char* q, const char* attr, const char* value);
-private:
-  HunspellImpl(const HunspellImpl&);  // NOTE(review): declared private and no definition is visible here; presumably non-copyable on purpose
-  HunspellImpl& operator=(const HunspellImpl&);  // NOTE(review): same as the copy constructor above
-};
-
-Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)  // all three arguments are passed on unchanged
-  : m_Impl(new HunspellImpl(affpath, dpath, key)) {  // heap-allocated implementation; released by `delete m_Impl` in ~Hunspell
-}
-
-// Builds the spell-checking stack for one affix file / dictionary pair:
-// the hash manager first, then the affix manager that is handed the hash
-// manager list, and last the suggestion manager that is handed the affix
-// manager. The affix path is duplicated so add_dic() can reuse it later.
-HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key)
-    : affixpath(mystrdup(affpath)),
-      csconv(NULL),
-      utf8(0),
-      complexprefixes(0) {
-  // the first dictionary must be in the list before the affix manager is
-  // created, because the affix manager receives that list
-  HashMgr* firstDictionary = new HashMgr(dpath, affpath, key);
-  m_HMgrs.push_back(firstDictionary);
-  pAMgr = new AffixMgr(affpath, m_HMgrs, key);
-
-  // cache what the affix manager reports for this dictionary
-  char* tryChars = pAMgr->get_try_string();
-  encoding = pAMgr->get_encoding();
-  langnum = pAMgr->get_langnum();
-  utf8 = pAMgr->get_utf8();
-  // a character-set table is only looked up for non-UTF-8 dictionaries
-  if (utf8 == 0)
-    csconv = get_current_cs(encoding);
-  complexprefixes = pAMgr->get_complexprefixes();
-  wordbreak = pAMgr->get_breaktable();
-
-  // keep a NUL-terminated char-vector copy of the encoding name
-  dic_encoding_vec.resize(encoding.size() + 1);
-  strcpy(&dic_encoding_vec.front(), encoding.c_str());
-
-  // the suggestion manager is created last; the try string is released
-  // right after it has been passed in (free(NULL) is a no-op)
-  pSMgr = new SuggestMgr(tryChars, MAXSUGGESTION, pAMgr);
-  free(tryChars);
-}
-
-Hunspell::~Hunspell() {  // releases the implementation object allocated with `new` in the constructor
-  delete m_Impl;
-}
-
-// Tears down everything the constructor and add_dic() created: the
-// suggestion manager, the affix manager, every hash manager and the
-// duplicated affix path. Pointers are reset to NULL after release.
-HunspellImpl::~HunspellImpl() {
-  delete pSMgr;
-  pSMgr = NULL;
-
-  delete pAMgr;
-  pAMgr = NULL;
-
-  for (std::vector<HashMgr*>::iterator mgr = m_HMgrs.begin(); mgr != m_HMgrs.end(); ++mgr)
-    delete *mgr;
-
-  // the conversion table is only deleted in MOZILLA_CLIENT builds
-#ifdef MOZILLA_CLIENT
-  delete[] csconv;
-#endif
-  csconv = NULL;
-
-  // free() accepts NULL, so no guard is needed
-  free(affixpath);
-  affixpath = NULL;
-}
-
-// Public entry point for loading an additional dictionary. The work is done
-// by HunspellImpl::add_dic and its status code is handed back unchanged.
-int Hunspell::add_dic(const char* dpath, const char* key) {
-  const int status = m_Impl->add_dic(dpath, key);
-  return status;
-}
-
-// Appends one more HashMgr, built from `dpath` and the affix path that the
-// constructor remembered. Returns 0 on success and 1 when no affix path is
-// stored.
-int HunspellImpl::add_dic(const char* dpath, const char* key) {
-  if (affixpath) {
-    m_HMgrs.push_back(new HashMgr(dpath, affixpath, key));
-    return 0;
-  }
-  return 1;
-}
-
-// Copies src into dest without its leading blanks and trailing periods.
-// The number of periods removed is stored in *pabbrev (the abbreviation
-// marker) and the capitalization type of what remains in *pcaptype.
-// In UTF-8 mode dest_utf is additionally filled from dest by u8_u16().
-// Returns the length of dest in bytes; 0 when nothing is left after
-// trimming (dest and dest_utf are then empty and *pcaptype is NOCAP).
-
-size_t HunspellImpl::cleanword2(std::string& dest,
-                         std::vector<w_char>& dest_utf,
-                         const std::string& src,
-                         int* pcaptype,
-                         size_t* pabbrev) {
-  dest.clear();
-  dest_utf.clear();
-
-  // step over the run of blanks at the front
-  const char* start = src.c_str();
-  for (; *start == ' '; ++start) {
-  }
-
-  // count the periods at the back without copying them
-  size_t remaining = strlen(start);
-  size_t periods = 0;
-  for (; remaining > 0 && start[remaining - 1] == '.'; --remaining)
-    ++periods;
-  *pabbrev = periods;
-
-  // only blanks and periods: there is nothing to capitalize
-  if (remaining == 0) {
-    *pcaptype = NOCAP;
-    return 0;
-  }
-
-  dest.assign(start, remaining);
-  if (!utf8) {
-    *pcaptype = get_captype(dest, csconv);
-  } else {
-    u8_u16(dest_utf, dest);
-    *pcaptype = get_captype_utf8(dest_utf, langnum);
-  }
-  return dest.size();
-}
-
-// Copies src into dest without its leading blanks and trailing periods and
-// classifies the capitalization of what is left.
-//
-//   dest     - receives the trimmed word (cleared first)
-//   src      - word to clean
-//   pcaptype - receives NOCAP, INITCAP, ALLCAP, HUHINITCAP or HUHCAP
-//   pabbrev  - receives the number of trailing periods that were removed
-//
-// Both encodings now work on the same trimmed span. The UTF-8 branch used
-// to convert the untrimmed src (so dest kept the blanks and periods) and
-// never set the character count `nc`, which made the ALLCAP test below
-// unreachable for UTF-8 words.
-void HunspellImpl::cleanword(std::string& dest,
-                        const std::string& src,
-                        int* pcaptype,
-                        int* pabbrev) {
-  dest.clear();
-  const unsigned char* q = (const unsigned char*)src.c_str();
-  int firstcap = 0;
-
-  // first skip over any leading blanks
-  while (*q == ' ')
-    ++q;
-
-  // now strip off any trailing periods (recording their presence)
-  *pabbrev = 0;
-  int nl = strlen((const char*)q);
-  while ((nl > 0) && (*(q + nl - 1) == '.')) {
-    nl--;
-    (*pabbrev)++;
-  }
-
-  // if no characters are left it can't be capitalized
-  if (nl <= 0) {
-    *pcaptype = NOCAP;
-    return;
-  }
-
-  // now determine the capitalization type of the first nl letters
-  int ncap = 0;      // characters that are upper case
-  int nneutral = 0;  // characters without distinct upper/lower forms
-  int nc = 0;        // characters examined
-
-  if (!utf8) {
-    while (nl > 0) {
-      nc++;
-      if (csconv[(*q)].ccase)
-        ncap++;
-      if (csconv[(*q)].cupper == csconv[(*q)].clower)
-        nneutral++;
-      dest.push_back(*q++);
-      nl--;
-    }
-    // dest holds at least one character here (nl was > 0 above)
-    firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
-  } else {
-    // convert only the trimmed span, not the whole of src
-    std::vector<w_char> t;
-    u8_u16(t, std::string((const char*)q, nl));
-    nc = static_cast<int>(t.size());
-    for (size_t i = 0; i < t.size(); ++i) {
-      unsigned short idx = (t[i].h << 8) + t[i].l;
-      unsigned short low = unicodetolower(idx, langnum);
-      if (idx != low)
-        ncap++;
-      if (unicodetoupper(idx, langnum) == low)
-        nneutral++;
-    }
-    u16_u8(dest, t);
-    // ncap > 0 implies t is not empty, so t[0] is safe to read
-    if (ncap) {
-      unsigned short idx = (t[0].h << 8) + t[0].l;
-      firstcap = (idx != unicodetolower(idx, langnum));
-    }
-  }
-
-  // now finally set the captype
-  if (ncap == 0) {
-    *pcaptype = NOCAP;
-  } else if ((ncap == 1) && firstcap) {
-    *pcaptype = INITCAP;
-  } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
-    *pcaptype = ALLCAP;
-  } else if ((ncap > 1) && firstcap) {
-    *pcaptype = HUHINITCAP;
-  } else {
-    *pcaptype = HUHCAP;
-  }
-}
-
-// Upper-cases u8 in place, using the converter that matches the dictionary
-// encoding: the character-set table for 8-bit dictionaries, a round trip
-// through w_char for UTF-8 ones.
-void HunspellImpl::mkallcap(std::string& u8) {
-  if (!utf8) {
-    ::mkallcap(u8, csconv);
-    return;
-  }
-
-  std::vector<w_char> wide;
-  u8_u16(wide, u8);
-  ::mkallcap_utf(wide, langnum);
-  u16_u8(u8, wide);
-}
-
-// Lower-cases the word in place and returns the resulting size of u8.
-// In UTF-8 mode the w_char form u16 is lower-cased and u8 is rebuilt from
-// it; otherwise u8 is converted with the character-set table and u16 is
-// left untouched.
-int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
-  if (!utf8) {
-    ::mkallsmall(u8, csconv);
-    return u8.size();
-  }
-
-  ::mkallsmall_utf(u16, langnum);
-  u16_u8(u8, u16);
-  return u8.size();
-}
-
-// Returns a copy of source in which every two-byte sequence C3 9F (the
-// UTF-8 sharp s) has been replaced by the single byte DF (Latin-1 sharp s).
-std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
-  std::string latin1 = source;
-  mystrrep(latin1, "\xC3\x9F", "\xDF");
-  return latin1;
-}
-
-// Recursively tries the "ss" / sharp-s spellings of base.
-//
-// At each "ss" found at or after n_pos (while fewer than MAXSHARPS have
-// been handled, counted in n) the pair is first overwritten with the bytes
-// C3 9F and the rest is searched, then restored to "ss" and the rest is
-// searched again. Once no further "ss" can be branched on, the candidate
-// is passed to checkword(), but only if at least one replacement was made
-// (repnum > 0); for non-UTF-8 dictionaries it is converted with
-// sharps_u8_l1() first. Returns the first entry found, or NULL.
-// When a match is found through the replaced spelling, base is left in
-// that replaced state.
-hentry* HunspellImpl::spellsharps(std::string& base,
-                              size_t n_pos,
-                              int n,
-                              int repnum,
-                              int* info,
-                              std::string* root) {
-  const size_t hit = base.find("ss", n_pos);
-  const bool canBranch = (hit != std::string::npos) && (n < MAXSHARPS);
-
-  if (!canBranch) {
-    // leaf of the search: only worth checking if something was replaced
-    if (repnum <= 0)
-      return NULL;
-    if (utf8)
-      return checkword(base, info, root);
-    std::string latin1(sharps_u8_l1(base));
-    return checkword(latin1, info, root);
-  }
-
-  // branch 1: sharp s at this position
-  base[hit] = '\xC3';
-  base[hit + 1] = '\x9F';
-  hentry* found = spellsharps(base, hit + 2, n + 1, repnum + 1, info, root);
-  if (!found) {
-    // branch 2: keep the plain "ss" here
-    base[hit] = 's';
-    base[hit + 1] = 's';
-    found = spellsharps(base, hit + 2, n + 1, repnum, info, root);
-  }
-  return found;
-}
-
-// Returns 1 when an affix manager exists, a keepcase flag is configured and
-// that flag is present in the entry's flag array (rv->astr); otherwise 0.
-int HunspellImpl::is_keepcase(const hentry* rv) {
-  if (!pAMgr || !rv->astr)
-    return 0;
-  if (!pAMgr->get_keepcase())
-    return 0;
-  return TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen) ? 1 : 0;
-}
-
-/* put one copy of word in front of all suggestions already in slst */
-void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
-  const std::vector<std::string>::iterator front = slst.begin();
-  slst.insert(front, 1, word);
-}
-
-// Public spell-check entry point; forwards to HunspellImpl::spell and
-// returns its verdict unchanged.
-bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
-  const bool accepted = m_Impl->spell(word, info, root);
-  return accepted;
-}
-
-// Decides whether `word` is accepted.
-//
-//   word - word to check; rejected outright when it is MAXWORDLEN bytes or
-//          longer (MAXWORDUTF8LEN for UTF-8 dictionaries)
-//   info - optional; reset to 0 on entry, then receives SPELL_* flags
-//   root - optional; cleared once the early exits are passed and filled by
-//          checkword() when it reports a root
-//
-// Returns true when the word (or, via the break table, all of its parts)
-// is accepted. Also returns true for the SPELL_XML marker, for input that
-// is empty after cleaning, when no dictionary is loaded, and for plain
-// numbers with single '.', ',' or '-' separators.
-//
-// The SPELL_* values are tested with `&` in this function, i.e. they are
-// used as bit masks, so they are set with `|=` and cleared with `&= ~`.
-// The ALLCAP case falls through into INITCAP, which records SPELL_ORIGCAP
-// a second time; with `+=` that second addition carried into a
-// neighbouring bit.
-bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {
-  struct hentry* rv = NULL;
-
-  // give the rest of the function a valid flag accumulator in all cases
-  int info2 = 0;
-  if (!info)
-    info = &info2;
-  else
-    *info = 0;
-
-  // Hunspell supports XML input of the simplified API (see manual)
-  if (word == SPELL_XML)
-    return true;
-  if (utf8) {
-    if (word.size() >= MAXWORDUTF8LEN)
-      return false;
-  } else {
-    if (word.size() >= MAXWORDLEN)
-      return false;
-  }
-  int captype = NOCAP;
-  size_t abbv = 0;
-  size_t wl = 0;
-
-  std::string scw;
-  std::vector<w_char> sunicw;
-
-  // input conversion
-  RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
-  {
-    std::string wspace;
-
-    bool convstatus = rl ? rl->conv(word, wspace) : false;
-    if (convstatus)
-      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
-    else
-      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
-  }
-
-#ifdef MOZILLA_CLIENT
-  // accept the abbreviated words without dots
-  // workaround for the incomplete tokenization of Mozilla
-  abbv = 1;
-#endif
-
-  if (wl == 0 || m_HMgrs.empty())
-    return true;
-  if (root)
-    root->clear();
-
-  // allow numbers with dots, dashes and commas (but forbid double separators:
-  // "..", "--" etc.)
-  enum { NBEGIN, NNUM, NSEP };
-  int nstate = NBEGIN;
-  size_t i;
-
-  for (i = 0; (i < wl); i++) {
-    if ((scw[i] <= '9') && (scw[i] >= '0')) {
-      nstate = NNUM;
-    } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
-      if ((nstate == NSEP) || (i == 0))
-        break;
-      nstate = NSEP;
-    } else
-      break;
-  }
-  // the whole word was consumed and it ended on a digit
-  if ((i == wl) && (nstate == NNUM))
-    return true;
-
-  switch (captype) {
-    case HUHCAP:
-    /* FALLTHROUGH */
-    case HUHINITCAP:
-      *info |= SPELL_ORIGCAP;
-    /* FALLTHROUGH */
-    case NOCAP:
-      rv = checkword(scw, info, root);
-      // retry with one trailing period restored (abbreviation form)
-      if ((abbv) && !(rv)) {
-        std::string u8buffer(scw);
-        u8buffer.push_back('.');
-        rv = checkword(u8buffer, info, root);
-      }
-      break;
-    case ALLCAP: {
-      *info |= SPELL_ORIGCAP;
-      rv = checkword(scw, info, root);
-      if (rv)
-        break;
-      if (abbv) {
-        std::string u8buffer(scw);
-        u8buffer.push_back('.');
-        rv = checkword(u8buffer, info, root);
-        if (rv)
-          break;
-      }
-      // Spec. prefix handling for Catalan, French, Italian:
-      // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
-      size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
-      if (apos != std::string::npos) {
-        mkallsmall2(scw, sunicw);
-        // lowercasing may change the byte length of the string, so locate
-        // the apostrophe again instead of reusing the stale offset
-        apos = scw.find('\'');
-        if (apos != std::string::npos && apos < scw.size() - 1) {
-          std::string part1 = scw.substr(0, apos+1);
-          std::string part2 = scw.substr(apos+1);
-          if (utf8) {
-            std::vector<w_char> part1u, part2u;
-            u8_u16(part1u, part1);
-            u8_u16(part2u, part2);
-            mkinitcap2(part2, part2u);
-            scw = part1 + part2;
-            sunicw = part1u;
-            sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
-            rv = checkword(scw, info, root);
-            if (rv)
-              break;
-          } else {
-            mkinitcap2(part2, sunicw);
-            scw = part1 + part2;
-            rv = checkword(scw, info, root);
-            if (rv)
-              break;
-          }
-          mkinitcap2(scw, sunicw);
-          rv = checkword(scw, info, root);
-          if (rv)
-            break;
-        }
-      }
-      if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
-
-        mkallsmall2(scw, sunicw);
-        std::string u8buffer(scw);
-        rv = spellsharps(u8buffer, 0, 0, 0, info, root);
-        if (!rv) {
-          mkinitcap2(scw, sunicw);
-          rv = spellsharps(scw, 0, 0, 0, info, root);
-        }
-        if ((abbv) && !(rv)) {
-          u8buffer.push_back('.');
-          rv = spellsharps(u8buffer, 0, 0, 0, info, root);
-          if (!rv) {
-            u8buffer = std::string(scw);
-            u8buffer.push_back('.');
-            rv = spellsharps(u8buffer, 0, 0, 0, info, root);
-          }
-        }
-        if (rv)
-          break;
-      }
-    }
-    /* FALLTHROUGH */
-    case INITCAP: {
-
-      // also reached from ALLCAP above, where the flag is already set
-      *info |= SPELL_ORIGCAP;
-      mkallsmall2(scw, sunicw);
-      std::string u8buffer(scw);  // all-lowercase form
-      mkinitcap2(scw, sunicw);    // scw is now the initial-capital form
-      // SPELL_INITCAP is only raised for the duration of the lookup
-      if (captype == INITCAP)
-        *info |= SPELL_INITCAP;
-      rv = checkword(scw, info, root);
-      if (captype == INITCAP)
-        *info &= ~SPELL_INITCAP;
-      // forbid bad capitalization
-      // (for example, ijs -> Ijs instead of IJs in Dutch)
-      // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
-      if (*info & SPELL_FORBIDDEN) {
-        rv = NULL;
-        break;
-      }
-      if (rv && is_keepcase(rv) && (captype == ALLCAP))
-        rv = NULL;
-      if (rv)
-        break;
-
-      rv = checkword(u8buffer, info, root);
-      if (abbv && !rv) {
-        u8buffer.push_back('.');
-        rv = checkword(u8buffer, info, root);
-        if (!rv) {
-          u8buffer = scw;
-          u8buffer.push_back('.');
-          if (captype == INITCAP)
-            *info |= SPELL_INITCAP;
-          rv = checkword(u8buffer, info, root);
-          if (captype == INITCAP)
-            *info &= ~SPELL_INITCAP;
-          if (rv && is_keepcase(rv) && (captype == ALLCAP))
-            rv = NULL;
-          break;
-        }
-      }
-      if (rv && is_keepcase(rv) &&
-          ((captype == ALLCAP) ||
-           // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
-           // in INITCAP form, too.
-           !(pAMgr->get_checksharps() &&
-             ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
-              (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
-        rv = NULL;
-      break;
-    }
-  }
-
-  if (rv) {
-    if (pAMgr && pAMgr->get_warn() && rv->astr &&
-        TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
-      *info |= SPELL_WARN;
-      // a word carrying the warn flag is rejected only when forbidwarn is set
-      if (pAMgr->get_forbidwarn())
-        return false;
-      return true;
-    }
-    return true;
-  }
-
-  // recursive breaking at break points
-  if (!wordbreak.empty()) {
-
-    int nbr = 0;
-    wl = scw.size();
-
-    // calculate break points for recursion limit
-    for (size_t j = 0; j < wordbreak.size(); ++j) {
-      size_t pos = 0;
-      while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) {
-        ++nbr;
-        pos += wordbreak[j].size();
-      }
-    }
-    if (nbr >= 10)
-      return false;
-
-    // check boundary patterns (^begin and end$)
-    for (size_t j = 0; j < wordbreak.size(); ++j) {
-      size_t plen = wordbreak[j].size();
-      if (plen == 1 || plen > wl)
-        continue;
-
-      if (wordbreak[j][0] == '^' &&
-          scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1)))
-        return true;
-
-      if (wordbreak[j][plen - 1] == '$' &&
-          scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) {
-        // cut the pattern off, check the rest, then put it back
-        std::string suffix(scw.substr(wl - plen + 1));
-        scw.resize(wl - plen + 1);
-        if (spell(scw))
-          return true;
-        scw.append(suffix);
-      }
-    }
-
-    // other patterns
-    for (size_t j = 0; j < wordbreak.size(); ++j) {
-      size_t plen = wordbreak[j].size();
-      size_t found = scw.find(wordbreak[j]);
-      if ((found > 0) && (found < wl - plen)) {
-        if (!spell(scw.substr(found + plen)))
-          continue;
-        std::string suffix(scw.substr(found));
-        scw.resize(found);
-        // examine 2 sides of the break point
-        if (spell(scw))
-          return true;
-        scw.append(suffix);
-
-        // LANG_hu: spec. dash rule
-        if (langnum == LANG_hu && wordbreak[j] == "-") {
-          suffix = scw.substr(found + 1);
-          scw.resize(found + 1);
-          if (spell(scw))
-            return true;  // check the first part with dash
-          scw.append(suffix);
-        }
-        // end of LANG specific region
-      }
-    }
-  }
-
-  return false;
-}
-
-// Looks one concrete form of a word up: first in the hash managers, then
-// through affix stripping, and last as a compound.
-//
-//   w    - form to check; ignored characters are removed and, with complex
-//          prefixes, the word is reversed before the lookup
-//   info - optional flag accumulator owned by the caller; SPELL_FORBIDDEN
-//          and SPELL_COMPOUND may be set here, SPELL_INITCAP is read
-//   root - optional; receives he->word when the match came from the affix
-//          or compound check
-//
-// Returns the matching entry, or NULL when the form is unknown or
-// forbidden.
-//
-// The flags are tested with `&` (see the SPELL_INITCAP tests below), so
-// they are recorded with `|=`. The caller may pass the same *info to
-// several calls for one word, and `+=` would then add a flag twice and
-// carry into a neighbouring bit.
-struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
-  bool usebuffer = false;
-  std::string w2;
-  const char* word;
-  int len;
-
-  const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
-  if (ignoredchars != NULL) {
-    w2.assign(w);
-    if (utf8) {
-      const std::vector<w_char>& ignoredchars_utf16 =
-          pAMgr->get_ignore_utf16();
-      remove_ignored_chars_utf(w2, ignoredchars_utf16);
-    } else {
-      remove_ignored_chars(w2, ignoredchars);
-    }
-    word = w2.c_str();
-    len = w2.size();
-    usebuffer = true;
-  } else {
-    word = w.c_str();
-    len = w.size();
-  }
-
-  if (!len)
-    return NULL;
-
-  // word reversing wrapper for complex prefixes
-  if (complexprefixes) {
-    if (!usebuffer) {
-      w2.assign(word);
-      usebuffer = true;
-    }
-    if (utf8)
-      reverseword_utf(w2);
-    else
-      reverseword(w2);
-  }
-
-  // w2 may have been modified above, so refresh the pointer
-  if (usebuffer) {
-    word = w2.c_str();
-  }
-
-  // look word in hash table
-  struct hentry* he = NULL;
-  for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
-    he = m_HMgrs[i]->lookup(word);
-
-    // check forbidden and onlyincompound words
-    if ((he) && (he->astr) && (pAMgr) &&
-        TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
-      if (info)
-        *info |= SPELL_FORBIDDEN;
-      // LANG_hu section: set dash information for suggestions
-      if (langnum == LANG_hu) {
-        if (pAMgr->get_compoundflag() &&
-            TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
-          if (info)
-            *info |= SPELL_COMPOUND;
-        }
-      }
-      return NULL;
-    }
-
-    // he = next not needaffix, onlyincompound homonym or onlyupcase word
-    while (he && (he->astr) && pAMgr &&
-           ((pAMgr->get_needaffix() &&
-             TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
-            (pAMgr->get_onlyincompound() &&
-             TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
-            (info && (*info & SPELL_INITCAP) &&
-             TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
-      he = he->next_homonym;
-  }
-
-  // check with affixes
-  if (!he && pAMgr) {
-    // try stripping off affixes
-    he = pAMgr->affix_check(word, len, 0);
-
-    // check compound restriction and onlyupcase
-    if (he && he->astr &&
-        ((pAMgr->get_onlyincompound() &&
-          TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
-         (info && (*info & SPELL_INITCAP) &&
-          TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
-      he = NULL;
-    }
-
-    if (he) {
-      if ((he->astr) && (pAMgr) &&
-          TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
-        if (info)
-          *info |= SPELL_FORBIDDEN;
-        return NULL;
-      }
-      if (root) {
-        root->assign(he->word);
-        // undo the reversal so the caller sees the root in normal order
-        if (complexprefixes) {
-          if (utf8)
-            reverseword_utf(*root);
-          else
-            reverseword(*root);
-        }
-      }
-      // try check compound word
-    } else if (pAMgr->get_compound()) {
-      struct hentry* rwords[100];  // buffer for COMPOUND pattern checking
-      he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, rwords, 0, 0, info);
-      // LANG_hu section: `moving rule' with last dash
-      if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
-        std::string dup(word, len - 1);
-        he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, rwords, 1, 0, info);
-      }
-      // end of LANG specific region
-      if (he) {
-        if (root) {
-          root->assign(he->word);
-          if (complexprefixes) {
-            if (utf8)
-              reverseword_utf(*root);
-            else
-              reverseword(*root);
-          }
-        }
-        if (info)
-          *info |= SPELL_COMPOUND;
-      }
-    }
-  }
-
-  return he;
-}
-
-// Public suggestion entry point; forwards to HunspellImpl::suggest and
-// returns its list unchanged.
-std::vector<std::string> Hunspell::suggest(const std::string& word) {
-  std::vector<std::string> suggestions = m_Impl->suggest(word);
-  return suggestions;
-}
-
-std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
-  std::vector<std::string> slst;
-
-  int onlycmpdsug = 0;
-  if (!pSMgr || m_HMgrs.empty())
-    return slst;
-
-  // process XML input of the simplified API (see manual)
-  if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
-    return spellml(word);
-  }
-  if (utf8) {
-    if (word.size() >= MAXWORDUTF8LEN)
-      return slst;
-  } else {
-    if (word.size() >= MAXWORDLEN)
-      return slst;
-  }
-  int captype = NOCAP;
-  size_t abbv = 0;
-  size_t wl = 0;
-
-  std::string scw;
-  std::vector<w_char> sunicw;
-
-  // input conversion
-  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
-  {
-    std::string wspace;
-
-    bool convstatus = rl ? rl->conv(word, wspace) : false;
-    if (convstatus)
-      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
-    else
-      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
-
-    if (wl == 0)
-      return slst;
-  }
-
-  int capwords = 0;
-
-  // check capitalized form for FORCEUCASE
-  if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
-    int info = SPELL_ORIGCAP;
-    if (checkword(scw, &info, NULL)) {
-      std::string form(scw);
-      mkinitcap(form);
-      slst.push_back(form);
-      return slst;
-    }
-  }
-
-  switch (captype) {
-    case NOCAP: {
-      pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
-      break;
-    }
-
-    case INITCAP: {
-      capwords = 1;
-      pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
-      std::string wspace(scw);
-      mkallsmall2(wspace, sunicw);
-      pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
-      break;
-    }
-    case HUHINITCAP:
-      capwords = 1;
-    case HUHCAP: {
-      pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
-      // something.The -> something. The
-      size_t dot_pos = scw.find('.');
-      if (dot_pos != std::string::npos) {
-        std::string postdot = scw.substr(dot_pos + 1);
-        int captype_;
-        if (utf8) {
-          std::vector<w_char> postdotu;
-          u8_u16(postdotu, postdot);
-          captype_ = get_captype_utf8(postdotu, langnum);
-        } else {
-          captype_ = get_captype(postdot, csconv);
-        }
-        if (captype_ == INITCAP) {
-          std::string str(scw);
-          str.insert(dot_pos + 1, 1, ' ');
-          insert_sug(slst, str);
-        }
-      }
-
-      std::string wspace;
-
-      if (captype == HUHINITCAP) {
-        // TheOpenOffice.org -> The OpenOffice.org
-        wspace = scw;
-        mkinitsmall2(wspace, sunicw);
-        pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
-      }
-      wspace = scw;
-      mkallsmall2(wspace, sunicw);
-      if (spell(wspace.c_str()))
-        insert_sug(slst, wspace);
-      size_t prevns = slst.size();
-      pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
-      if (captype == HUHINITCAP) {
-        mkinitcap2(wspace, sunicw);
-        if (spell(wspace.c_str()))
-          insert_sug(slst, wspace);
-        pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
-      }
-      // aNew -> "a New" (instead of "a new")
-      for (size_t j = prevns; j < slst.size(); ++j) {
-        const char* space = strchr(slst[j].c_str(), ' ');
-        if (space) {
-          size_t slen = strlen(space + 1);
-          // different case after space (need capitalisation)
-          if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
-            std::string first(slst[j].c_str(), space + 1);
-            std::string second(space + 1);
-            std::vector<w_char> w;
-            if (utf8)
-              u8_u16(w, second);
-            mkinitcap2(second, w);
-            // set as first suggestion
-            slst.erase(slst.begin() + j);
-            slst.insert(slst.begin(), first + second);
-          }
-        }
-      }
-      break;
-    }
-
-    case ALLCAP: {
-      std::string wspace(scw);
-      mkallsmall2(wspace, sunicw);
-      pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
-      if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
-        insert_sug(slst, wspace);
-      mkinitcap2(wspace, sunicw);
-      pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
-      for (size_t j = 0; j < slst.size(); ++j) {
-        mkallcap(slst[j]);
-        if (pAMgr && pAMgr->get_checksharps()) {
-          if (utf8) {
-            mystrrep(slst[j], "\xC3\x9F", "SS");
-          } else {
-            mystrrep(slst[j], "\xDF", "SS");
-          }
-        }
-      }
-      break;
-    }
-  }
-
-  // LANG_hu section: replace '-' with ' ' in Hungarian
-  if (langnum == LANG_hu) {
-    for (size_t j = 0; j < slst.size(); ++j) {
-      size_t pos = slst[j].find('-');
-      if (pos != std::string::npos) {
-        int info;
-        std::string w(slst[j].substr(0, pos));
-        w.append(slst[j].substr(pos + 1));
-        (void)spell(w, &info, NULL);
-        if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
-          slst[j][pos] = ' ';
-        } else
-          slst[j][pos] = '-';
-      }
-    }
-  }
-  // END OF LANG_hu section
-
-  // try ngram approach since found nothing or only compound words
-  if (pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
-    switch (captype) {
-      case NOCAP: {
-        pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs);
-        break;
-      }
-      case HUHINITCAP:
-        capwords = 1;
-      case HUHCAP: {
-        std::string wspace(scw);
-        mkallsmall2(wspace, sunicw);
-        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
-        break;
-      }
-      case INITCAP: {
-        capwords = 1;
-        std::string wspace(scw);
-        mkallsmall2(wspace, sunicw);
-        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
-        break;
-      }
-      case ALLCAP: {
-        std::string wspace(scw);
-        mkallsmall2(wspace, sunicw);
-        size_t oldns = slst.size();
-        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
-        for (size_t j = oldns; j < slst.size(); ++j) {
-          mkallcap(slst[j]);
-        }
-        break;
-      }
-    }
-  }
-
-  // try dash suggestion (Afo-American -> Afro-American)
-  size_t dash_pos = scw.find('-');
-  if (dash_pos != std::string::npos) {
-    int nodashsug = 1;
-    for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
-      if (slst[j].find('-') != std::string::npos)
-        nodashsug = 0;
-    }
-
-    size_t prev_pos = 0;
-    bool last = false;
-
-    while (nodashsug && !last) {
-      if (dash_pos == scw.size())
-        last = 1;
-      std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
-      if (!spell(chunk.c_str())) {
-        std::vector<std::string> nlst = suggest(chunk.c_str());
-        for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) {
-          std::string wspace = scw.substr(0, prev_pos);
-          wspace.append(*j);
-          if (!last) {
-            wspace.append("-");
-            wspace.append(scw.substr(dash_pos + 1));
-          }
-          insert_sug(slst, wspace);
-        }
-        nodashsug = 0;
-      }
-      if (!last) {
-        prev_pos = dash_pos + 1;
-        dash_pos = scw.find('-', prev_pos);
-      }
-      if (dash_pos == std::string::npos)
-        dash_pos = scw.size();
-    }
-  }
-
-  // word reversing wrapper for complex prefixes
-  if (complexprefixes) {
-    for (size_t j = 0; j < slst.size(); ++j) {
-      if (utf8)
-        reverseword_utf(slst[j]);
-      else
-        reverseword(slst[j]);
-    }
-  }
-
-  // capitalize
-  if (capwords)
-    for (size_t j = 0; j < slst.size(); ++j) {
-      mkinitcap(slst[j]);
-    }
-
-  // expand suggestions with dot(s)
-  if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
-    for (size_t j = 0; j < slst.size(); ++j) {
-      slst[j].append(word.substr(word.size() - abbv));
-    }
-  }
-
-  // remove bad capitalized and forbidden forms
-  if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
-    switch (captype) {
-      case INITCAP:
-      case ALLCAP: {
-        size_t l = 0;
-        for (size_t j = 0; j < slst.size(); ++j) {
-          if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
-            std::string s;
-            std::vector<w_char> w;
-            if (utf8) {
-              u8_u16(w, slst[j]);
-            } else {
-              s = slst[j];
-            }
-            mkallsmall2(s, w);
-            if (spell(s)) {
-              slst[l] = s;
-              ++l;
-            } else {
-              mkinitcap2(s, w);
-              if (spell(s)) {
-                slst[l] = s;
-                ++l;
-              }
-            }
-          } else {
-            slst[l] = slst[j];
-            ++l;
-          }
-        }
-        slst.resize(l);
-      }
-    }
-  }
-
-  // remove duplications
-  size_t l = 0;
-  for (size_t j = 0; j < slst.size(); ++j) {
-    slst[l] = slst[j];
-    for (size_t k = 0; k < l; ++k) {
-      if (slst[k] == slst[j]) {
-        --l;
-        break;
-      }
-    }
-    ++l;
-  }
-  slst.resize(l);
-
-  // output conversion
-  rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
-  for (size_t j = 0; rl && j < slst.size(); ++j) {
-    std::string wspace;
-    if (rl->conv(slst[j], wspace)) {
-      slst[j] = wspace;
-    }
-  }
-
-  return slst;
-}
-
-const std::string& Hunspell::get_dict_encoding() const {
-  return m_Impl->get_dict_encoding();
-}
-
-const std::string& HunspellImpl::get_dict_encoding() const {
-  return encoding;
-}
-
-std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
-  return m_Impl->stem(desc);
-}
-
-std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
-  std::vector<std::string> slst;
-
-  std::string result2;
-  if (desc.empty())
-    return slst;
-  for (size_t i = 0; i < desc.size(); ++i) {
-
-    std::string result;
-
-    // add compound word parts (except the last one)
-    const char* s = desc[i].c_str();
-    const char* part = strstr(s, MORPH_PART);
-    if (part) {
-      const char* nextpart = strstr(part + 1, MORPH_PART);
-      while (nextpart) {
-        std::string field;
-        copy_field(field, part, MORPH_PART);
-        result.append(field);
-        part = nextpart;
-        nextpart = strstr(part + 1, MORPH_PART);
-      }
-      s = part;
-    }
-
-    std::string tok(s);
-    size_t alt = 0;
-    while ((alt = tok.find(" | ", alt)) != std::string::npos) {
-      tok[alt + 1] = MSEP_ALT;
-    }
-    std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
-    for (size_t k = 0; k < pl.size(); ++k) {
-      // add derivational suffixes
-      if (pl[k].find(MORPH_DERI_SFX) != std::string::npos) {
-        // remove inflectional suffixes
-        const size_t is = pl[k].find(MORPH_INFL_SFX);
-        if (is != std::string::npos)
-          pl[k].resize(is);
-        std::vector<std::string> singlepl;
-        singlepl.push_back(pl[k]);
-        std::string sg = pSMgr->suggest_gen(singlepl, pl[k]);
-        if (!sg.empty()) {
-          std::vector<std::string> gen = line_tok(sg, MSEP_REC);
-          for (size_t j = 0; j < gen.size(); ++j) {
-            result2.push_back(MSEP_REC);
-            result2.append(result);
-            result2.append(gen[j]);
-          }
-        }
-      } else {
-        result2.push_back(MSEP_REC);
-        result2.append(result);
-        if (pl[k].find(MORPH_SURF_PFX) != std::string::npos) {
-          std::string field;
-          copy_field(field, pl[k], MORPH_SURF_PFX);
-          result2.append(field);
-        }
-        std::string field;
-        copy_field(field, pl[k], MORPH_STEM);
-        result2.append(field);
-      }
-    }
-  }
-  slst = line_tok(result2, MSEP_REC);
-  uniqlist(slst);
-  return slst;
-}
-
-std::vector<std::string> Hunspell::stem(const std::string& word) {
-  return m_Impl->stem(word);
-}
-
-std::vector<std::string> HunspellImpl::stem(const std::string& word) {
-  return stem(analyze(word));
-}
-
-const char* Hunspell::get_wordchars() const {
-  return m_Impl->get_wordchars().c_str();
-}
-
-const std::string& Hunspell::get_wordchars_cpp() const {
-  return m_Impl->get_wordchars();
-}
-
-const std::string& HunspellImpl::get_wordchars() const {
-  return pAMgr->get_wordchars();
-}
-
-const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
-  return m_Impl->get_wordchars_utf16();
-}
-
-const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
-  return pAMgr->get_wordchars_utf16();
-}
-
-void HunspellImpl::mkinitcap(std::string& u8) {
-  if (utf8) {
-    std::vector<w_char> u16;
-    u8_u16(u16, u8);
-    ::mkinitcap_utf(u16, langnum);
-    u16_u8(u8, u16);
-  } else {
-    ::mkinitcap(u8, csconv);
-  }
-}
-
-int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
-  if (utf8) {
-    ::mkinitcap_utf(u16, langnum);
-    u16_u8(u8, u16);
-  } else {
-    ::mkinitcap(u8, csconv);
-  }
-  return u8.size();
-}
-
-int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
-  if (utf8) {
-    ::mkinitsmall_utf(u16, langnum);
-    u16_u8(u8, u16);
-  } else {
-    ::mkinitsmall(u8, csconv);
-  }
-  return u8.size();
-}
-
-int Hunspell::add(const std::string& word) {
-  return m_Impl->add(word);
-}
-
-int HunspellImpl::add(const std::string& word) {
-  if (!m_HMgrs.empty())
-    return m_HMgrs[0]->add(word);
-  return 0;
-}
-
-int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
-  return m_Impl->add_with_affix(word, example);
-}
-
-int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
-  if (!m_HMgrs.empty())
-    return m_HMgrs[0]->add_with_affix(word, example);
-  return 0;
-}
-
-int Hunspell::remove(const std::string& word) {
-  return m_Impl->remove(word);
-}
-
-int HunspellImpl::remove(const std::string& word) {
-  if (!m_HMgrs.empty())
-    return m_HMgrs[0]->remove(word);
-  return 0;
-}
-
-const char* Hunspell::get_version() const {
-  return m_Impl->get_version().c_str();
-}
-
-const std::string& Hunspell::get_version_cpp() const {
-  return m_Impl->get_version();
-}
-
-const char* Hunspell::get_try_string() const {
-	return m_Impl->get_try_string();
-}
-
-struct cs_info* HunspellImpl::get_csconv() {
-  return csconv;
-}
-
-struct cs_info* Hunspell::get_csconv() {
-  return m_Impl->get_csconv();
-}
-
-void HunspellImpl::cat_result(std::string& result, const std::string& st) {
-  if (!st.empty()) {
-    if (!result.empty())
-      result.append("\n");
-    result.append(st);
-  }
-}
-
-std::vector<std::string> Hunspell::analyze(const std::string& word) {
-  return m_Impl->analyze(word);
-}
-
-std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
-  std::vector<std::string> slst;
-  if (!pSMgr || m_HMgrs.empty())
-    return slst;
-  if (utf8) {
-    if (word.size() >= MAXWORDUTF8LEN)
-      return slst;
-  } else {
-    if (word.size() >= MAXWORDLEN)
-      return slst;
-  }
-  int captype = NOCAP;
-  size_t abbv = 0;
-  size_t wl = 0;
-
-  std::string scw;
-  std::vector<w_char> sunicw;
-
-  // input conversion
-  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
-  {
-    std::string wspace;
-
-    bool convstatus = rl ? rl->conv(word, wspace) : false;
-    if (convstatus)
-      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
-    else
-      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
-  }
-
-  if (wl == 0) {
-    if (abbv) {
-      scw.clear();
-      for (wl = 0; wl < abbv; wl++)
-        scw.push_back('.');
-      abbv = 0;
-    } else
-      return slst;
-  }
-
-  std::string result;
-
-  size_t n = 0;
-  // test numbers
-  // LANG_hu section: set dash information for suggestions
-  if (langnum == LANG_hu) {
-    size_t n2 = 0;
-    size_t n3 = 0;
-
-    while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
-                        (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
-      n++;
-      if ((scw[n] == '.') || (scw[n] == ',')) {
-        if (((n2 == 0) && (n > 3)) ||
-            ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
-          break;
-        n2++;
-        n3 = n;
-      }
-    }
-
-    if ((n == wl) && (n3 > 0) && (n - n3 > 3))
-      return slst;
-    if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
-                      checkword(scw.substr(n), NULL, NULL))) {
-      result.append(scw);
-      result.resize(n - 1);
-      if (n == wl)
-        cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
-      else {
-        std::string chunk = scw.substr(n - 1, 1);
-        cat_result(result, pSMgr->suggest_morph(chunk));
-        result.push_back('+');  // XXX SPEC. MORPHCODE
-        cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
-      }
-      return line_tok(result, MSEP_REC);
-    }
-  }
-  // END OF LANG_hu section
-
-  switch (captype) {
-    case HUHCAP:
-    case HUHINITCAP:
-    case NOCAP: {
-      cat_result(result, pSMgr->suggest_morph(scw));
-      if (abbv) {
-        std::string u8buffer(scw);
-        u8buffer.push_back('.');
-        cat_result(result, pSMgr->suggest_morph(u8buffer));
-      }
-      break;
-    }
-    case INITCAP: {
-      mkallsmall2(scw, sunicw);
-      std::string u8buffer(scw);
-      mkinitcap2(scw, sunicw);
-      cat_result(result, pSMgr->suggest_morph(u8buffer));
-      cat_result(result, pSMgr->suggest_morph(scw));
-      if (abbv) {
-        u8buffer.push_back('.');
-        cat_result(result, pSMgr->suggest_morph(u8buffer));
-
-        u8buffer = scw;
-        u8buffer.push_back('.');
-
-        cat_result(result, pSMgr->suggest_morph(u8buffer));
-      }
-      break;
-    }
-    case ALLCAP: {
-      cat_result(result, pSMgr->suggest_morph(scw));
-      if (abbv) {
-        std::string u8buffer(scw);
-        u8buffer.push_back('.');
-        cat_result(result, pSMgr->suggest_morph(u8buffer));
-      }
-      mkallsmall2(scw, sunicw);
-      std::string u8buffer(scw);
-      mkinitcap2(scw, sunicw);
-
-      cat_result(result, pSMgr->suggest_morph(u8buffer));
-      cat_result(result, pSMgr->suggest_morph(scw));
-      if (abbv) {
-        u8buffer.push_back('.');
-        cat_result(result, pSMgr->suggest_morph(u8buffer));
-
-        u8buffer = scw;
-        u8buffer.push_back('.');
-
-        cat_result(result, pSMgr->suggest_morph(u8buffer));
-      }
-      break;
-    }
-  }
-
-  if (!result.empty()) {
-    // word reversing wrapper for complex prefixes
-    if (complexprefixes) {
-      if (utf8)
-        reverseword_utf(result);
-      else
-        reverseword(result);
-    }
-    return line_tok(result, MSEP_REC);
-  }
-
-  // compound word with dash (HU) I18n
-  // LANG_hu section: set dash information for suggestions
-
-  size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
-  if (dash_pos != std::string::npos) {
-    int nresult = 0;
-
-    std::string part1 = scw.substr(0, dash_pos);
-    std::string part2 = scw.substr(dash_pos+1);
-
-    // examine 2 sides of the dash
-    if (part2.empty()) {  // base word ending with dash
-      if (spell(part1)) {
-        std::string p = pSMgr->suggest_morph(part1);
-        if (!p.empty()) {
-          slst = line_tok(p, MSEP_REC);
-          return slst;
-        }
-      }
-    } else if (part2.size() == 1 && part2[0] == 'e') {  // XXX (HU) -e hat.
-      if (spell(part1) && (spell("-e"))) {
-        std::string st = pSMgr->suggest_morph(part1);
-        if (!st.empty()) {
-          result.append(st);
-        }
-        result.push_back('+');  // XXX spec. separator in MORPHCODE
-        st = pSMgr->suggest_morph("-e");
-        if (!st.empty()) {
-          result.append(st);
-        }
-        return line_tok(result, MSEP_REC);
-      }
-    } else {
-      // first word ending with dash: word- XXX ???
-      part1.push_back(' ');
-      nresult = spell(part1);
-      part1.erase(part1.size() - 1);
-      if (nresult && spell(part2) &&
-          ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
-        std::string st = pSMgr->suggest_morph(part1);
-        if (!st.empty()) {
-          result.append(st);
-          result.push_back('+');  // XXX spec. separator in MORPHCODE
-        }
-        st = pSMgr->suggest_morph(part2);
-        if (!st.empty()) {
-          result.append(st);
-        }
-        return line_tok(result, MSEP_REC);
-      }
-    }
-    // affixed number in correct word
-    if (nresult && (dash_pos > 0) &&
-        (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
-         (scw[dash_pos - 1] == '.'))) {
-      n = 1;
-      if (scw[dash_pos - n] == '.')
-        n++;
-      // search first not a number character to left from dash
-      while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
-             (n < 6)) {
-        n++;
-      }
-      if (dash_pos < n)
-        n--;
-      // numbers: valami1000000-hoz
-      // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
-      // 56-hoz, 6-hoz
-      for (; n >= 1; n--) {
-        if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
-            continue;
-        }
-        std::string chunk = scw.substr(dash_pos - n);
-        if (checkword(chunk, NULL, NULL)) {
-          result.append(chunk);
-          std::string st = pSMgr->suggest_morph(chunk);
-          if (!st.empty()) {
-            result.append(st);
-          }
-          return line_tok(result, MSEP_REC);
-        }
-      }
-    }
-  }
-  return slst;
-}
-
-std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
-  return m_Impl->generate(word, pl);
-}
-
-std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
-  std::vector<std::string> slst;
-  if (!pSMgr || pl.empty())
-    return slst;
-  std::vector<std::string> pl2 = analyze(word);
-  int captype = NOCAP;
-  int abbv = 0;
-  std::string cw;
-  cleanword(cw, word, &captype, &abbv);
-  std::string result;
-
-  for (size_t i = 0; i < pl.size(); ++i) {
-    cat_result(result, pSMgr->suggest_gen(pl2, pl[i]));
-  }
-
-  if (!result.empty()) {
-    // allcap
-    if (captype == ALLCAP)
-      mkallcap(result);
-
-    // line split
-    slst = line_tok(result, MSEP_REC);
-
-    // capitalize
-    if (captype == INITCAP || captype == HUHINITCAP) {
-      for (size_t j = 0; j < slst.size(); ++j) {
-        mkinitcap(slst[j]);
-      }
-    }
-
-    // temporary filtering of prefix related errors (eg.
-    // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
-    std::vector<std::string>::iterator it = slst.begin();
-    while (it != slst.end()) {
-      if (!spell(*it)) {
-        it = slst.erase(it);
-      } else  {
-        ++it;
-      }
-    }
-  }
-  return slst;
-}
-
-std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
-  return m_Impl->generate(word, pattern);
-}
-
-std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
-  std::vector<std::string> pl = analyze(pattern);
-  std::vector<std::string> slst = generate(word, pl);
-  uniqlist(slst);
-  return slst;
-}
-
-// minimal XML parser functions
-std::string HunspellImpl::get_xml_par(const char* par) {
-  std::string dest;
-  if (!par)
-    return dest;
-  char end = *par;
-  if (end == '>')
-    end = '<';
-  else if (end != '\'' && end != '"')
-    return dest;  // bad XML
-  for (par++; *par != '\0' && *par != end; ++par) {
-    dest.push_back(*par);
-  }
-  mystrrep(dest, "&lt;", "<");
-  mystrrep(dest, "&amp;", "&");
-  return dest;
-}
-
-int Hunspell::get_langnum() const {
-  return m_Impl->get_langnum();
-}
-
-bool Hunspell::input_conv(const std::string& word, std::string& dest) {
-  return m_Impl->input_conv(word, dest);
-}
-
-int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
-  std::string d;
-  bool ret = input_conv(word, d);
-  if (ret && d.size() < destsize) {
-    strncpy(dest, d.c_str(), destsize);
-    return 1;
-  }
-  return 0;
-}
-
-bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
-  RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
-  if (rl) {
-    return rl->conv(word, dest);
-  }
-  dest.assign(word);
-  return false;
-}
-
-// return the beginning of the element (attr == NULL) or the attribute
-const char* HunspellImpl::get_xml_pos(const char* s, const char* attr) {
-  const char* end = strchr(s, '>');
-  if (attr == NULL)
-    return end;
-  const char* p = s;
-  while (1) {
-    p = strstr(p, attr);
-    if (!p || p >= end)
-      return 0;
-    if (*(p - 1) == ' ' || *(p - 1) == '\n')
-      break;
-    p += strlen(attr);
-  }
-  return p + strlen(attr);
-}
-
-int HunspellImpl::check_xml_par(const char* q,
-                            const char* attr,
-                            const char* value) {
-  std::string cw = get_xml_par(get_xml_pos(q, attr));
-  if (cw == value)
-    return 1;
-  return 0;
-}
-
-std::vector<std::string> HunspellImpl::get_xml_list(const char* list, const char* tag) {
-  std::vector<std::string> slst;
-  if (!list)
-    return slst;
-  const char* p = list;
-  for (size_t n = 0; ((p = strstr(p, tag)) != NULL); ++p, ++n) {
-    std::string cw = get_xml_par(p + strlen(tag) - 1);
-    if (cw.empty()) {
-      break;
-    }
-    slst.push_back(cw);
-  }
-  return slst;
-}
-
-std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
-  std::vector<std::string> slst;
-
-  const char* word = in_word.c_str();
-
-  const char* q = strstr(word, "<query");
-  if (!q)
-    return slst;  // bad XML input
-  const char* q2 = strchr(q, '>');
-  if (!q2)
-    return slst;  // bad XML input
-  q2 = strstr(q2, "<word");
-  if (!q2)
-    return slst;  // bad XML input
-  if (check_xml_par(q, "type=", "analyze")) {
-    std::string cw = get_xml_par(strchr(q2, '>'));
-    if (!cw.empty())
-      slst = analyze(cw);
-    if (slst.empty())
-      return slst;
-    // convert the result to <code><a>ana1</a><a>ana2</a></code> format
-    std::string r;
-    r.append("<code>");
-    for (size_t i = 0; i < slst.size(); ++i) {
-      r.append("<a>");
-
-      std::string entry(slst[i]);
-      mystrrep(entry, "\t", " ");
-      mystrrep(entry, "&", "&amp;");
-      mystrrep(entry, "<", "&lt;");
-      r.append(entry);
-
-      r.append("</a>");
-    }
-    r.append("</code>");
-    slst.clear();
-    slst.push_back(r);
-    return slst;
-  } else if (check_xml_par(q, "type=", "stem")) {
-    std::string cw = get_xml_par(strchr(q2, '>'));
-    if (!cw.empty())
-      return stem(cw);
-  } else if (check_xml_par(q, "type=", "generate")) {
-    std::string cw = get_xml_par(strchr(q2, '>'));
-    if (cw.empty())
-      return slst;
-    const char* q3 = strstr(q2 + 1, "<word");
-    if (q3) {
-      std::string cw2 = get_xml_par(strchr(q3, '>'));
-      if (!cw2.empty()) {
-        return generate(cw, cw2);
-      }
-    } else {
-      if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
-        std::vector<std::string> slst2 = get_xml_list(strchr(q2, '>'), "<a>");
-        if (!slst2.empty()) {
-          slst = generate(cw, slst2);
-          uniqlist(slst);
-          return slst;
-        }
-      }
-    }
-  }
-  return slst;
-}
-
-int Hunspell::spell(const char* word, int* info, char** root) {
-  std::string sroot;
-  bool ret = m_Impl->spell(word, info, root ? &sroot : NULL);
-  if (root) {
-    if (sroot.empty()) {
-      *root = NULL;
-    } else {
-      *root = mystrdup(sroot.c_str());
-    }
-  }
-  return ret;
-}
-
-namespace {
-  int munge_vector(char*** slst, const std::vector<std::string>& items) {
-    if (items.empty()) {
-      *slst = NULL;
-      return 0;
-    } else {
-      *slst = (char**)malloc(sizeof(char*) * items.size());
-      if (!*slst)
-        return 0;
-      for (size_t i = 0; i < items.size(); ++i)
-        (*slst)[i] = mystrdup(items[i].c_str());
-    }
-    return items.size();
-  }
-}
-
-void Hunspell::free_list(char*** slst, int n) {
-  Hunspell_free_list((Hunhandle*)(this), slst, n);
-}
-
-int Hunspell::suggest(char*** slst, const char* word) {
-  return Hunspell_suggest((Hunhandle*)(this), slst, word);
-}
-
-int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
-  std::vector<std::string> stems = m_Impl->suffix_suggest(root_word);
-  return munge_vector(slst, stems);
-}
-
-char* Hunspell::get_dic_encoding() {
-  return &(m_Impl->dic_encoding_vec[0]);
-}
-
-int Hunspell::stem(char*** slst, char** desc, int n) {
-  return Hunspell_stem2((Hunhandle*)(this), slst, desc, n);
-}
-
-int Hunspell::stem(char*** slst, const char* word) {
-  return Hunspell_stem((Hunhandle*)(this), slst, word);
-}
-
-int Hunspell::analyze(char*** slst, const char* word) {
-  return Hunspell_analyze((Hunhandle*)(this), slst, word);
-}
-
-int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
-  return Hunspell_generate2((Hunhandle*)(this), slst, word, pl, pln);
-}
-
-int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
-  return Hunspell_generate((Hunhandle*)(this), slst, word, pattern);
-}
-
-Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
-  return (Hunhandle*)(new Hunspell(affpath, dpath));
-}
-
-Hunhandle* Hunspell_create_key(const char* affpath,
-                               const char* dpath,
-                               const char* key) {
-  return reinterpret_cast<Hunhandle*>(new Hunspell(affpath, dpath, key));
-}
-
-void Hunspell_destroy(Hunhandle* pHunspell) {
-  delete reinterpret_cast<Hunspell*>(pHunspell);
-}
-
-int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
-  return reinterpret_cast<Hunspell*>(pHunspell)->add_dic(dpath);
-}
-
-int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
-  return reinterpret_cast<Hunspell*>(pHunspell)->spell(std::string(word));
-}
-
-char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
-  return reinterpret_cast<Hunspell*>(pHunspell)->get_dic_encoding();
-}
-
-int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
-  std::vector<std::string> suggests = reinterpret_cast<Hunspell*>(pHunspell)->suggest(word);
-  return munge_vector(slst, suggests);
-}
-
-int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
-  std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->analyze(word);
-  return munge_vector(slst, stems);
-}
-
-int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
-
-  std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(word);
-  return munge_vector(slst, stems);
-}
-
-int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
-  std::vector<std::string> morph;
-  for (int i = 0; i < n; ++i)
-    morph.push_back(desc[i]);
-
-  std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(morph);
-  return munge_vector(slst, stems);
-}
-
-int Hunspell_generate(Hunhandle* pHunspell,
-                      char*** slst,
-                      const char* word,
-                      const char* pattern) {
-  std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, pattern);
-  return munge_vector(slst, stems);
-}
-
-int Hunspell_generate2(Hunhandle* pHunspell,
-                       char*** slst,
-                       const char* word,
-                       char** desc,
-                       int n) {
-  std::vector<std::string> morph;
-  for (int i = 0; i < n; ++i)
-    morph.push_back(desc[i]);
-
-  std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, morph);
-  return munge_vector(slst, stems);
-}
-
-/* functions for run-time modification of the dictionary */
-
-/* add word to the run-time dictionary */
-
-int Hunspell_add(Hunhandle* pHunspell, const char* word) {
-  return reinterpret_cast<Hunspell*>(pHunspell)->add(word);
-}
-
-/* add word to the run-time dictionary with affix flags of
- * the example (a dictionary word): Hunspell will recognize
- * affixed forms of the new word, too.
- */
-
-int Hunspell_add_with_affix(Hunhandle* pHunspell,
-                            const char* word,
-                            const char* example) {
-  return reinterpret_cast<Hunspell*>(pHunspell)->add_with_affix(word, example);
-}
-
-/* remove word from the run-time dictionary */
-
-int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
-  return reinterpret_cast<Hunspell*>(pHunspell)->remove(word);
-}
-
-void Hunspell_free_list(Hunhandle*, char*** list, int n) {
-  if (list && *list) {
-    for (int i = 0; i < n; i++)
-      free((*list)[i]);
-    free(*list);
-    *list = NULL;
-  }
-}
-
-std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
-  return m_Impl->suffix_suggest(root_word);
-}
-
-std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
-  std::vector<std::string> slst;
-  struct hentry* he = NULL;
-  int len;
-  std::string w2;
-  const char* word;
-  const char* ignoredchars = pAMgr->get_ignore();
-  if (ignoredchars != NULL) {
-    w2.assign(root_word);
-    if (utf8) {
-      const std::vector<w_char>& ignoredchars_utf16 =
-          pAMgr->get_ignore_utf16();
-      remove_ignored_chars_utf(w2, ignoredchars_utf16);
-    } else {
-      remove_ignored_chars(w2, ignoredchars);
-    }
-    word = w2.c_str();
-  } else
-    word = root_word.c_str();
-
-  len = strlen(word);
-
-  if (!len)
-    return slst;
-
-  for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
-    he = m_HMgrs[i]->lookup(word);
-  }
-  if (he) {
-    slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str());
-  }
-  return slst;
-}
author	George Hazan <ghazan@miranda.im>	2018-03-10 13:56:24 +0300
committer	George Hazan <ghazan@miranda.im>	2018-03-10 13:56:24 +0300
commit	97a16a6e09df80ffa3429e23a0174bd8daaa29a1 (patch)
tree	bc9ec915bfbcdfef2b655aacd8b4d02a80731196 /libs/hunspell/src/hunspell.cxx
parent	cb2caccb52c4044937c0d9e8eda7ddeb1d115e85 (diff)