diff options
| -rw-r--r-- | libs/hunspell/src/affentry.c++ | 201 | ||||
| -rw-r--r-- | libs/hunspell/src/affentry.hxx | 8 | ||||
| -rw-r--r-- | libs/hunspell/src/affixmgr.c++ | 381 | ||||
| -rw-r--r-- | libs/hunspell/src/affixmgr.hxx | 24 | ||||
| -rw-r--r-- | libs/hunspell/src/atypes.hxx | 13 | ||||
| -rw-r--r-- | libs/hunspell/src/baseaffix.hxx | 2 | ||||
| -rw-r--r-- | libs/hunspell/src/config.h | 7 | ||||
| -rw-r--r-- | libs/hunspell/src/csutil.c++ | 520 | ||||
| -rw-r--r-- | libs/hunspell/src/csutil.hxx | 121 | ||||
| -rw-r--r-- | libs/hunspell/src/filemgr.c++ | 10 | ||||
| -rw-r--r-- | libs/hunspell/src/hashmgr.c++ | 193 | ||||
| -rw-r--r-- | libs/hunspell/src/hashmgr.hxx | 15 | ||||
| -rw-r--r-- | libs/hunspell/src/hunspell.c++ | 1554 | ||||
| -rw-r--r-- | libs/hunspell/src/hunspell.hxx | 52 | ||||
| -rw-r--r-- | libs/hunspell/src/phonet.c++ | 59 | ||||
| -rw-r--r-- | libs/hunspell/src/phonet.hxx | 6 | ||||
| -rw-r--r-- | libs/hunspell/src/replist.c++ | 20 | ||||
| -rw-r--r-- | libs/hunspell/src/replist.hxx | 4 | ||||
| -rw-r--r-- | libs/hunspell/src/suggestmgr.c++ | 812 | ||||
| -rw-r--r-- | libs/hunspell/src/suggestmgr.hxx | 7 | ||||
| -rw-r--r-- | libs/hunspell/src/w_char.hxx | 20 | ||||
| -rw-r--r-- | plugins/SpellChecker/src/Version.h | 4 | ||||
| -rw-r--r-- | plugins/SpellChecker/src/dictionary.cpp | 5 | 
23 files changed, 1481 insertions, 2557 deletions
diff --git a/libs/hunspell/src/affentry.c++ b/libs/hunspell/src/affentry.c++ index 983fe2c1ec..bd28274368 100644 --- a/libs/hunspell/src/affentry.c++ +++ b/libs/hunspell/src/affentry.c++ @@ -79,8 +79,6 @@  #include "affentry.hxx"  #include "csutil.hxx" -#define MAXTEMPWORDLEN (MAXWORDUTF8LEN + 4) -  PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)      // register affix manager      : pmyMgr(pmgr), @@ -117,11 +115,10 @@ PfxEntry::~PfxEntry() {  }  // add prefix to this word assuming conditions hold -char* PfxEntry::add(const char* word, int len) { +char* PfxEntry::add(const char* word, size_t len) {    if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&        (len >= numconds) && test_condition(word) && -      (!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0)) && -      ((MAXTEMPWORDLEN) > (len + appnd.size() - strip.size()))) { +      (!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0))) {      /* we have a match so add prefix */      std::string tword(appnd);      tword.append(word + strip.size()); @@ -233,26 +230,21 @@ struct hentry* PfxEntry::checkword(const char* word,                                     int len,                                     char in_compound,                                     const FLAG needflag) { -  int tmpl;           // length of tmpword    struct hentry* he;  // hash entry of root word or NULL -  char tmpword[MAXTEMPWORDLEN];    // on entry prefix is 0 length or already matches the beginning of the word.    
// So if the remaining root word has positive length    // and if there are enough chars in root word and added back strip chars    // to meet the number of characters conditions, then test it -  tmpl = len - appnd.size(); +  int tmpl = len - appnd.size(); // length of tmpword    if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) {      // generate new root word by removing prefix and adding      // back any characters that would have been stripped -    if (strip.size()) { -      strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1); -      tmpword[MAXTEMPWORDLEN - 1] = '\0'; -    } -    strcpy((tmpword + strip.size()), (word + appnd.size())); +    std::string tmpword(strip); +    tmpword.append(word + appnd.size());      // now make sure all of the conditions on characters      // are met.  Please see the appendix at the end of @@ -262,9 +254,9 @@ struct hentry* PfxEntry::checkword(const char* word,      // if all conditions are met then check if resulting      // root word in the dictionary -    if (test_condition(tmpword)) { +    if (test_condition(tmpword.c_str())) {        tmpl += strip.size(); -      if ((he = pmyMgr->lookup(tmpword)) != NULL) { +      if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {          do {            if (TESTAFF(he->astr, aflag, he->alen) &&                // forbid single prefixes with needaffix flag @@ -283,8 +275,9 @@ struct hentry* PfxEntry::checkword(const char* word,        // if ((opts & aeXPRODUCT) && in_compound) {        if ((opts & aeXPRODUCT)) { -        he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, this, NULL, 0, -                                  NULL, FLAG_NULL, needflag, in_compound); +        he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, aeXPRODUCT, this, +                                  NULL, 0, NULL, FLAG_NULL, needflag, +                                  in_compound);          if (he)            return he;        } @@ -298,27 +291,22 @@ struct hentry* PfxEntry::check_twosfx(const char* word,     
                                   int len,                                        char in_compound,                                        const FLAG needflag) { -  int tmpl;           // length of tmpword    struct hentry* he;  // hash entry of root word or NULL -  char tmpword[MAXTEMPWORDLEN];    // on entry prefix is 0 length or already matches the beginning of the word.    // So if the remaining root word has positive length    // and if there are enough chars in root word and added back strip chars    // to meet the number of characters conditions, then test it -  tmpl = len - appnd.size(); +  int tmpl = len - appnd.size(); // length of tmpword    if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&        (tmpl + strip.size() >= numconds)) {      // generate new root word by removing prefix and adding      // back any characters that would have been stripped -    if (strip.size()) { -      strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1); -      tmpword[MAXTEMPWORDLEN - 1] = '\0'; -    } -    strcpy((tmpword + strip.size()), (word + appnd.size())); +    std::string tmpword(strip); +    tmpword.append(word + appnd.size());      // now make sure all of the conditions on characters      // are met.  
Please see the appendix at the end of @@ -328,7 +316,7 @@ struct hentry* PfxEntry::check_twosfx(const char* word,      // if all conditions are met then check if resulting      // root word in the dictionary -    if (test_condition(tmpword)) { +    if (test_condition(tmpword.c_str())) {        tmpl += strip.size();        // prefix matched but no root word was found @@ -336,7 +324,7 @@ struct hentry* PfxEntry::check_twosfx(const char* word,        // cross checked combined with a suffix        if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { -        he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, this, +        he = pmyMgr->suffix_check_twosfx(tmpword.c_str(), tmpl, aeXPRODUCT, this,                                           needflag);          if (he)            return he; @@ -351,26 +339,20 @@ char* PfxEntry::check_twosfx_morph(const char* word,                                     int len,                                     char in_compound,                                     const FLAG needflag) { -  int tmpl;  // length of tmpword -  char tmpword[MAXTEMPWORDLEN]; -    // on entry prefix is 0 length or already matches the beginning of the word.    
// So if the remaining root word has positive length    // and if there are enough chars in root word and added back strip chars    // to meet the number of characters conditions, then test it -  tmpl = len - appnd.size(); +  int tmpl = len - appnd.size(); // length of tmpword    if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&        (tmpl + strip.size() >= numconds)) {      // generate new root word by removing prefix and adding      // back any characters that would have been stripped -    if (strip.size()) { -      strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1); -      tmpword[MAXTEMPWORDLEN - 1] = '\0'; -    } -    strcpy((tmpword + strip.size()), (word + appnd.size())); +    std::string tmpword(strip); +    tmpword.append(word + appnd.size());      // now make sure all of the conditions on characters      // are met.  Please see the appendix at the end of @@ -380,7 +362,7 @@ char* PfxEntry::check_twosfx_morph(const char* word,      // if all conditions are met then check if resulting      // root word in the dictionary -    if (test_condition(tmpword)) { +    if (test_condition(tmpword.c_str())) {        tmpl += strip.size();        // prefix matched but no root word was found @@ -388,7 +370,8 @@ char* PfxEntry::check_twosfx_morph(const char* word,        // ross checked combined with a suffix        if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { -        return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl, aeXPRODUCT, +        return pmyMgr->suffix_check_twosfx_morph(tmpword.c_str(), tmpl, +                                                 aeXPRODUCT,                                                   this, needflag);        }      } @@ -401,31 +384,23 @@ char* PfxEntry::check_morph(const char* word,                              int len,                              char in_compound,                              const FLAG needflag) { -  int tmpl;           // length of tmpword    struct hentry* he;  // hash entry of root word 
or NULL -  char tmpword[MAXTEMPWORDLEN]; -  char result[MAXLNLEN];    char* st; -  *result = '\0'; -    // on entry prefix is 0 length or already matches the beginning of the word.    // So if the remaining root word has positive length    // and if there are enough chars in root word and added back strip chars    // to meet the number of characters conditions, then test it -  tmpl = len - appnd.size(); +  int tmpl = len - appnd.size(); // length of tmpword    if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&        (tmpl + strip.size() >= numconds)) {      // generate new root word by removing prefix and adding      // back any characters that would have been stripped -    if (strip.size()) { -      strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1); -      tmpword[MAXTEMPWORDLEN - 1] = '\0'; -    } -    strcpy(tmpword + strip.size(), word + appnd.size()); +    std::string tmpword(strip); +    tmpword.append(word + appnd.size());      // now make sure all of the conditions on characters      // are met.  
Please see the appendix at the end of @@ -435,9 +410,11 @@ char* PfxEntry::check_morph(const char* word,      // if all conditions are met then check if resulting      // root word in the dictionary -    if (test_condition(tmpword)) { +    if (test_condition(tmpword.c_str())) { +      std::string result; +        tmpl += strip.size(); -      if ((he = pmyMgr->lookup(tmpword)) != NULL) { +      if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {          do {            if (TESTAFF(he->astr, aflag, he->alen) &&                // forbid single prefixes with needaffix flag @@ -446,28 +423,28 @@ char* PfxEntry::check_morph(const char* word,                ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||                 (contclass && TESTAFF(contclass, needflag, contclasslen)))) {              if (morphcode) { -              mystrcat(result, " ", MAXLNLEN); -              mystrcat(result, morphcode, MAXLNLEN); +              result.append(" "); +              result.append(morphcode);              } else -              mystrcat(result, getKey(), MAXLNLEN); +              result.append(getKey());              if (!HENTRY_FIND(he, MORPH_STEM)) { -              mystrcat(result, " ", MAXLNLEN); -              mystrcat(result, MORPH_STEM, MAXLNLEN); -              mystrcat(result, HENTRY_WORD(he), MAXLNLEN); +              result.append(" "); +              result.append(MORPH_STEM); +              result.append(HENTRY_WORD(he));              }              // store the pointer of the hash entry              if (HENTRY_DATA(he)) { -              mystrcat(result, " ", MAXLNLEN); -              mystrcat(result, HENTRY_DATA2(he), MAXLNLEN); +              result.append(" "); +              result.append(HENTRY_DATA2(he));              } else {                // return with debug information                char* flag = pmyMgr->encode_flag(getFlag()); -              mystrcat(result, " ", MAXLNLEN); -              mystrcat(result, MORPH_FLAG, MAXLNLEN); -              
mystrcat(result, flag, MAXLNLEN); +              result.append(" "); +              result.append(MORPH_FLAG); +              result.append(flag);                free(flag);              } -            mystrcat(result, "\n", MAXLNLEN); +            result.append("\n");            }            he = he->next_homonym;          } while (he); @@ -478,18 +455,19 @@ char* PfxEntry::check_morph(const char* word,        // ross checked combined with a suffix        if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { -        st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, this, +        st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, aeXPRODUCT, this,                                          FLAG_NULL, needflag);          if (st) { -          mystrcat(result, st, MAXLNLEN); +          result.append(st);            free(st);          }        } + +      if (!result.empty()) +        return mystrdup(result.c_str());      }    } -  if (*result) -    return mystrdup(result);    return NULL;  } @@ -516,7 +494,8 @@ SfxEntry::SfxEntry(AffixMgr* pmgr, affentry* dp)      c.l.conds2 = dp->c.l.conds2;    } else      memcpy(c.conds, dp->c.conds, MAXCONDLEN); -  rappnd = myrevstrdup(appnd.c_str()); +  rappnd = appnd; +  reverseword(rappnd);    morphcode = dp->morphcode;    contclass = dp->contclass;    contclasslen = dp->contclasslen; @@ -524,8 +503,6 @@ SfxEntry::SfxEntry(AffixMgr* pmgr, affentry* dp)  SfxEntry::~SfxEntry() {    aflag = 0; -  if (rappnd) -    free(rappnd);    pmyMgr = NULL;    if (opts & aeLONGCOND)      free(c.l.conds2); @@ -536,13 +513,12 @@ SfxEntry::~SfxEntry() {  }  // add suffix to this word assuming conditions hold -char* SfxEntry::add(const char* word, int len) { +char* SfxEntry::add(const char* word, size_t len) {    /* make sure all conditions match */    if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&        (len >= numconds) && test_condition(word + len, word) &&        (!strip.size() || -       (strcmp(word + 
len - strip.size(), strip.c_str()) == 0)) && -      ((MAXTEMPWORDLEN) > (len + appnd.size() - strip.size()))) { +       (strcmp(word + len - strip.size(), strip.c_str()) == 0))) {      std::string tword(word);      /* we have a match so add suffix */      tword.replace(len - strip.size(), std::string::npos, appnd); @@ -699,10 +675,7 @@ struct hentry* SfxEntry::checkword(const char* word,                                     const FLAG cclass,                                     const FLAG needflag,                                     const FLAG badflag) { -  int tmpl;           // length of tmpword    struct hentry* he;  // hash entry pointer -  unsigned char* cp; -  char tmpword[MAXTEMPWORDLEN];    PfxEntry* ep = ppfx;    // if this suffix is being cross checked with a prefix @@ -716,7 +689,7 @@ struct hentry* SfxEntry::checkword(const char* word,    // and if there are enough chars in root word and added back strip chars    // to meet the number of characters conditions, then test it -  tmpl = len - appnd.size(); +  int tmpl = len - appnd.size(); // length of tmpword    // the second condition is not enough for UTF-8 strings    // it checked in test_condition() @@ -726,15 +699,13 @@ struct hentry* SfxEntry::checkword(const char* word,      // back any characters that would have been stripped or      // or null terminating the shorter string -    strncpy(tmpword, word, MAXTEMPWORDLEN - 1); -    tmpword[MAXTEMPWORDLEN - 1] = '\0'; -    cp = (unsigned char*)(tmpword + tmpl); +    std::string tmpstring(word, tmpl);      if (strip.size()) { -      strcpy((char*)cp, strip.c_str()); -      tmpl += strip.size(); -      cp = (unsigned char*)(tmpword + tmpl); -    } else -      *cp = '\0'; +      tmpstring.append(strip); +    } + +    const char* tmpword = tmpstring.c_str(); +    const char* endword = tmpword + tmpstring.size();      // now make sure all of the conditions on characters      // are met.  
Please see the appendix at the end of @@ -744,7 +715,7 @@ struct hentry* SfxEntry::checkword(const char* word,      // if all conditions are met then check if resulting      // root word in the dictionary -    if (test_condition((char*)cp, (char*)tmpword)) { +    if (test_condition(endword, tmpword)) {  #ifdef SZOSZABLYA_POSSIBLE_ROOTS        fprintf(stdout, "%s %s %c\n", word, tmpword, aflag);  #endif @@ -804,10 +775,7 @@ struct hentry* SfxEntry::check_twosfx(const char* word,                                        int optflags,                                        PfxEntry* ppfx,                                        const FLAG needflag) { -  int tmpl;           // length of tmpword    struct hentry* he;  // hash entry pointer -  unsigned char* cp; -  char tmpword[MAXTEMPWORDLEN];    PfxEntry* ep = ppfx;    // if this suffix is being cross checked with a prefix @@ -821,7 +789,7 @@ struct hentry* SfxEntry::check_twosfx(const char* word,    // and if there are enough chars in root word and added back strip chars    // to meet the number of characters conditions, then test it -  tmpl = len - appnd.size(); +  int tmpl = len - appnd.size(); // length of tmpword    if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&        (tmpl + strip.size() >= numconds)) { @@ -829,15 +797,13 @@ struct hentry* SfxEntry::check_twosfx(const char* word,      // back any characters that would have been stripped or      // or null terminating the shorter string -    strncpy(tmpword, word, MAXTEMPWORDLEN - 1); -    tmpword[MAXTEMPWORDLEN - 1] = '\0'; -    cp = (unsigned char*)(tmpword + tmpl); -    if (strip.size()) { -      strcpy((char*)cp, strip.c_str()); -      tmpl += strip.size(); -      cp = (unsigned char*)(tmpword + tmpl); -    } else -      *cp = '\0'; +    std::string tmpword(word); +    tmpword.resize(tmpl); +    tmpword.append(strip); +    tmpl += strip.size(); + +    const char* beg = tmpword.c_str(); +    const char* end = beg + tmpl;      // now make sure all of 
the conditions on characters      // are met.  Please see the appendix at the end of @@ -846,17 +812,17 @@ struct hentry* SfxEntry::check_twosfx(const char* word,      // if all conditions are met then recall suffix_check -    if (test_condition((char*)cp, (char*)tmpword)) { +    if (test_condition(end, beg)) {        if (ppfx) {          // handle conditional suffix          if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) -          he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, +          he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL, NULL, 0, NULL,                                      (FLAG)aflag, needflag);          else -          he = pmyMgr->suffix_check(tmpword, tmpl, optflags, ppfx, NULL, 0, +          he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, optflags, ppfx, NULL, 0,                                      NULL, (FLAG)aflag, needflag);        } else { -        he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, +        he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL, NULL, 0, NULL,                                    (FLAG)aflag, needflag);        }        if (he) @@ -872,9 +838,6 @@ char* SfxEntry::check_twosfx_morph(const char* word,                                     int optflags,                                     PfxEntry* ppfx,                                     const FLAG needflag) { -  int tmpl;  // length of tmpword -  unsigned char* cp; -  char tmpword[MAXTEMPWORDLEN];    PfxEntry* ep = ppfx;    char* st; @@ -893,7 +856,7 @@ char* SfxEntry::check_twosfx_morph(const char* word,    // and if there are enough chars in root word and added back strip chars    // to meet the number of characters conditions, then test it -  tmpl = len - appnd.size(); +  int tmpl = len - appnd.size(); // length of tmpword    if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&        (tmpl + strip.size() >= numconds)) { @@ -901,15 +864,13 @@ char* SfxEntry::check_twosfx_morph(const 
char* word,      // back any characters that would have been stripped or      // or null terminating the shorter string -    strncpy(tmpword, word, MAXTEMPWORDLEN - 1); -    tmpword[MAXTEMPWORDLEN - 1] = '\0'; -    cp = (unsigned char*)(tmpword + tmpl); -    if (strip.size()) { -      strcpy((char*)cp, strip.c_str()); -      tmpl += strip.size(); -      cp = (unsigned char*)(tmpword + tmpl); -    } else -      *cp = '\0'; +    std::string tmpword(word); +    tmpword.resize(tmpl); +    tmpword.append(strip); +    tmpl += strip.size(); + +    const char* beg = tmpword.c_str(); +    const char* end = beg + tmpl;      // now make sure all of the conditions on characters      // are met.  Please see the appendix at the end of @@ -918,11 +879,11 @@ char* SfxEntry::check_twosfx_morph(const char* word,      // if all conditions are met then recall suffix_check -    if (test_condition((char*)cp, (char*)tmpword)) { +    if (test_condition(end, beg)) {        if (ppfx) {          // handle conditional suffix          if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) { -          st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, +          st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag,                                            needflag);            if (st) {              if (ppfx->getMorph()) { @@ -934,7 +895,7 @@ char* SfxEntry::check_twosfx_morph(const char* word,              mychomp(result);            }          } else { -          st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag, +          st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, optflags, ppfx, aflag,                                            needflag);            if (st) {              mystrcat(result, st, MAXLNLEN); @@ -944,7 +905,7 @@ char* SfxEntry::check_twosfx_morph(const char* word,          }        } else {          st = -            pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag); +            
pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, needflag);          if (st) {            mystrcat(result, st, MAXLNLEN);            free(st); diff --git a/libs/hunspell/src/affentry.hxx b/libs/hunspell/src/affentry.hxx index f3db20013c..6311d83fff 100644 --- a/libs/hunspell/src/affentry.hxx +++ b/libs/hunspell/src/affentry.hxx @@ -122,7 +122,7 @@ class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry {    inline FLAG getFlag() { return aflag; }    inline const char* getKey() { return appnd.c_str(); } -  char* add(const char* word, int len); +  char* add(const char* word, size_t len);    inline short getKeyLen() { return appnd.size(); } @@ -154,7 +154,7 @@ class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry {   private:    AffixMgr* pmyMgr; -  char* rappnd; +  std::string rappnd;    SfxEntry* next;    SfxEntry* nexteq; @@ -200,8 +200,8 @@ class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry {                                    const FLAG needflag);    inline FLAG getFlag() { return aflag; } -  inline const char* getKey() { return rappnd; } -  char* add(const char* word, int len); +  inline const char* getKey() { return rappnd.c_str(); } +  char* add(const char* word, size_t len);    inline const char* getMorph() { return morphcode; } diff --git a/libs/hunspell/src/affixmgr.c++ b/libs/hunspell/src/affixmgr.c++ index d21ff49573..d6bb677982 100644 --- a/libs/hunspell/src/affixmgr.c++ +++ b/libs/hunspell/src/affixmgr.c++ @@ -72,13 +72,13 @@   */  #include <stdlib.h> -#include <string>  #include <string.h>  #include <stdio.h>  #include <ctype.h> +#include <algorithm>  #include <limits> - +#include <string>  #include <vector>  #include "affixmgr.hxx" @@ -152,11 +152,7 @@ AffixMgr::AffixMgr(const char* affpath,    cpdsyllablenum = NULL;      // syllable count incrementing flag    checknum = 0;               // checking numbers, and word with numbers    wordchars = NULL;           // letters + spec. 
word characters -  wordchars_utf16 = NULL;     // letters + spec. word characters -  wordchars_utf16_len = 0;    // letters + spec. word characters    ignorechars = NULL;         // letters + spec. word characters -  ignorechars_utf16 = NULL;   // letters + spec. word characters -  ignorechars_utf16_len = 0;  // letters + spec. word characters    version = NULL;             // affix and dictionary file version string    havecontclass = 0;  // flags of possible continuing classes (double affix)    // LEMMA_PRESENT: not put root into the morphological output. Lemma presents @@ -336,12 +332,8 @@ AffixMgr::~AffixMgr() {      free(lang);    if (wordchars)      free(wordchars); -  if (wordchars_utf16) -    free(wordchars_utf16);    if (ignorechars)      free(ignorechars); -  if (ignorechars_utf16) -    free(ignorechars_utf16);    if (version)      free(version);    checknum = 0; @@ -632,8 +624,8 @@ int AffixMgr::parse_file(const char* affpath, const char* key) {      /* parse in the extra word characters */      if (strncmp(line, "WORDCHARS", 9) == 0) { -      if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, -                      utf8, afflst->getlinenum())) { +      if (!parse_array(line, &wordchars, wordchars_utf16, +                       utf8, afflst->getlinenum())) {          finishFileMgr(afflst);          return 1;        } @@ -642,8 +634,8 @@ int AffixMgr::parse_file(const char* affpath, const char* key) {      /* parse in the ignored characters (for example, Arabic optional diacretics       * charachters */      if (strncmp(line, "IGNORE", 6) == 0) { -      if (parse_array(line, &ignorechars, &ignorechars_utf16, -                      &ignorechars_utf16_len, utf8, afflst->getlinenum())) { +      if (!parse_array(line, &ignorechars, ignorechars_utf16, +                       utf8, afflst->getlinenum())) {          finishFileMgr(afflst);          return 1;        } @@ -1174,7 +1166,7 @@ std::string& AffixMgr::debugflag(std::string& 
result, unsigned short flag) {  }  // calculate the character length of the condition -int AffixMgr::condlen(char* st) { +int AffixMgr::condlen(const char* st) {    int l = 0;    bool group = false;    for (; *st; st++) { @@ -1189,7 +1181,7 @@ int AffixMgr::condlen(char* st) {    return l;  } -int AffixMgr::encodeit(affentry& entry, char* cs) { +int AffixMgr::encodeit(affentry& entry, const char* cs) {    if (strcmp(cs, ".") != 0) {      entry.numconds = (char)condlen(cs);      // coverity[buffer_size_warning] - deliberate use of lack of end of conds @@ -1328,7 +1320,6 @@ char* AffixMgr::prefix_check_morph(const char* word,                                     int len,                                     char in_compound,                                     const FLAG needflag) { -  char* st;    char result[MAXLNLEN];    result[0] = '\0'; @@ -1340,7 +1331,7 @@ char* AffixMgr::prefix_check_morph(const char* word,    // first handle the special case of 0 length prefixes    PfxEntry* pe = pStart[0];    while (pe) { -    st = pe->check_morph(word, len, in_compound, needflag); +    char* st = pe->check_morph(word, len, in_compound, needflag);      if (st) {        mystrcat(result, st, MAXLNLEN);        free(st); @@ -1355,7 +1346,7 @@ char* AffixMgr::prefix_check_morph(const char* word,    while (pptr) {      if (isSubset(pptr->getKey(), word)) { -      st = pptr->check_morph(word, len, in_compound, needflag); +      char* st = pptr->check_morph(word, len, in_compound, needflag);        if (st) {          // fogemorpheme          if ((in_compound != IN_CPD_NOT) || @@ -1382,8 +1373,6 @@ char* AffixMgr::prefix_check_twosfx_morph(const char* word,                                            int len,                                            char in_compound,                                            const FLAG needflag) { -  char* st; -    char result[MAXLNLEN];    result[0] = '\0'; @@ -1394,7 +1383,7 @@ char* AffixMgr::prefix_check_twosfx_morph(const char* word,    // 
first handle the special case of 0 length prefixes    PfxEntry* pe = pStart[0];    while (pe) { -    st = pe->check_twosfx_morph(word, len, in_compound, needflag); +    char* st = pe->check_twosfx_morph(word, len, in_compound, needflag);      if (st) {        mystrcat(result, st, MAXLNLEN);        free(st); @@ -1408,7 +1397,7 @@ char* AffixMgr::prefix_check_twosfx_morph(const char* word,    while (pptr) {      if (isSubset(pptr->getKey(), word)) { -      st = pptr->check_twosfx_morph(word, len, in_compound, needflag); +      char* st = pptr->check_twosfx_morph(word, len, in_compound, needflag);        if (st) {          mystrcat(result, st, MAXLNLEN);          free(st); @@ -1427,13 +1416,12 @@ char* AffixMgr::prefix_check_twosfx_morph(const char* word,  // Is word a non compound with a REP substitution (see checkcompoundrep)?  int AffixMgr::cpdrep_check(const char* word, int wl) { -  const char* r;    if ((wl < 2) || !numrep)      return 0;    for (int i = 0; i < numrep; i++) { -    r = word; +    const char* r = word;      int lenp = strlen(reptable[i].pattern);      // search every occurence of the pattern in the word      while ((r = strstr(r, reptable[i].pattern)) != NULL) { @@ -1478,14 +1466,14 @@ int AffixMgr::cpdpat_check(const char* word,  // bounds  int AffixMgr::cpdcase_check(const char* word, int pos) {    if (utf8) { -    w_char u, w;      const char* p; -    u8_u16(&u, 1, word + pos);      for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--)        ; -    u8_u16(&w, 1, p); -    unsigned short a = (u.h << 8) + u.l; -    unsigned short b = (w.h << 8) + w.l; +    std::string pair(p); +    std::vector<w_char> pair_u; +    u8_u16(pair_u, pair); +    unsigned short a = pair_u.size() > 1 ? ((pair_u[1].h << 8) + pair_u[1].l) : 0; +    unsigned short b = !pair_u.empty() ? 
((pair_u[0].h << 8) + pair_u[0].l) : 0;      if (((unicodetoupper(a, langnum) == a) ||           (unicodetoupper(b, langnum) == b)) &&          (a != '-') && (b != '-')) @@ -1499,20 +1487,18 @@ int AffixMgr::cpdcase_check(const char* word, int pos) {    return 0;  } +struct metachar_data { +  signed short btpp;  // metacharacter (*, ?) position for backtracking +  signed short btwp;  // word position for metacharacters +  int btnum;          // number of matched characters in metacharacter +}; +  // check compound patterns  int AffixMgr::defcpd_check(hentry*** words,                             short wnum,                             hentry* rv,                             hentry** def,                             char all) { -  signed short -      btpp[MAXWORDLEN];  // metacharacter (*, ?) positions for backtracking -  signed short btwp[MAXWORDLEN];  // word positions for metacharacters -  int btnum[MAXWORDLEN];  // number of matched characters in metacharacter -                          // positions -  short bt = 0; -  int i, j; -  int ok;    int w = 0;    if (!*words) { @@ -1524,6 +1510,11 @@ int AffixMgr::defcpd_check(hentry*** words,      return 0;    } +  std::vector<metachar_data> btinfo(1); + +  short bt = 0; +  int i, j; +    (*words)[wnum] = rv;    // has the last word COMPOUNDRULE flag? @@ -1533,7 +1524,7 @@ int AffixMgr::defcpd_check(hentry*** words,        *words = NULL;      return 0;    } -  ok = 0; +  int ok = 0;    for (i = 0; i < numdefcpd; i++) {      for (j = 0; j < defcpdtable[i].len; j++) {        if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' && @@ -1564,8 +1555,8 @@ int AffixMgr::defcpd_check(hentry*** words,            int wend = (defcpdtable[i].def[pp + 1] == '?') ? 
wp : wnum;            ok2 = 1;            pp += 2; -          btpp[bt] = pp; -          btwp[bt] = wp; +          btinfo[bt].btpp = pp; +          btinfo[bt].btwp = wp;            while (wp <= wend) {              if (!(*words)[wp]->alen ||                  !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp - 2], @@ -1577,9 +1568,11 @@ int AffixMgr::defcpd_check(hentry*** words,            }            if (wp <= wnum)              ok2 = 0; -          btnum[bt] = wp - btwp[bt]; -          if (btnum[bt] > 0) -            bt++; +          btinfo[bt].btnum = wp - btinfo[bt].btwp; +          if (btinfo[bt].btnum > 0) { +            ++bt; +            btinfo.resize(bt+1); +          }            if (ok2)              break;          } else { @@ -1609,10 +1602,10 @@ int AffixMgr::defcpd_check(hentry*** words,        if (bt)          do {            ok = 1; -          btnum[bt - 1]--; -          pp = btpp[bt - 1]; -          wp = btwp[bt - 1] + (signed short)btnum[bt - 1]; -        } while ((btnum[bt - 1] < 0) && --bt); +          btinfo[bt - 1].btnum--; +          pp = btinfo[bt - 1].btpp; +          wp = btinfo[bt - 1].btwp + (signed short)btinfo[bt - 1].btnum; +        } while ((btinfo[bt - 1].btnum < 0) && --bt);      } while (bt);      if (ok && ok2 && (!all || (defcpdtable[i].len <= pp))) @@ -1650,24 +1643,26 @@ inline int AffixMgr::candidate_check(const char* word, int len) {  }  // calculate number of syllable for compound-checking -short AffixMgr::get_syllable(const char* word, int wlen) { +short AffixMgr::get_syllable(const std::string& word) {    if (cpdmaxsyllable == 0)      return 0;    short num = 0;    if (!utf8) { -    for (int i = 0; i < wlen; i++) { +    for (size_t i = 0; i < word.size(); ++i) {        if (strchr(cpdvowels, word[i]))          num++;      }    } else if (cpdvowels_utf16) { -    w_char w[MAXWORDUTF8LEN]; -    int i = u8_u16(w, MAXWORDUTF8LEN, word); +    std::vector<w_char> w; +    int i = u8_u16(w, word);      for (; i > 0; i--) { -      
if (flag_bsearch((unsigned short*)cpdvowels_utf16, -                       ((unsigned short*)w)[i - 1], cpdvowels_utf16_len)) -        num++; +      if (std::binary_search(cpdvowels_utf16, +                             cpdvowels_utf16 + cpdvowels_utf16_len, +                             w[i - 1])) { +        ++num; +      }      }    }    return num; @@ -1676,12 +1671,12 @@ short AffixMgr::get_syllable(const char* word, int wlen) {  void AffixMgr::setcminmax(int* cmin, int* cmax, const char* word, int len) {    if (utf8) {      int i; -    for (*cmin = 0, i = 0; (i < cpdmin) && word[*cmin]; i++) { -      for ((*cmin)++; (word[*cmin] & 0xc0) == 0x80; (*cmin)++) +    for (*cmin = 0, i = 0; (i < cpdmin) && *cmin < len; i++) { +      for ((*cmin)++; *cmin < len && (word[*cmin] & 0xc0) == 0x80; (*cmin)++)          ;      } -    for (*cmax = len, i = 0; (i < (cpdmin - 1)) && *cmax; i++) { -      for ((*cmax)--; (word[*cmax] & 0xc0) == 0x80; (*cmax)--) +    for (*cmax = len, i = 0; (i < (cpdmin - 1)) && *cmax >= 0; i++) { +      for ((*cmax)--; *cmax >= 0 && (word[*cmax] & 0xc0) == 0x80; (*cmax)--)          ;      }    } else { @@ -1699,6 +1694,7 @@ struct hentry* AffixMgr::compound_check(const char* word,                                          short maxwordnum,                                          short wnum,                                          hentry** words = NULL, +                                        hentry** rwords = NULL,                                          char hu_mov_rule = 0,                                          char is_sug = 0,                                          int* info = NULL) { @@ -1706,8 +1702,7 @@ struct hentry* AffixMgr::compound_check(const char* word,    short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;    struct hentry* rv = NULL;    struct hentry* rv_first; -  struct hentry* rwords[MAXWORDLEN];  // buffer for COMPOUND pattern checking -  char st[MAXWORDUTF8LEN + 4]; +  std::string st;    char ch = '\0';    
int cmin;    int cmax; @@ -1726,7 +1721,7 @@ struct hentry* AffixMgr::compound_check(const char* word,    setcminmax(&cmin, &cmax, word, len); -  strcpy(st, word); +  st.assign(word);    for (i = cmin; i < cmax; i++) {      // go to end of the UTF-8 character @@ -1758,11 +1753,11 @@ struct hentry* AffixMgr::compound_check(const char* word,            if (scpd > numcheckcpd)              break;  // break simplified checkcompoundpattern loop -          strcpy(st + i, checkcpdtable[scpd - 1].pattern); +          st.replace(i, std::string::npos, checkcpdtable[scpd - 1].pattern);            soldi = i;            i += strlen(checkcpdtable[scpd - 1].pattern); -          strcpy(st + i, checkcpdtable[scpd - 1].pattern2); -          strcpy(st + i + strlen(checkcpdtable[scpd - 1].pattern2), +          st.replace(i, std::string::npos, checkcpdtable[scpd - 1].pattern2); +          st.replace(i + strlen(checkcpdtable[scpd - 1].pattern2), std::string::npos,                   word + soldi + strlen(checkcpdtable[scpd - 1].pattern3));            oldlen = len; @@ -1771,7 +1766,7 @@ struct hentry* AffixMgr::compound_check(const char* word,                   strlen(checkcpdtable[scpd - 1].pattern3);            oldcmin = cmin;            oldcmax = cmax; -          setcminmax(&cmin, &cmax, st, len); +          setcminmax(&cmin, &cmax, st.c_str(), len);            cmax = len - cpdmin + 1;          } @@ -1785,7 +1780,7 @@ struct hentry* AffixMgr::compound_check(const char* word,          // FIRST WORD          affixed = 1; -        rv = lookup(st);  // perhaps without prefix +        rv = lookup(st.c_str());  // perhaps without prefix          // search homonym with compound flag          while ((rv) && !hu_mov_rule && @@ -1798,9 +1793,9 @@ struct hentry* AffixMgr::compound_check(const char* word,                     TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||                    (numdefcpd && onlycpdrule &&                     ((!words && !wordnum && -                     
defcpd_check(&words, wnum, rv, (hentry**)&rwords, 0)) || +                     defcpd_check(&words, wnum, rv, rwords, 0)) ||                      (words && -                     defcpd_check(&words, wnum, rv, (hentry**)&rwords, 0))))) || +                     defcpd_check(&words, wnum, rv, rwords, 0))))) ||                  (scpd != 0 && checkcpdtable[scpd - 1].cond != FLAG_NULL &&                   !TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)))) {            rv = rv->next_homonym; @@ -1813,14 +1808,14 @@ struct hentry* AffixMgr::compound_check(const char* word,            if (onlycpdrule)              break;            if (compoundflag && -              !(rv = prefix_check(st, i, +              !(rv = prefix_check(st.c_str(), i,                                    hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,                                    compoundflag))) {              if (((rv = suffix_check( -                      st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundflag, +                      st.c_str(), i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundflag,                        hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||                   (compoundmoresuffixes && -                  (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && +                  (rv = suffix_check_twosfx(st.c_str(), i, 0, NULL, compoundflag)))) &&                  !hu_mov_rule && sfx->getCont() &&                  ((compoundforbidflag &&                    TESTAFF(sfx->getCont(), compoundforbidflag, @@ -1834,24 +1829,24 @@ struct hentry* AffixMgr::compound_check(const char* word,            if (rv ||                (((wordnum == 0) && compoundbegin &&                  ((rv = suffix_check( -                      st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, +                      st.c_str(), i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin,                        hu_mov_rule ? 
IN_CPD_OTHER : IN_CPD_BEGIN)) ||                   (compoundmoresuffixes &&                    (rv = suffix_check_twosfx( -                       st, i, 0, NULL, +                       st.c_str(), i, 0, NULL,                         compoundbegin))) ||  // twofold suffixes + compound -                 (rv = prefix_check(st, i, +                 (rv = prefix_check(st.c_str(), i,                                      hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,                                      compoundbegin)))) ||                 ((wordnum > 0) && compoundmiddle &&                  ((rv = suffix_check( -                      st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, +                      st.c_str(), i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle,                        hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||                   (compoundmoresuffixes &&                    (rv = suffix_check_twosfx( -                       st, i, 0, NULL, +                       st.c_str(), i, 0, NULL,                         compoundmiddle))) ||  // twofold suffixes + compound -                 (rv = prefix_check(st, i, +                 (rv = prefix_check(st.c_str(), i,                                      hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,                                      compoundmiddle))))))              checked_prefix = 1; @@ -1942,7 +1937,7 @@ struct hentry* AffixMgr::compound_check(const char* word,                  cpdcase_check(word, i))))              // LANG_hu section: spec. Hungarian rule              || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && -                (rv = affix_check(st, i)) && +                (rv = affix_check(st.c_str(), i)) &&                  (sfx && sfx->getCont() &&                   (  // XXX hardwired Hungarian dic. 
codes                       TESTAFF(sfx->getCont(), (unsigned short)'x', @@ -1954,10 +1949,10 @@ struct hentry* AffixMgr::compound_check(const char* word,            // LANG_hu section: spec. Hungarian rule            if (langnum == LANG_hu) {              // calculate syllable number of the word -            numsyllable += get_syllable(st, i); +            numsyllable += get_syllable(st.substr(i));              // + 1 word, if syllable number of the prefix > 1 (hungarian              // convention) -            if (pfx && (get_syllable(pfx->getKey(), strlen(pfx->getKey())) > 1)) +            if (pfx && (get_syllable(pfx->getKey()) > 1))                wordnum++;            }            // END of LANG_hu section @@ -1977,7 +1972,7 @@ struct hentry* AffixMgr::compound_check(const char* word,                  striple = 1;              } -            rv = lookup((st + i));  // perhaps without prefix +            rv = lookup(st.c_str() + i);  // perhaps without prefix              // search homonym with compound flag              while ((rv) && @@ -2039,7 +2034,7 @@ struct hentry* AffixMgr::compound_check(const char* word,                   (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) &&                  (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||                   ((cpdmaxsyllable != 0) && -                  (numsyllable + get_syllable(HENTRY_WORD(rv), rv->clen) <= +                  (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->clen)) <=                     cpdmaxsyllable))) &&                  (                      // test CHECKCOMPOUNDPATTERN @@ -2123,20 +2118,19 @@ struct hentry* AffixMgr::compound_check(const char* word,              if (langnum == LANG_hu) {                // calculate syllable number of the word -              numsyllable += get_syllable(word + i, strlen(word + i)); +              numsyllable += get_syllable(word + i);                // - affix syllable num.                
// XXX only second suffix (inflections, not derivations)                if (sfxappnd) { -                char* tmp = myrevstrdup(sfxappnd); -                numsyllable -= get_syllable(tmp, strlen(tmp)) + sfxextra; -                free(tmp); +                std::string tmp(sfxappnd); +                reverseword(tmp); +                numsyllable -= get_syllable(tmp) + sfxextra;                }                // + 1 word, if syllable number of the prefix > 1 (hungarian                // convention) -              if (pfx && -                  (get_syllable(pfx->getKey(), strlen(pfx->getKey())) > 1)) +              if (pfx && (get_syllable(pfx->getKey()) > 1))                  wordnum++;                // increment syllable num, if last word has a SYLLABLENUM flag @@ -2187,8 +2181,8 @@ struct hentry* AffixMgr::compound_check(const char* word,              // perhaps second word is a compound word (recursive call)              if (wordnum < maxwordnum) { -              rv = compound_check((st + i), strlen(st + i), wordnum + 1, -                                  numsyllable, maxwordnum, wnum + 1, words, 0, +              rv = compound_check(st.c_str() + i, strlen(st.c_str() + i), wordnum + 1, +                                  numsyllable, maxwordnum, wnum + 1, words, rwords, 0,                                    is_sug, info);                if (rv && numcheckcpd && @@ -2211,11 +2205,11 @@ struct hentry* AffixMgr::compound_check(const char* word,                  // check first part                  if (strncmp(rv->word, word + i, rv->blen) == 0) { -                  char r = *(st + i + rv->blen); -                  *(st + i + rv->blen) = '\0'; +                  char r = st[i + rv->blen]; +                  st[i + rv->blen] = '\0'; -                  if (checkcompoundrep && cpdrep_check(st, i + rv->blen)) { -                    *(st + i + rv->blen) = r; +                  if (checkcompoundrep && cpdrep_check(st.c_str(), i + rv->blen)) { +                    st[ + 
i + rv->blen] = r;                      continue;                    } @@ -2225,11 +2219,11 @@ struct hentry* AffixMgr::compound_check(const char* word,                        rv2 = affix_check(word, len);                      if (rv2 && rv2->astr &&                          TESTAFF(rv2->astr, forbiddenword, rv2->alen) && -                        (strncmp(rv2->word, st, i + rv->blen) == 0)) { +                        (strncmp(rv2->word, st.c_str(), i + rv->blen) == 0)) {                        return NULL;                      }                    } -                  *(st + i + rv->blen) = r; +                  st[i + rv->blen] = r;                  }                }                return rv_first; @@ -2262,7 +2256,7 @@ struct hentry* AffixMgr::compound_check(const char* word,        if (soldi != 0) {          i = soldi; -        strcpy(st, word);  // XXX add more optim. +        st.assign(word);  // XXX add more optim.          soldi = 0;        } else          st[i] = ch; @@ -2283,6 +2277,7 @@ int AffixMgr::compound_check_morph(const char* word,                                     short maxwordnum,                                     short wnum,                                     hentry** words, +                                   hentry** rwords,                                     char hu_mov_rule = 0,                                     char** result = NULL,                                     char* partresult = NULL) { @@ -2292,8 +2287,7 @@ int AffixMgr::compound_check_morph(const char* word,    struct hentry* rv = NULL;    struct hentry* rv_first; -  struct hentry* rwords[MAXWORDLEN];  // buffer for COMPOUND pattern checking -  char st[MAXWORDUTF8LEN + 4]; +  std::string st;    char ch;    int checked_prefix; @@ -2308,7 +2302,7 @@ int AffixMgr::compound_check_morph(const char* word,    setcminmax(&cmin, &cmax, word, len); -  strcpy(st, word); +  st.assign(word);    for (i = cmin; i < cmax; i++) {      // go to end of the UTF-8 character @@ -2340,7 +2334,7 
@@ int AffixMgr::compound_check_morph(const char* word,        if (partresult)          mystrcat(presult, partresult, MAXLNLEN); -      rv = lookup(st);  // perhaps without prefix +      rv = lookup(st.c_str());  // perhaps without prefix        // search homonym with compound flag        while ((rv) && !hu_mov_rule && @@ -2353,9 +2347,9 @@ int AffixMgr::compound_check_morph(const char* word,                   TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||                  (numdefcpd && onlycpdrule &&                   ((!words && !wordnum && -                   defcpd_check(&words, wnum, rv, (hentry**)&rwords, 0)) || +                   defcpd_check(&words, wnum, rv, rwords, 0)) ||                    (words && -                   defcpd_check(&words, wnum, rv, (hentry**)&rwords, 0))))))) { +                   defcpd_check(&words, wnum, rv, rwords, 0))))))) {          rv = rv->next_homonym;        } @@ -2363,10 +2357,10 @@ int AffixMgr::compound_check_morph(const char* word,          affixed = 0;        if (rv) { -        sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_PART, st); +        sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_PART, st.c_str());          if (!HENTRY_FIND(rv, MORPH_STEM)) {            sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_STEM, -                  st); +                  st.c_str());          }          // store the pointer of the hash entry          //            sprintf(presult + strlen(presult), "%c%s%p", MSEP_FLD, @@ -2382,13 +2376,13 @@ int AffixMgr::compound_check_morph(const char* word,            break;          if (compoundflag &&              !(rv = -                  prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, +                  prefix_check(st.c_str(), i, hu_mov_rule ? 
IN_CPD_OTHER : IN_CPD_BEGIN,                                 compoundflag))) { -          if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, +          if (((rv = suffix_check(st.c_str(), i, 0, NULL, NULL, 0, NULL, FLAG_NULL,                                    compoundflag,                                    hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||                 (compoundmoresuffixes && -                (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && +                (rv = suffix_check_twosfx(st.c_str(), i, 0, NULL, compoundflag)))) &&                !hu_mov_rule && sfx->getCont() &&                ((compoundforbidflag &&                  TESTAFF(sfx->getCont(), compoundforbidflag, @@ -2401,44 +2395,44 @@ int AffixMgr::compound_check_morph(const char* word,          if (rv ||              (((wordnum == 0) && compoundbegin && -              ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, +              ((rv = suffix_check(st.c_str(), i, 0, NULL, NULL, 0, NULL, FLAG_NULL,                                    compoundbegin,                                    hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||                 (compoundmoresuffixes &&                  (rv = suffix_check_twosfx( -                     st, i, 0, NULL, +                     st.c_str(), i, 0, NULL,                       compoundbegin))) ||  // twofold suffix+compound -               (rv = prefix_check(st, i, +               (rv = prefix_check(st.c_str(), i,                                    hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,                                    compoundbegin)))) ||               ((wordnum > 0) && compoundmiddle && -              ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, +              ((rv = suffix_check(st.c_str(), i, 0, NULL, NULL, 0, NULL, FLAG_NULL,                                    compoundmiddle,                                    hu_mov_rule ? 
IN_CPD_OTHER : IN_CPD_BEGIN)) ||                 (compoundmoresuffixes &&                  (rv = suffix_check_twosfx( -                     st, i, 0, NULL, +                     st.c_str(), i, 0, NULL,                       compoundmiddle))) ||  // twofold suffix+compound -               (rv = prefix_check(st, i, +               (rv = prefix_check(st.c_str(), i,                                    hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,                                    compoundmiddle)))))) {            // char * p = prefix_check_morph(st, i, 0, compound);            char* p = NULL;            if (compoundflag) -            p = affix_check_morph(st, i, compoundflag); +            p = affix_check_morph(st.c_str(), i, compoundflag);            if (!p || (*p == '\0')) {              if (p)                free(p);              p = NULL;              if ((wordnum == 0) && compoundbegin) { -              p = affix_check_morph(st, i, compoundbegin); +              p = affix_check_morph(st.c_str(), i, compoundbegin);              } else if ((wordnum > 0) && compoundmiddle) { -              p = affix_check_morph(st, i, compoundmiddle); +              p = affix_check_morph(st.c_str(), i, compoundmiddle);              }            }            if (p && (*p != '\0')) {              sprintf(presult + strlen(presult), "%c%s%s%s", MSEP_FLD, MORPH_PART, -                    st, line_uniq_app(&p, MSEP_REC)); +                    st.c_str(), line_uniq_app(&p, MSEP_REC));            }            if (p)              free(p); @@ -2519,7 +2513,7 @@ int AffixMgr::compound_check_morph(const char* word,            // LANG_hu section: spec. 
Hungarian rule            ||            ((!rv) && (langnum == LANG_hu) && hu_mov_rule && -           (rv = affix_check(st, i)) && +           (rv = affix_check(st.c_str(), i)) &&             (sfx && sfx->getCont() &&              (TESTAFF(sfx->getCont(), (unsigned short)'x', sfx->getContLen()) ||               TESTAFF(sfx->getCont(), (unsigned short)'%', sfx->getContLen())))) @@ -2528,11 +2522,11 @@ int AffixMgr::compound_check_morph(const char* word,          // LANG_hu section: spec. Hungarian rule          if (langnum == LANG_hu) {            // calculate syllable number of the word -          numsyllable += get_syllable(st, i); +          numsyllable += get_syllable(st.substr(i));            // + 1 word, if syllable number of the prefix > 1 (hungarian            // convention) -          if (pfx && (get_syllable(pfx->getKey(), strlen(pfx->getKey())) > 1)) +          if (pfx && (get_syllable(pfx->getKey()) > 1))              wordnum++;          }          // END of LANG_hu section @@ -2608,7 +2602,7 @@ int AffixMgr::compound_check_morph(const char* word,               (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) &&              (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||               ((cpdmaxsyllable != 0) && -              (numsyllable + get_syllable(HENTRY_WORD(rv), rv->blen) <= +              (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <=                 cpdmaxsyllable))) &&              ((!checkcompounddup || (rv != rv_first)))) {            // bad compound word @@ -2701,19 +2695,19 @@ int AffixMgr::compound_check_morph(const char* word,          if (langnum == LANG_hu) {            // calculate syllable number of the word -          numsyllable += get_syllable(word + i, strlen(word + i)); +          numsyllable += get_syllable(word + i);            // - affix syllable num.            
// XXX only second suffix (inflections, not derivations)            if (sfxappnd) { -            char* tmp = myrevstrdup(sfxappnd); -            numsyllable -= get_syllable(tmp, strlen(tmp)) + sfxextra; -            free(tmp); +            std::string tmp(sfxappnd); +            reverseword(tmp); +            numsyllable -= get_syllable(tmp) + sfxextra;            }            // + 1 word, if syllable number of the prefix > 1 (hungarian            // convention) -          if (pfx && (get_syllable(pfx->getKey(), strlen(pfx->getKey())) > 1)) +          if (pfx && (get_syllable(pfx->getKey()) > 1))              wordnum++;            // increment syllable num, if last word has a SYLLABLENUM flag @@ -2779,7 +2773,7 @@ int AffixMgr::compound_check_morph(const char* word,          // perhaps second word is a compound word (recursive call)          if ((wordnum < maxwordnum) && (ok == 0)) {            compound_check_morph((word + i), strlen(word + i), wordnum + 1, -                               numsyllable, maxwordnum, wnum + 1, words, 0, +                               numsyllable, maxwordnum, wnum + 1, words, rwords, 0,                                 result, presult);          } else {            rv = NULL; @@ -2795,6 +2789,7 @@ int AffixMgr::compound_check_morph(const char* word,    return 0;  } +  // return 1 if s1 (reversed) is a leading subset of end of s2  /* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int   len) @@ -3402,7 +3397,7 @@ int AffixMgr::expand_rootword(struct guessword* wlst,                                unsigned short al,                                const char* bad,                                int badl, -                              char* phon) { +                              const char* phon) {    int nh = 0;    // first add root word to list    if ((nh < maxn) && @@ -3653,8 +3648,7 @@ char* AffixMgr::get_ignore() const {  }  // return the preferred ignore string for suggestions -unsigned short* 
AffixMgr::get_ignore_utf16(int* len) const { -  *len = ignorechars_utf16_len; +const std::vector<w_char>& AffixMgr::get_ignore_utf16() const {    return ignorechars_utf16;  } @@ -3677,8 +3671,7 @@ const char* AffixMgr::get_wordchars() const {    return wordchars;  } -unsigned short* AffixMgr::get_wordchars_utf16(int* len) const { -  *len = wordchars_utf16_len; +const std::vector<w_char>& AffixMgr::get_wordchars_utf16() const {    return wordchars_utf16;  } @@ -3840,7 +3833,6 @@ int AffixMgr::parse_cpdsyllable(char* line, FileMgr* af) {    char* piece;    int i = 0;    int np = 0; -  w_char w[MAXWORDLEN];    piece = mystrsep(&tp, 0);    while (piece) {      if (*piece != '\0') { @@ -3858,15 +3850,16 @@ int AffixMgr::parse_cpdsyllable(char* line, FileMgr* af) {            if (!utf8) {              cpdvowels = mystrdup(piece);            } else { -            int n = u8_u16(w, MAXWORDLEN, piece); -            if (n > 0) { -              flag_qsort((unsigned short*)w, 0, n); -              cpdvowels_utf16 = (w_char*)malloc(n * sizeof(w_char)); +            std::vector<w_char> w; +            u8_u16(w, piece); +            if (!w.empty()) { +              std::sort(w.begin(), w.end()); +              cpdvowels_utf16 = (w_char*)malloc(w.size() * sizeof(w_char));                if (!cpdvowels_utf16)                  return 1; -              memcpy(cpdvowels_utf16, w, n * sizeof(w_char)); +              memcpy(cpdvowels_utf16, &w[0], w.size());              } -            cpdvowels_utf16_len = n; +            cpdvowels_utf16_len = w.size();            }            np++;            break; @@ -4636,13 +4629,16 @@ int AffixMgr::parse_breaktable(char* line, FileMgr* af) {    return 0;  } -void AffixMgr::reverse_condition(char* piece) { +void AffixMgr::reverse_condition(std::string& piece) { +  if (piece.empty()) +      return; +    int neg = 0; -  for (char* k = piece + strlen(piece) - 1; k >= piece; k--) { +  for (std::string::reverse_iterator k = piece.rbegin(); k != 
piece.rend(); ++k) {      switch (*k) {        case '[': {          if (neg) -          *(k + 1) = '['; +          *(k - 1) = '[';          else            *k = ']';          break; @@ -4650,20 +4646,20 @@ void AffixMgr::reverse_condition(char* piece) {        case ']': {          *k = '[';          if (neg) -          *(k + 1) = '^'; +          *(k - 1) = '^';          neg = 0;          break;        }        case '^': { -        if (*(k + 1) == ']') +        if (*(k - 1) == ']')            neg = 1;          else -          *(k + 1) = *k; +          *(k - 1) = *k;          break;        }        default: {          if (neg) -          *(k + 1) = *k; +          *(k - 1) = *k;        }      }    } @@ -4731,8 +4727,8 @@ int AffixMgr::parse_affix(char* line,          case 3: {            np++;            numents = atoi(piece); -          if ((numents <= 0) || ((::std::numeric_limits<size_t>::max() / -                                  sizeof(struct affentry)) < numents)) { +          if ((numents <= 0) || ((std::numeric_limits<size_t>::max() / +                                  sizeof(struct affentry)) < static_cast<size_t>(numents))) {              char* err = pHMgr->encode_flag(aflag);              if (err) {                HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", @@ -4817,18 +4813,15 @@ int AffixMgr::parse_affix(char* line,            // piece 3 - is string to strip or 0 for null            case 2: {              np++; +            entry->strip = piece;              if (complexprefixes) {                if (utf8) -                reverseword_utf(piece); +                reverseword_utf(entry->strip);                else -                reverseword(piece); +                reverseword(entry->strip);              } -            entry->strip = mystrdup(piece); -            entry->stripl = (unsigned char)strlen(entry->strip); -            if (strcmp(entry->strip, "0") == 0) { -              free(entry->strip); -              entry->strip = 
mystrdup(""); -              entry->stripl = 0; +            if (entry->strip.compare("0") == 0) { +              entry->strip.clear();              }              break;            } @@ -4844,22 +4837,22 @@ int AffixMgr::parse_affix(char* line,              if (dash) {                *dash = '\0'; +              entry->appnd = piece; +                if (ignorechars) {                  if (utf8) { -                  remove_ignored_chars_utf(piece, ignorechars_utf16, -                                           ignorechars_utf16_len); +                  remove_ignored_chars_utf(entry->appnd, ignorechars_utf16);                  } else { -                  remove_ignored_chars(piece, ignorechars); +                  remove_ignored_chars(entry->appnd, ignorechars);                  }                }                if (complexprefixes) {                  if (utf8) -                  reverseword_utf(piece); +                  reverseword_utf(entry->appnd);                  else -                  reverseword(piece); +                  reverseword(entry->appnd);                } -              entry->appnd = mystrdup(piece);                if (pHMgr->is_aliasf()) {                  int index = atoi(dash + 1); @@ -4872,7 +4865,7 @@ int AffixMgr::parse_affix(char* line,                } else {                  entry->contclasslen = (unsigned short)pHMgr->decode_flags(                      &(entry->contclass), dash + 1, af); -                flag_qsort(entry->contclass, 0, entry->contclasslen); +                std::sort(entry->contclass, entry->contclass + entry->contclasslen);                }                *dash = '/'; @@ -4881,74 +4874,74 @@ int AffixMgr::parse_affix(char* line,                  contclasses[(entry->contclass)[_i]] = 1;                }              } else { +              entry->appnd = piece; +                if (ignorechars) {                  if (utf8) { -                  remove_ignored_chars_utf(piece, ignorechars_utf16, -                         
                  ignorechars_utf16_len); +                  remove_ignored_chars_utf(entry->appnd, ignorechars_utf16);                  } else { -                  remove_ignored_chars(piece, ignorechars); +                  remove_ignored_chars(entry->appnd, ignorechars);                  }                }                if (complexprefixes) {                  if (utf8) -                  reverseword_utf(piece); +                  reverseword_utf(entry->appnd);                  else -                  reverseword(piece); +                  reverseword(entry->appnd);                } -              entry->appnd = mystrdup(piece);              } -            entry->appndl = (unsigned char)strlen(entry->appnd); -            if (strcmp(entry->appnd, "0") == 0) { -              free(entry->appnd); -              entry->appnd = mystrdup(""); -              entry->appndl = 0; +            if (entry->appnd.compare("0") == 0) { +              entry->appnd.clear();              }              break;            }            // piece 5 - is the conditions descriptions            case 4: { +            std::string chunk(piece);              np++;              if (complexprefixes) {                if (utf8) -                reverseword_utf(piece); +                reverseword_utf(chunk);                else -                reverseword(piece); -              reverse_condition(piece); +                reverseword(chunk); +              reverse_condition(chunk);              } -            if (entry->stripl && (strcmp(piece, ".") != 0) && -                redundant_condition(at, entry->strip, entry->stripl, piece, +            if (!entry->strip.empty() && chunk != "." 
&& +                redundant_condition(at, entry->strip.c_str(), entry->strip.size(), chunk.c_str(),                                      af->getlinenum())) -              strcpy(piece, "."); +              chunk = ".";              if (at == 'S') { -              reverseword(piece); -              reverse_condition(piece); +              reverseword(chunk); +              reverse_condition(chunk);              } -            if (encodeit(*entry, piece)) +            if (encodeit(*entry, chunk.c_str()))                return 1;              break;            }            case 5: { +            std::string chunk(piece);              np++;              if (pHMgr->is_aliasm()) { -              int index = atoi(piece); +              int index = atoi(chunk.c_str());                entry->morphcode = pHMgr->get_aliasm(index);              } else {                if (complexprefixes) {  // XXX - fix me for morph. gen.                  if (utf8) -                  reverseword_utf(piece); +                  reverseword_utf(chunk);                  else -                  reverseword(piece); +                  reverseword(chunk);                }                // add the remaining of the line                if (*tp) {                  *(tp - 1) = ' '; -                tp = tp + strlen(tp); +                chunk.push_back(' '); +                chunk.append(tp);                } -              entry->morphcode = mystrdup(piece); +              entry->morphcode = mystrdup(chunk.c_str());                if (!entry->morphcode)                  return 1;              } @@ -5002,7 +4995,7 @@ int AffixMgr::parse_affix(char* line,  }  int AffixMgr::redundant_condition(char ft, -                                  char* strip, +                                  const char* strip,                                    int stripl,                                    const char* cond,                                    int linenum) { @@ -5112,11 +5105,7 @@ int 
AffixMgr::get_suffix_words(short unsigned* suff,            hentry* ht = ptr->checkword(nw.c_str(), nw.size(), 0, NULL, NULL, 0,                                        NULL, 0, 0, 0);            if (ht) { -            slst[suff_words_cnt] = (char*)malloc(MAXWORDUTF8LEN * sizeof(char)); -            if (slst[suff_words_cnt]) { -              strcpy(slst[suff_words_cnt], nw.c_str()); -              suff_words_cnt++; -            } +            slst[suff_words_cnt++] = mystrdup(nw.c_str());            }          }          suff++; diff --git a/libs/hunspell/src/affixmgr.hxx b/libs/hunspell/src/affixmgr.hxx index ca376953cd..d70e853388 100644 --- a/libs/hunspell/src/affixmgr.hxx +++ b/libs/hunspell/src/affixmgr.hxx @@ -160,11 +160,9 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr {    PfxEntry* pfx;         // BUG: not stateless    int checknum;    char* wordchars; -  unsigned short* wordchars_utf16; -  int wordchars_utf16_len; +  std::vector<w_char> wordchars_utf16;    char* ignorechars; -  unsigned short* ignorechars_utf16; -  int ignorechars_utf16_len; +  std::vector<w_char> ignorechars_utf16;    char* version;    char* lang;    int langnum; @@ -258,9 +256,9 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr {                        unsigned short al,                        const char* bad,                        int, -                      char*); +                      const char*); -  short get_syllable(const char* word, int wlen); +  short get_syllable(const std::string& word);    int cpdrep_check(const char* word, int len);    int cpdpat_check(const char* word,                     int len, @@ -282,6 +280,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr {                                  short maxwordnum,                                  short wnum,                                  hentry** words, +                                hentry** rwords,                                  char hu_mov_rule,                                  char is_sug,                                  int* 
info); @@ -293,6 +292,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr {                             short maxwordnum,                             short wnum,                             hentry** words, +                           hentry** rwords,                             char hu_mov_rule,                             char** result,                             char* partresult); @@ -317,9 +317,9 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr {    char* get_key_string();    char* get_try_string() const;    const char* get_wordchars() const; -  unsigned short* get_wordchars_utf16(int* len) const; +  const std::vector<w_char>& get_wordchars_utf16() const;    char* get_ignore() const; -  unsigned short* get_ignore_utf16(int* len) const; +  const std::vector<w_char>& get_ignore_utf16() const;    int get_compound() const;    FLAG get_compoundflag() const;    FLAG get_compoundbegin() const; @@ -370,11 +370,11 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr {    int parse_defcpdtable(char* line, FileMgr* af);    int parse_affix(char* line, const char at, FileMgr* af, char* dupflags); -  void reverse_condition(char*); +  void reverse_condition(std::string&);    void debugflag(char* result, unsigned short flag);    std::string& debugflag(std::string& result, unsigned short flag); -  int condlen(char*); -  int encodeit(affentry& entry, char* cs); +  int condlen(const char*); +  int encodeit(affentry& entry, const char* cs);    int build_pfxtree(PfxEntry* pfxptr);    int build_sfxtree(SfxEntry* sfxptr);    int process_pfx_order(); @@ -383,7 +383,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr {    SfxEntry* process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr);    int process_pfx_tree_to_list();    int process_sfx_tree_to_list(); -  int redundant_condition(char, char* strip, int stripl, const char* cond, int); +  int redundant_condition(char, const char* strip, int stripl, const char* cond, int);    void finishFileMgr(FileMgr* afflst);  }; diff --git a/libs/hunspell/src/atypes.hxx 
b/libs/hunspell/src/atypes.hxx index d71f62a32d..60826af20e 100644 --- a/libs/hunspell/src/atypes.hxx +++ b/libs/hunspell/src/atypes.hxx @@ -57,11 +57,11 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}  #include "hashmgr.hxx"  #include "w_char.hxx" +#include <algorithm> +#include <string>  #define SETSIZE 256  #define CONTSIZE 65536 -#define MAXWORDLEN 100 -#define MAXWORDUTF8LEN 256  // affentry options  #define aeXPRODUCT (1 << 0) @@ -98,14 +98,11 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}  #define FLAG_NULL 0x00  #define FREE_FLAG(a) a = 0 -#define TESTAFF(a, b, c) \ -  (flag_bsearch((unsigned short*)a, (unsigned short)b, c)) +#define TESTAFF(a, b, c) (std::binary_search(a, a + c, b))  struct affentry { -  char* strip; -  char* appnd; -  unsigned char stripl; -  unsigned char appndl; +  std::string strip; +  std::string appnd;    char numconds;    char opts;    unsigned short aflag; diff --git a/libs/hunspell/src/baseaffix.hxx b/libs/hunspell/src/baseaffix.hxx index cfc6b71ca4..59256e92f3 100644 --- a/libs/hunspell/src/baseaffix.hxx +++ b/libs/hunspell/src/baseaffix.hxx @@ -59,7 +59,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffEntry {          contclasslen(0) {}    std::string appnd;    std::string strip; -  char numconds; +  unsigned char numconds;    char opts;    unsigned short aflag;    union { diff --git a/libs/hunspell/src/config.h b/libs/hunspell/src/config.h index f1963148b6..1230ed0be7 100644 --- a/libs/hunspell/src/config.h +++ b/libs/hunspell/src/config.h @@ -179,9 +179,6 @@  /* Define to 1 if you have the `__argz_stringify' function. */  #define HAVE___ARGZ_STRINGIFY 1 -/* "Define if you use exterimental functions" */ -//#define HUNSPELL_EXPERIMENTAL 1 -  /* "Define if you need warning messages" */  #define HUNSPELL_WARNING_ON @@ -204,5 +201,5 @@  #define PACKAGE_TARNAME  /* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "1.3.4" -#define VERSION "1.3.4" +#define PACKAGE_VERSION "1.4.0" +#define VERSION "1.4.0" diff --git a/libs/hunspell/src/csutil.c++ b/libs/hunspell/src/csutil.c++ index d7411bb216..1948e4a3b3 100644 --- a/libs/hunspell/src/csutil.c++ +++ b/libs/hunspell/src/csutil.c++ @@ -144,53 +144,6 @@ FILE* myfopen(const char* path, const char* mode) {    return fopen(path, mode);  } -/* only UTF-16 (BMP) implementation */ -char* u16_u8(char* dest, int size, const w_char* src, int srclen) { -  signed char* u8 = (signed char*)dest; -  signed char* u8_max = (signed char*)(u8 + size); -  const w_char* u2 = src; -  const w_char* u2_max = src + srclen; -  while ((u2 < u2_max) && (u8 < u8_max)) { -    if (u2->h) {  // > 0xFF -      // XXX 4-byte haven't implemented yet. -      if (u2->h >= 0x08) {  // >= 0x800 (3-byte UTF-8 character) -        *u8 = 0xe0 + (u2->h >> 4); -        u8++; -        if (u8 < u8_max) { -          *u8 = 0x80 + ((u2->h & 0xf) << 2) + (u2->l >> 6); -          u8++; -          if (u8 < u8_max) { -            *u8 = 0x80 + (u2->l & 0x3f); -            u8++; -          } -        } -      } else {  // < 0x800 (2-byte UTF-8 character) -        *u8 = 0xc0 + (u2->h << 2) + (u2->l >> 6); -        u8++; -        if (u8 < u8_max) { -          *u8 = 0x80 + (u2->l & 0x3f); -          u8++; -        } -      } -    } else {               // <= 0xFF -      if (u2->l & 0x80) {  // >0x80 (2-byte UTF-8 character) -        *u8 = 0xc0 + (u2->l >> 6); -        u8++; -        if (u8 < u8_max) { -          *u8 = 0x80 + (u2->l & 0x3f); -          u8++; -        } -      } else {  // < 0x80 (1-byte UTF-8 character) -        *u8 = u2->l; -        u8++; -      } -    } -    u2++; -  } -  *u8 = '\0'; -  return dest; -} -  std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) {    dest.clear();    std::vector<w_char>::const_iterator u2 = src.begin(); @@ -228,93 +181,6 @@ std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) { 
   return dest;  } -/* only UTF-16 (BMP) implementation */ -int u8_u16(w_char* dest, int size, const char* src) { -  const signed char* u8 = (const signed char*)src; -  w_char* u2 = dest; -  w_char* u2_max = u2 + size; - -  while ((u2 < u2_max) && *u8) { -    switch ((*u8) & 0xf0) { -      case 0x00: -      case 0x10: -      case 0x20: -      case 0x30: -      case 0x40: -      case 0x50: -      case 0x60: -      case 0x70: { -        u2->h = 0; -        u2->l = *u8; -        break; -      } -      case 0x80: -      case 0x90: -      case 0xa0: -      case 0xb0: { -        HUNSPELL_WARNING(stderr, -                         "UTF-8 encoding error. Unexpected continuation bytes " -                         "in %ld. character position\n%s\n", -                         static_cast<long>(u8 - (signed char*)src), src); -        u2->h = 0xff; -        u2->l = 0xfd; -        break; -      } -      case 0xc0: -      case 0xd0: {  // 2-byte UTF-8 codes -        if ((*(u8 + 1) & 0xc0) == 0x80) { -          u2->h = (*u8 & 0x1f) >> 2; -          u2->l = (*u8 << 6) + (*(u8 + 1) & 0x3f); -          u8++; -        } else { -          HUNSPELL_WARNING(stderr, -                           "UTF-8 encoding error. Missing continuation byte in " -                           "%ld. character position:\n%s\n", -                           static_cast<long>(u8 - (signed char*)src), src); -          u2->h = 0xff; -          u2->l = 0xfd; -        } -        break; -      } -      case 0xe0: {  // 3-byte UTF-8 codes -        if ((*(u8 + 1) & 0xc0) == 0x80) { -          u2->h = ((*u8 & 0x0f) << 4) + ((*(u8 + 1) & 0x3f) >> 2); -          u8++; -          if ((*(u8 + 1) & 0xc0) == 0x80) { -            u2->l = (*u8 << 6) + (*(u8 + 1) & 0x3f); -            u8++; -          } else { -            HUNSPELL_WARNING(stderr, -                             "UTF-8 encoding error. Missing continuation byte " -                             "in %ld. 
character position:\n%s\n", -                             static_cast<long>(u8 - (signed char*)src), src); -            u2->h = 0xff; -            u2->l = 0xfd; -          } -        } else { -          HUNSPELL_WARNING(stderr, -                           "UTF-8 encoding error. Missing continuation byte in " -                           "%ld. character position:\n%s\n", -                           static_cast<long>(u8 - (signed char*)src), src); -          u2->h = 0xff; -          u2->l = 0xfd; -        } -        break; -      } -      case 0xf0: {  // 4 or more byte UTF-8 codes -        HUNSPELL_WARNING( -            stderr, "This UTF-8 encoding can't convert to UTF-16:\n%s\n", src); -        u2->h = 0xff; -        u2->l = 0xfd; -        return -1; -      } -    } -    u8++; -    u2++; -  } -  return (int)(u2 - dest); -} -  int u8_u16(std::vector<w_char>& dest, const std::string& src) {    dest.clear();    std::string::const_iterator u8 = src.begin(); @@ -370,7 +236,7 @@ int u8_u16(std::vector<w_char>& dest, const std::string& src) {            u2.h = ((*u8 & 0x0f) << 4) + ((*(u8 + 1) & 0x3f) >> 2);            ++u8;            if ((*(u8 + 1) & 0xc0) == 0x80) { -            u2.l = (*u8 << 6) + (*(u8 + 1) & 0x3f); +            u2.l = (static_cast<unsigned char>(*u8) << 6) + (*(u8 + 1) & 0x3f);              ++u8;            } else {              HUNSPELL_WARNING(stderr, @@ -409,48 +275,6 @@ int u8_u16(std::vector<w_char>& dest, const std::string& src) {    return dest.size();  } -void flag_qsort(unsigned short flags[], int begin, int end) { -  unsigned short reg; -  if (end > begin) { -    unsigned short pivot = flags[begin]; -    int l = begin + 1; -    int r = end; -    while (l < r) { -      if (flags[l] <= pivot) { -        l++; -      } else { -        r--; -        reg = flags[l]; -        flags[l] = flags[r]; -        flags[r] = reg; -      } -    } -    l--; -    reg = flags[begin]; -    flags[begin] = flags[l]; -    flags[l] = reg; - -    flag_qsort(flags, 
begin, l); -    flag_qsort(flags, r, end); -  } -} - -int flag_bsearch(unsigned short flags[], unsigned short flag, int length) { -  int mid; -  int left = 0; -  int right = length - 1; -  while (left <= right) { -    mid = (left + right) / 2; -    if (flags[mid] == flag) -      return 1; -    if (flag < flags[mid]) -      right = mid - 1; -    else -      left = mid + 1; -  } -  return 0; -} -  // strip strings into token based on single char delimiter  // acts like strsep() but only uses a delim char and not  // a delim string @@ -519,25 +343,6 @@ void mychomp(char* s) {      *(s + k - 2) = '\0';  } -//  does an ansi strdup of the reverse of a string -char* myrevstrdup(const char* s) { -  char* d = NULL; -  if (s) { -    size_t sl = strlen(s); -    d = (char*)malloc(sl + 1); -    if (d) { -      const char* p = s + sl - 1; -      char* q = d; -      while (p >= s) -        *q++ = *p--; -      *q = '\0'; -    } else { -      HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); -    } -  } -  return d; -} -  // break text to lines  // return number of lines  int line_tok(const char* text, char*** lines, char breakchar) { @@ -654,26 +459,6 @@ char* line_uniq_app(char** text, char breakchar) {  }  // append s to ends of every lines in text -void strlinecat(char* dest, const char* s) { -  char* dup = mystrdup(dest); -  char* source = dup; -  int len = strlen(s); -  if (dup) { -    while (*source) { -      if (*source == '\n') { -        strncpy(dest, s, len); -        dest += len; -      } -      *dest = *source; -      source++; -      dest++; -    } -    strcpy(dest, s); -    free(dup); -  } -} - -// append s to ends of every lines in text  std::string& strlinecat(std::string& str, const std::string& apd) {    size_t pos = 0;    while ((pos = str.find('\n', pos)) != std::string::npos) { @@ -684,15 +469,6 @@ std::string& strlinecat(std::string& str, const std::string& apd) {    return str;  } -// change \n to char c -char* tr(char* text, char oldc, char newc) { -  
char* p; -  for (p = text; *p; p++) -    if (*p == oldc) -      *p = newc; -  return text; -} -  // morphcmp(): compare MORPH_DERI_SFX, MORPH_INFL_SFX and MORPH_TERM_SFX fields  // in the first line of the inputs  // return 0, if inputs equal @@ -807,23 +583,6 @@ int fieldlen(const char* r) {    return n;  } -char* copy_field(char* dest, const char* morph, const char* var) { -  if (!morph) -    return NULL; -  const char* beg = strstr(morph, var); -  if (beg) { -    char* d = dest; -    for (beg += MORPH_TAG_LEN; -         *beg != ' ' && *beg != '\t' && *beg != '\n' && *beg != '\0'; -         d++, beg++) { -      *d = *beg; -    } -    *d = '\0'; -    return dest; -  } -  return NULL; -} -  bool copy_field(std::string& dest,                  const std::string& morph,                  const std::string& var) { @@ -884,47 +643,18 @@ char* mystrrep(char* word, const char* pat, const char* rep) {  }  // reverse word -int reverseword(char* word) { -  char r; -  for (char *dest = word + strlen(word) - 1; word < dest; word++, dest--) { -    r = *word; -    *word = *dest; -    *dest = r; -  } -  return 0; -} - -// reverse word -std::string& reverseword(std::string& word) { +size_t reverseword(std::string& word) {    std::reverse(word.begin(), word.end()); -  return word; -} - -// reverse word (error: 1) -int reverseword_utf(char* word) { -  w_char w[MAXWORDLEN]; -  w_char* p; -  w_char r; -  int l = u8_u16(w, MAXWORDLEN, word); -  if (l == -1) -    return 1; -  p = w; -  for (w_char *dest = w + l - 1; p < dest; p++, dest--) { -    r = *p; -    *p = *dest; -    *dest = r; -  } -  u16_u8(word, MAXWORDUTF8LEN, w, l); -  return 0; +  return word.size();  }  // reverse word -std::string& reverseword_utf(std::string& word) { +size_t reverseword_utf(std::string& word) {    std::vector<w_char> w;    u8_u16(w, word);    std::reverse(w.begin(), w.end());    u16_u8(word, w); -  return word; +  return w.size();  }  int uniqlist(char** list, int n) { @@ -978,12 +708,22 @@ unsigned char 
ccase(const struct cs_info* csconv, int nIndex) {  }  } -// convert null terminated string to all caps -void mkallcap(char* p, const struct cs_info* csconv) { -  while (*p != '\0') { -    *p = cupper(csconv, static_cast<unsigned char>(*p)); -    p++; +w_char upper_utf(w_char u, int langnum) { +  unsigned short idx = (u.h << 8) + u.l; +  if (idx != unicodetoupper(idx, langnum)) { +    u.h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); +    u.l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); +  } +  return u; +} + +w_char lower_utf(w_char u, int langnum) { +  unsigned short idx = (u.h << 8) + u.l; +  if (idx != unicodetolower(idx, langnum)) { +    u.h = (unsigned char)(unicodetolower(idx, langnum) >> 8); +    u.l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);    } +  return u;  }  // convert std::string to all caps @@ -994,14 +734,6 @@ std::string& mkallcap(std::string& s, const struct cs_info* csconv) {    return s;  } -// convert null terminated string to all little -void mkallsmall(char* p, const struct cs_info* csconv) { -  while (*p != '\0') { -    *p = clower(csconv, static_cast<unsigned char>(*p)); -    p++; -  } -} -  // convert std::string to all little  std::string& mkallsmall(std::string& s, const struct cs_info* csconv) {    for (std::string::iterator aI = s.begin(), aEnd = s.end(); aI != aEnd; ++aI) { @@ -1010,20 +742,9 @@ std::string& mkallsmall(std::string& s, const struct cs_info* csconv) {    return s;  } -void mkallsmall_utf(w_char* u, int nc, int langnum) { -  for (int i = 0; i < nc; i++) { -    unsigned short idx = (u[i].h << 8) + u[i].l; -    if (idx != unicodetolower(idx, langnum)) { -      u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); -      u[i].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); -    } -  } -} -  std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u, -                                    int nc,                                      int langnum) { -  for (int i = 0; i < 
nc; i++) { +  for (size_t i = 0; i < u.size(); ++i) {      unsigned short idx = (u[i].h << 8) + u[i].l;      if (idx != unicodetolower(idx, langnum)) {        u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); @@ -1033,31 +754,51 @@ std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,    return u;  } -void mkallcap_utf(w_char* u, int nc, int langnum) { -  for (int i = 0; i < nc; i++) { +std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) { +  for (size_t i = 0; i < u.size(); i++) {      unsigned short idx = (u[i].h << 8) + u[i].l;      if (idx != unicodetoupper(idx, langnum)) {        u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);        u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);      }    } +  return u;  } -std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int nc, int langnum) { -  for (int i = 0; i < nc; i++) { -    unsigned short idx = (u[i].h << 8) + u[i].l; +std::string& mkinitcap(std::string& s, const struct cs_info* csconv) { +  if (!s.empty()) { +    s[0] = cupper(csconv, static_cast<unsigned char>(s[0])); +  } +  return s; +} + +std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) { +  if (!u.empty()) { +    unsigned short idx = (u[0].h << 8) + u[0].l;      if (idx != unicodetoupper(idx, langnum)) { -      u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); -      u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); +      u[0].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); +      u[0].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);      }    }    return u;  } -// convert null terminated string to have initial capital -void mkinitcap(char* p, const struct cs_info* csconv) { -  if (*p != '\0') -    *p = cupper(csconv, static_cast<unsigned char>(*p)); +std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) { +  if (!s.empty()) { +    s[0] = clower(csconv, static_cast<unsigned char>(s[0])); +  } +  return 
s; +} + +std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) { +  if (!u.empty()) { +    unsigned short idx = (u[0].h << 8) + u[0].l; +    if (idx != unicodetolower(idx, langnum)) { +      u[0].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); +      u[0].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); +    } +  } +  return u;  }  // conversion function for protected memory @@ -1073,35 +814,6 @@ char* get_stored_pointer(const char* s) {  }  #ifndef MOZILLA_CLIENT -// convert null terminated string to all caps using encoding -void enmkallcap(char* d, const char* p, const char* encoding) - -{ -  struct cs_info* csconv = get_current_cs(encoding); -  while (*p != '\0') { -    *d++ = cupper(csconv, static_cast<unsigned char>(*p)); -    p++; -  } -  *d = '\0'; -} - -// convert null terminated string to all little using encoding -void enmkallsmall(char* d, const char* p, const char* encoding) { -  struct cs_info* csconv = get_current_cs(encoding); -  while (*p != '\0') { -    *d++ = clower(csconv, static_cast<unsigned char>(*p)); -    p++; -  } -  *d = '\0'; -} - -// convert null terminated string to have initial capital using encoding -void enmkinitcap(char* d, const char* p, const char* encoding) { -  struct cs_info* csconv = get_current_cs(encoding); -  memcpy(d, p, (strlen(p) + 1)); -  if (*p != '\0') -    *d = cupper(csconv, static_cast<unsigned char>(*p)); -}  // these are simple character mappings for the  // encodings supported @@ -2982,14 +2694,14 @@ int unicodeisalpha(unsigned short c) {  }  /* get type of capitalization */ -int get_captype(char* word, int nl, cs_info* csconv) { +int get_captype(const std::string& word, cs_info* csconv) {    // now determine the capitalization type of the first nl letters -  int ncap = 0; -  int nneutral = 0; -  int firstcap = 0; +  size_t ncap = 0; +  size_t nneutral = 0; +  size_t firstcap = 0;    if (csconv == NULL)      return NOCAP; -  for (char* q = word; *q != '\0'; q++) { +  for 
(std::string::const_iterator q = word.begin(); q != word.end(); ++q) {      unsigned char nIndex = static_cast<unsigned char>(*q);      if (ccase(csconv, nIndex))        ncap++; @@ -3006,7 +2718,7 @@ int get_captype(char* word, int nl, cs_info* csconv) {      return NOCAP;    } else if ((ncap == 1) && firstcap) {      return INITCAP; -  } else if ((ncap == nl) || ((ncap + nneutral) == nl)) { +  } else if ((ncap == word.size()) || ((ncap + nneutral) == word.size())) {      return ALLCAP;    } else if ((ncap > 1) && firstcap) {      return HUHINITCAP; @@ -3014,27 +2726,20 @@ int get_captype(char* word, int nl, cs_info* csconv) {    return HUHCAP;  } -int get_captype_utf8(w_char* word, int nl, int langnum) { +int get_captype_utf8(const std::vector<w_char>& word, int langnum) {    // now determine the capitalization type of the first nl letters -  int ncap = 0; -  int nneutral = 0; -  int firstcap = 0; -  unsigned short idx; -  // don't check too long words -  if (nl >= MAXWORDLEN) -    return 0; -  // big Unicode character (non BMP area) -  if (nl == -1) -    return NOCAP; -  for (int i = 0; i < nl; i++) { -    idx = (word[i].h << 8) + word[i].l; +  size_t ncap = 0; +  size_t nneutral = 0; +  size_t firstcap = 0; +  for (size_t i = 0; i < word.size(); ++i) { +    unsigned short idx = (word[i].h << 8) + word[i].l;      if (idx != unicodetolower(idx, langnum))        ncap++;      if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum))        nneutral++;    }    if (ncap) { -    idx = (word[0].h << 8) + word[0].l; +    unsigned short idx = (word[0].h << 8) + word[0].l;      firstcap = (idx != unicodetolower(idx, langnum));    } @@ -3043,7 +2748,7 @@ int get_captype_utf8(w_char* word, int nl, int langnum) {      return NOCAP;    } else if ((ncap == 1) && firstcap) {      return INITCAP; -  } else if ((ncap == nl) || ((ncap + nneutral) == nl)) { +  } else if ((ncap == word.size()) || ((ncap + nneutral) == word.size())) {      return ALLCAP;    } else if ((ncap > 
1) && firstcap) {      return HUHINITCAP; @@ -3052,63 +2757,22 @@ int get_captype_utf8(w_char* word, int nl, int langnum) {  }  // strip all ignored characters in the string -void remove_ignored_chars_utf(char* word, -                              unsigned short ignored_chars[], -                              int ignored_len) { -  w_char w[MAXWORDLEN]; -  w_char w2[MAXWORDLEN]; -  int i; -  int j; -  int len = u8_u16(w, MAXWORDLEN, word); -  for (i = 0, j = 0; i < len; i++) { -    if (!flag_bsearch(ignored_chars, ((unsigned short*)w)[i], ignored_len)) { -      w2[j] = w[i]; -      j++; -    } -  } -  if (j < i) -    u16_u8(word, MAXWORDUTF8LEN, w2, j); -} - -namespace { -union w_s { -  w_char w; -  unsigned short s; -}; - -unsigned short asushort(w_char in) { -  w_s c; -  c.w = in; -  return c.s; -} -} - -// strip all ignored characters in the string -std::string& remove_ignored_chars_utf(std::string& word, -                                      unsigned short ignored_chars[], -                                      int ignored_len) { +size_t remove_ignored_chars_utf(std::string& word, +                                const std::vector<w_char>& ignored_chars) {    std::vector<w_char> w;    std::vector<w_char> w2;    u8_u16(w, word);    for (size_t i = 0; i < w.size(); ++i) { -    if (!flag_bsearch(ignored_chars, asushort(w[i]), ignored_len)) +    if (!std::binary_search(ignored_chars.begin(), +                            ignored_chars.end(), +                            w[i])) {        w2.push_back(w[i]); +    }    }    u16_u8(word, w2); -  return word; -} - -// strip all ignored characters in the string -void remove_ignored_chars(char* word, char* ignored_chars) { -  for (char* p = word; *p != '\0'; p++) { -    if (!strchr(ignored_chars, *p)) { -      *word = *p; -      word++; -    } -  } -  *word = '\0'; +  return w2.size();  }  namespace { @@ -3119,16 +2783,17 @@ class is_any_of {    bool operator()(char c) { return chars.find(c) != std::string::npos; }   
private: -  const std::string& chars; +  std::string chars;  };  }  // strip all ignored characters in the string -std::string& remove_ignored_chars(std::string& word, -                                  const std::string& ignored_chars) { +size_t remove_ignored_chars(std::string& word, +                            const std::string& ignored_chars) {    word.erase( -      std::remove_if(word.begin(), word.end(), is_any_of(ignored_chars))); -  return word; +      std::remove_if(word.begin(), word.end(), is_any_of(ignored_chars)), +      word.end()); +  return word.size();  }  int parse_string(char* line, char** out, int ln) { @@ -3170,25 +2835,16 @@ int parse_string(char* line, char** out, int ln) {    return 0;  } -int parse_array(char* line, -                char** out, -                unsigned short** out_utf16, -                int* out_utf16_len, -                int utf8, -                int ln) { +bool parse_array(char* line, +                 char** out, +                 std::vector<w_char>& out_utf16, +                 int utf8, +                 int ln) {    if (parse_string(line, out, ln)) -    return 1; +    return false;    if (utf8) { -    w_char w[MAXWORDLEN]; -    int n = u8_u16(w, MAXWORDLEN, *out); -    if (n > 0) { -      flag_qsort((unsigned short*)w, 0, n); -      *out_utf16 = (unsigned short*)malloc(n * sizeof(unsigned short)); -      if (!*out_utf16) -        return 1; -      memcpy(*out_utf16, w, n * sizeof(unsigned short)); -    } -    *out_utf16_len = n; +    u8_u16(out_utf16, *out); +    std::sort(out_utf16.begin(), out_utf16.end());    } -  return 0; +  return true;  } diff --git a/libs/hunspell/src/csutil.hxx b/libs/hunspell/src/csutil.hxx index cd582933b1..ce7091df55 100644 --- a/libs/hunspell/src/csutil.hxx +++ b/libs/hunspell/src/csutil.hxx @@ -131,30 +131,13 @@  LIBHUNSPELL_DLL_EXPORTED FILE* myfopen(const char* path, const char* mode);  // convert UTF-16 characters to UTF-8 -LIBHUNSPELL_DLL_EXPORTED char* u16_u8(char* dest, -      
                                int size, -                                      const w_char* src, -                                      int srclen); -// convert UTF-16 characters to UTF-8  LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest,                                               const std::vector<w_char>& src);  // convert UTF-8 characters to UTF-16 -LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char* dest, int size, const char* src); -// convert UTF-8 characters to UTF-16  LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest,                                      const std::string& src); -// sort 2-byte vector -LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], -                                         int begin, -                                         int end); - -// binary search in 2-byte vector -LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], -                                          unsigned short flag, -                                          int right); -  // remove end of line char(s)  LIBHUNSPELL_DLL_EXPORTED void mychomp(char* s); @@ -164,13 +147,8 @@ LIBHUNSPELL_DLL_EXPORTED char* mystrdup(const char* s);  // strcat for limited length destination string  LIBHUNSPELL_DLL_EXPORTED char* mystrcat(char* dest, const char* st, int max); -// duplicate reverse of string -LIBHUNSPELL_DLL_EXPORTED char* myrevstrdup(const char* s); -  // parse into tokens with char delimiter  LIBHUNSPELL_DLL_EXPORTED char* mystrsep(char** sptr, const char delim); -// parse into tokens with char delimiter -LIBHUNSPELL_DLL_EXPORTED char* mystrsep2(char** sptr, const char delim);  // replace pat by rep in word and return word  LIBHUNSPELL_DLL_EXPORTED char* mystrrep(char* word, @@ -181,9 +159,6 @@ LIBHUNSPELL_DLL_EXPORTED std::string& mystrrep(std::string& str,                                                 const std::string& replace);  // append s to ends of every lines in text -LIBHUNSPELL_DLL_EXPORTED void 
strlinecat(char* lines, const char* s); - -// append s to ends of every lines in text  LIBHUNSPELL_DLL_EXPORTED std::string& strlinecat(std::string& str,                                                   const std::string& apd); @@ -196,18 +171,11 @@ LIBHUNSPELL_DLL_EXPORTED int line_tok(const char* text,  LIBHUNSPELL_DLL_EXPORTED char* line_uniq(char* text, char breakchar);  LIBHUNSPELL_DLL_EXPORTED char* line_uniq_app(char** text, char breakchar); -// change oldchar to newchar in place -LIBHUNSPELL_DLL_EXPORTED char* tr(char* text, char oldc, char newc); -  // reverse word -LIBHUNSPELL_DLL_EXPORTED int reverseword(char*); -// reverse word -LIBHUNSPELL_DLL_EXPORTED std::string& reverseword(std::string& word); +LIBHUNSPELL_DLL_EXPORTED size_t reverseword(std::string& word);  // reverse word -LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char*); -// reverse word -LIBHUNSPELL_DLL_EXPORTED std::string& reverseword_utf(std::string&); +LIBHUNSPELL_DLL_EXPORTED size_t reverseword_utf(std::string&);  // remove duplicates  LIBHUNSPELL_DLL_EXPORTED int uniqlist(char** list, int n); @@ -226,6 +194,8 @@ LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();  LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();  LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c,                                                         int langnum); +LIBHUNSPELL_DLL_EXPORTED w_char upper_utf(w_char u, int langnum); +LIBHUNSPELL_DLL_EXPORTED w_char lower_utf(w_char u, int langnum);  LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c,                                                         int langnum);  LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c); @@ -238,87 +208,64 @@ LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char* lang);  // get characters of the given 8bit encoding with lower- and uppercase forms  LIBHUNSPELL_DLL_EXPORTED char* get_casechars(const char* enc); -// convert null terminated string to all caps using encoding 
-LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char* d, -                                         const char* p, -                                         const char* encoding); - -// convert null terminated string to all little using encoding -LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char* d, -                                           const char* p, -                                           const char* encoding); - -// convert null terminated string to have initial capital using encoding -LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char* d, -                                          const char* p, -                                          const char* encoding); - -// convert null terminated string to all caps -LIBHUNSPELL_DLL_EXPORTED void mkallcap(char* p, const struct cs_info* csconv);  // convert std::string to all caps  LIBHUNSPELL_DLL_EXPORTED std::string& mkallcap(std::string& s,                                                 const struct cs_info* csconv);  // convert null terminated string to all little -LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char* p, const struct cs_info* csconv); -// convert null terminated string to all little  LIBHUNSPELL_DLL_EXPORTED std::string& mkallsmall(std::string& s,                                                   const struct cs_info* csconv); -// convert null terminated string to have initial capital -LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char* p, const struct cs_info* csconv); +// convert first letter of string to little +LIBHUNSPELL_DLL_EXPORTED std::string& mkinitsmall(std::string& s, +                                                 const struct cs_info* csconv); + +// convert first letter of string to capital +LIBHUNSPELL_DLL_EXPORTED std::string& mkinitcap(std::string& s, +                                                const struct cs_info* csconv); + +// convert first letter of UTF-8 string to capital +LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>& +mkinitcap_utf(std::vector<w_char>& u, int langnum); + +// convert 
UTF-8 string to little +LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>& +mkallsmall_utf(std::vector<w_char>& u, int langnum); -// convert first nc characters of UTF-8 string to little -LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char* u, int nc, int langnum); -// convert first nc characters of UTF-8 string to little +// convert first letter of UTF-8 string to little  LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>& -mkallsmall_utf(std::vector<w_char>& u, int nc, int langnum); +mkinitsmall_utf(std::vector<w_char>& u, int langnum); -// convert first nc characters of UTF-8 string to capital -LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char* u, int nc, int langnum); -// convert first nc characters of UTF-8 string to capital +// convert UTF-8 string to capital  LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>& -mkallcap_utf(std::vector<w_char>& u, int nc, int langnum); +mkallcap_utf(std::vector<w_char>& u, int langnum);  // get type of capitalization -LIBHUNSPELL_DLL_EXPORTED int get_captype(char* q, int nl, cs_info*); +LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*);  // get type of capitalization (UTF-8) -LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char* q, int nl, int langnum); +LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum);  // strip all ignored characters in the string -LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf( -    char* word, -    unsigned short ignored_chars[], -    int ignored_len); -// strip all ignored characters in the string -LIBHUNSPELL_DLL_EXPORTED std::string& remove_ignored_chars_utf( +LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars_utf(      std::string& word, -    unsigned short ignored_chars[], -    int ignored_len); +    const std::vector<w_char>& ignored_chars);  // strip all ignored characters in the string -LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char* word, -                                                   char* ignored_chars); -// strip all 
ignored characters in the string -LIBHUNSPELL_DLL_EXPORTED std::string& remove_ignored_chars( +LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars(      std::string& word,      const std::string& ignored_chars);  LIBHUNSPELL_DLL_EXPORTED int parse_string(char* line, char** out, int ln); -LIBHUNSPELL_DLL_EXPORTED int parse_array(char* line, -                                         char** out, -                                         unsigned short** out_utf16, -                                         int* out_utf16_len, -                                         int utf8, -                                         int ln); +LIBHUNSPELL_DLL_EXPORTED bool parse_array(char* line, +                                          char** out, +                                          std::vector<w_char>& out_utf16, +                                          int utf8, +                                          int ln);  LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char* r); -LIBHUNSPELL_DLL_EXPORTED char* copy_field(char* dest, -                                          const char* morph, -                                          const char* var);  LIBHUNSPELL_DLL_EXPORTED bool copy_field(std::string& dest,                                           const std::string& morph,                                           const std::string& var); @@ -375,6 +322,4 @@ LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry* h,    return (HENTRY_DATA(h) ? 
strstr(HENTRY_DATA(h), p) : NULL);  } -#define w_char_eq(a, b) (((a).l == (b).l) && ((a).h == (b).h)) -  #endif diff --git a/libs/hunspell/src/filemgr.c++ b/libs/hunspell/src/filemgr.c++ index cbe41c577b..2218bc79e1 100644 --- a/libs/hunspell/src/filemgr.c++ +++ b/libs/hunspell/src/filemgr.c++ @@ -89,13 +89,9 @@ FileMgr::FileMgr(const char* file, const char* key) : hin(NULL), linenum(0) {    fin = myfopen(file, "r");    if (!fin) {      // check hzipped file -    char* st = (char*)malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1); -    if (st) { -      strcpy(st, file); -      strcat(st, HZIP_EXTENSION); -      hin = new Hunzip(st, key); -      free(st); -    } +    std::string st(file); +    st.append(HZIP_EXTENSION); +    hin = new Hunzip(st.c_str(), key);    }    if (!fin && !hin)      fail(MSG_OPEN, file); diff --git a/libs/hunspell/src/hashmgr.c++ b/libs/hunspell/src/hashmgr.c++ index dbcf56a51c..c3cd95420f 100644 --- a/libs/hunspell/src/hashmgr.c++ +++ b/libs/hunspell/src/hashmgr.c++ @@ -76,6 +76,7 @@  #include <stdio.h>  #include <ctype.h>  #include <limits> +#include <sstream>  #include "hashmgr.hxx"  #include "csutil.hxx" @@ -101,8 +102,6 @@ HashMgr::HashMgr(const char* tpath, const char* apath, const char* key)    enc = NULL;    csconv = 0;    ignorechars = NULL; -  ignorechars_utf16 = NULL; -  ignorechars_utf16_len = 0;    load_config(apath, key);    int ec = load_tables(tpath, key);    if (ec) { @@ -167,8 +166,6 @@ HashMgr::~HashMgr() {    if (ignorechars)      free(ignorechars); -  if (ignorechars_utf16) -    free(ignorechars_utf16);  #ifdef MOZILLA_CLIENT    delete[] csconv; @@ -199,28 +196,56 @@ int HashMgr::add_word(const char* word,                        int al,                        const char* desc,                        bool onlyupcase) { + +  std::string *word_copy = NULL; +  std::string *desc_copy = NULL; +  if (ignorechars || complexprefixes) { +    word_copy = new std::string(word, wbl); + +    if (ignorechars != NULL) { +      if (utf8) 
{ +        wcl = remove_ignored_chars_utf(*word_copy, ignorechars_utf16); +      } else { +        remove_ignored_chars(*word_copy, ignorechars); +      } +    } + +    if (complexprefixes) { +      if (utf8) +        wcl = reverseword_utf(*word_copy); +      else +        reverseword(*word_copy); + +      if (desc && !aliasm) { +        desc_copy = new std::string(desc); + +        if (complexprefixes) { +          if (utf8) +            reverseword_utf(*desc_copy); +          else +            reverseword(*desc_copy); +        } +        desc = desc_copy->c_str(); +      } +    } + +    wbl = word_copy->size(); +    word = word_copy->c_str(); +  } +    bool upcasehomonym = false;    int descl = desc ? (aliasm ? sizeof(char*) : strlen(desc) + 1) : 0;    // variable-length hash record with word and optional fields    struct hentry* hp =        (struct hentry*)malloc(sizeof(struct hentry) + wbl + descl); -  if (!hp) +  if (!hp) { +    delete desc_copy; +    delete word_copy;      return 1; +  } +    char* hpw = hp->word;    strcpy(hpw, word); -  if (ignorechars != NULL) { -    if (utf8) { -      remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len); -    } else { -      remove_ignored_chars(hpw, ignorechars); -    } -  } -  if (complexprefixes) { -    if (utf8) -      reverseword_utf(hpw); -    else -      reverseword(hpw); -  }    int i = hash(hpw); @@ -239,12 +264,6 @@ int HashMgr::add_word(const char* word,        store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));      } else {        strcpy(hpw + wbl + 1, desc); -      if (complexprefixes) { -        if (utf8) -          reverseword_utf(HENTRY_DATA(hp)); -        else -          reverseword(HENTRY_DATA(hp)); -      }      }      if (strstr(HENTRY_DATA(hp), MORPH_PHON))        hp->var += H_OPT_PHON; @@ -254,6 +273,8 @@ int HashMgr::add_word(const char* word,    struct hentry* dp = tableptr[i];    if (!dp) {      tableptr[i] = hp; +    delete desc_copy; +    delete word_copy;      return 0;    
}    while (dp->next != NULL) { @@ -265,6 +286,8 @@ int HashMgr::add_word(const char* word,            dp->astr = hp->astr;            dp->alen = hp->alen;            free(hp); +          delete desc_copy; +          delete word_copy;            return 0;          } else {            dp->next_homonym = hp; @@ -283,6 +306,8 @@ int HashMgr::add_word(const char* word,          dp->astr = hp->astr;          dp->alen = hp->alen;          free(hp); +        delete desc_copy; +        delete word_copy;          return 0;        } else {          dp->next_homonym = hp; @@ -299,11 +324,13 @@ int HashMgr::add_word(const char* word,        free(hp->astr);      free(hp);    } + +  delete desc_copy; +  delete word_copy;    return 0;  } -int HashMgr::add_hidden_capitalized_word(char* word, -                                         int wbl, +int HashMgr::add_hidden_capitalized_word(const std::string& word,                                           int wcl,                                           unsigned short* flags,                                           int flagslen, @@ -326,32 +353,34 @@ int HashMgr::add_hidden_capitalized_word(char* word,        memcpy(flags2, flags, flagslen * sizeof(unsigned short));      flags2[flagslen] = ONLYUPCASEFLAG;      if (utf8) { -      char st[BUFSIZE]; -      w_char w[BUFSIZE]; -      int wlen = u8_u16(w, BUFSIZE, word); -      mkallsmall_utf(w, wlen, langnum); -      mkallcap_utf(w, 1, langnum); -      u16_u8(st, BUFSIZE, w, wlen); -      return add_word(st, wbl, wcl, flags2, flagslen + 1, dp, true); +      std::string st; +      std::vector<w_char> w; +      u8_u16(w, word); +      mkallsmall_utf(w, langnum); +      mkinitcap_utf(w, langnum); +      u16_u8(st, w); +      return add_word(st.c_str(), st.size(), wcl, flags2, flagslen + 1, dp, true);      } else { -      mkallsmall(word, csconv); -      mkinitcap(word, csconv); -      return add_word(word, wbl, wcl, flags2, flagslen + 1, dp, true); +      std::string new_word(word); +      
mkallsmall(new_word, csconv); +      mkinitcap(new_word, csconv); +      int ret = add_word(new_word.c_str(), new_word.size(), wcl, flags2, flagslen + 1, dp, true); +      return ret;      }    }    return 0;  }  // detect captype and modify word length for UTF-8 encoding -int HashMgr::get_clen_and_captype(const char* word, int wbl, int* captype) { +int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {    int len;    if (utf8) { -    w_char dest_utf[BUFSIZE]; -    len = u8_u16(dest_utf, BUFSIZE, word); -    *captype = get_captype_utf8(dest_utf, len, langnum); +    std::vector<w_char> dest_utf; +    len = u8_u16(dest_utf, word); +    *captype = get_captype_utf8(dest_utf, langnum);    } else { -    len = wbl; -    *captype = get_captype((char*)word, len, csconv); +    len = word.size(); +    *captype = get_captype(word, csconv);    }    return len;  } @@ -370,7 +399,7 @@ int HashMgr::remove(const char* word) {        flags[dp->alen] = forbiddenword;        dp->astr = flags;        dp->alen++; -      flag_qsort(flags, 0, dp->alen); +      std::sort(flags, flags + dp->alen);      }      dp = dp->next_homonym;    } @@ -378,8 +407,8 @@ int HashMgr::remove(const char* word) {  }  /* remove forbidden flag to add a personal word to the hash */ -int HashMgr::remove_forbidden_flag(const char* word) { -  struct hentry* dp = lookup(word); +int HashMgr::remove_forbidden_flag(const std::string& word) { +  struct hentry* dp = lookup(word.c_str());    if (!dp)      return 1;    while (dp) { @@ -406,15 +435,15 @@ int HashMgr::remove_forbidden_flag(const char* word) {  }  // add a custom dic. 
word to the hash table (public) -int HashMgr::add(const char* word) { +int HashMgr::add(const std::string& word) {    unsigned short* flags = NULL;    int al = 0;    if (remove_forbidden_flag(word)) {      int captype; -    int wbl = strlen(word); -    int wcl = get_clen_and_captype(word, wbl, &captype); -    add_word(word, wbl, wcl, flags, al, NULL, false); -    return add_hidden_capitalized_word((char*)word, wbl, wcl, flags, al, NULL, +    int wbl = word.size(); +    int wcl = get_clen_and_captype(word, &captype); +    add_word(word.c_str(), wbl, wcl, flags, al, NULL, false); +    return add_hidden_capitalized_word(word, wcl, flags, al, NULL,                                         captype);    }    return 0; @@ -427,7 +456,7 @@ int HashMgr::add_with_affix(const char* word, const char* example) {    if (dp && dp->astr) {      int captype;      int wbl = strlen(word); -    int wcl = get_clen_and_captype(word, wbl, &captype); +    int wcl = get_clen_and_captype(word, &captype);      if (aliasf) {        add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);      } else { @@ -440,7 +469,7 @@ int HashMgr::add_with_affix(const char* word, const char* example) {        } else          return 1;      } -    return add_hidden_capitalized_word((char*)word, wbl, wcl, dp->astr, +    return add_hidden_capitalized_word(word, wcl, dp->astr,                                         dp->alen, NULL, captype);    }    return 1; @@ -574,7 +603,7 @@ int HashMgr::load_tables(const char* tpath, const char* key) {            delete dict;            return 6;          } -        flag_qsort(flags, 0, al); +        std::sort(flags, flags + al);        }      } else {        al = 0; @@ -584,10 +613,10 @@ int HashMgr::load_tables(const char* tpath, const char* key) {      int captype;      int wbl = strlen(ts); -    int wcl = get_clen_and_captype(ts, wbl, &captype); +    int wcl = get_clen_and_captype(ts, &captype);      // add the word and its index plus its capitalized form optionally  
    if (add_word(ts, wbl, wcl, flags, al, dp, false) || -        add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) { +        add_hidden_capitalized_word(ts, wcl, flags, al, dp, captype)) {        delete dict;        return 5;      } @@ -674,12 +703,13 @@ int HashMgr::decode_flags(unsigned short** result, char* flags, FileMgr* af) {        break;      }      case FLAG_UNI: {  // UTF-8 characters -      w_char w[BUFSIZE / 2]; -      len = u8_u16(w, BUFSIZE / 2, flags); +      std::vector<w_char> w; +      u8_u16(w, flags); +      len = w.size();        *result = (unsigned short*)malloc(len * sizeof(unsigned short));        if (!*result)          return -1; -      memcpy(*result, w, len * sizeof(short)); +      memcpy(*result, &w[0], len * sizeof(short));        break;      }      default: {  // Ispell's one-character flags (erfg -> e r f g) @@ -712,9 +742,13 @@ unsigned short HashMgr::decode_flag(const char* f) {                           i, DEFAULTFLAGS - 1);        s = (unsigned short)i;        break; -    case FLAG_UNI: -      u8_u16((w_char*)&s, 1, f); +    case FLAG_UNI: { +      std::vector<w_char> w; +      u8_u16(w, f); +      if (!w.empty()) +          memcpy(&s, &w[0], 1 * sizeof(short));        break; +    }      default:        s = (unsigned short)*((unsigned char*)f);    } @@ -724,22 +758,24 @@ unsigned short HashMgr::decode_flag(const char* f) {  }  char* HashMgr::encode_flag(unsigned short f) { -  unsigned char ch[10];    if (f == 0)      return mystrdup("(NULL)"); +  std::string ch;    if (flag_mode == FLAG_LONG) { -    ch[0] = (unsigned char)(f >> 8); -    ch[1] = (unsigned char)(f - ((f >> 8) << 8)); -    ch[2] = '\0'; +    ch.push_back((unsigned char)(f >> 8)); +    ch.push_back((unsigned char)(f - ((f >> 8) << 8)));    } else if (flag_mode == FLAG_NUM) { -    sprintf((char*)ch, "%d", f); +    std::ostringstream stream; +    stream << f; +    ch = stream.str();    } else if (flag_mode == FLAG_UNI) { -    u16_u8((char*)&ch, 10, 
(w_char*)&f, 1); +    const w_char* w_c = (const w_char*)&f; +    std::vector<w_char> w(w_c, w_c + 1); +    u16_u8(ch, w);    } else { -    ch[0] = (unsigned char)(f); -    ch[1] = '\0'; +    ch.push_back((unsigned char)(f));    } -  return mystrdup((char*)ch); +  return mystrdup(ch.c_str());  }  // read in aff file and set flag mode @@ -824,8 +860,8 @@ int HashMgr::load_config(const char* affpath, const char* key) {      /* parse in the ignored characters (for example, Arabic optional diacritics       * characters */      if (strncmp(line, "IGNORE", 6) == 0) { -      if (parse_array(line, &ignorechars, &ignorechars_utf16, -                      &ignorechars_utf16_len, utf8, afflst->getlinenum())) { +      if (!parse_array(line, &ignorechars, ignorechars_utf16, +                       utf8, afflst->getlinenum())) {          delete afflst;          return 1;        } @@ -951,7 +987,7 @@ int HashMgr::parse_aliasf(char* line, FileMgr* af) {            case 1: {              aliasflen[j] =                  (unsigned short)decode_flags(&(aliasf[j]), piece, af); -            flag_qsort(aliasf[j], 0, aliasflen[j]); +            std::sort(aliasf[j], aliasf[j] + aliasflen[j]);              break;            }            default: @@ -1070,19 +1106,14 @@ int HashMgr::parse_aliasm(char* line, FileMgr* af) {                *(tp - 1) = ' ';                tp = tp + strlen(tp);              } +            std::string chunk(piece);              if (complexprefixes) {                if (utf8) -                reverseword_utf(piece); +                reverseword_utf(chunk);                else -                reverseword(piece); -            } -            aliasm[j] = mystrdup(piece); -            if (!aliasm[j]) { -              numaliasm = 0; -              free(aliasm); -              aliasm = NULL; -              return 1; +                reverseword(chunk);              } +            aliasm[j] = mystrdup(chunk.c_str());              break;            }            default: 
diff --git a/libs/hunspell/src/hashmgr.hxx b/libs/hunspell/src/hashmgr.hxx index c6d72f3c40..95b06b13f9 100644 --- a/libs/hunspell/src/hashmgr.hxx +++ b/libs/hunspell/src/hashmgr.hxx @@ -77,9 +77,12 @@  #include "hunvisapi.h"  #include <stdio.h> +#include <string> +#include <vector>  #include "htypes.hxx"  #include "filemgr.hxx" +#include "w_char.hxx"  enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; @@ -95,8 +98,7 @@ class LIBHUNSPELL_DLL_EXPORTED HashMgr {    char* lang;    struct cs_info* csconv;    char* ignorechars; -  unsigned short* ignorechars_utf16; -  int ignorechars_utf16_len; +  std::vector<w_char> ignorechars_utf16;    int numaliasf;  // flag vector `compression' with aliases    unsigned short** aliasf;    unsigned short* aliasflen; @@ -111,7 +113,7 @@ class LIBHUNSPELL_DLL_EXPORTED HashMgr {    int hash(const char*) const;    struct hentry* walk_hashtable(int& col, struct hentry* hp) const; -  int add(const char* word); +  int add(const std::string& word);    int add_with_affix(const char* word, const char* pattern);    int remove(const char* word);    int decode_flags(unsigned short** result, char* flags, FileMgr* af); @@ -123,7 +125,7 @@ class LIBHUNSPELL_DLL_EXPORTED HashMgr {    char* get_aliasm(int index);   private: -  int get_clen_and_captype(const char* word, int wbl, int* captype); +  int get_clen_and_captype(const std::string& word, int* captype);    int load_tables(const char* tpath, const char* key);    int add_word(const char* word,                 int wbl, @@ -134,15 +136,14 @@ class LIBHUNSPELL_DLL_EXPORTED HashMgr {                 bool onlyupcase);    int load_config(const char* affpath, const char* key);    int parse_aliasf(char* line, FileMgr* af); -  int add_hidden_capitalized_word(char* word, -                                  int wbl, +  int add_hidden_capitalized_word(const std::string& word,                                    int wcl,                                    unsigned short* flags,                               
     int al,                                    char* dp,                                    int captype);    int parse_aliasm(char* line, FileMgr* af); -  int remove_forbidden_flag(const char* word); +  int remove_forbidden_flag(const std::string& word);  };  #endif diff --git a/libs/hunspell/src/hunspell.c++ b/libs/hunspell/src/hunspell.c++ index 726c72931a..f7c1581087 100644 --- a/libs/hunspell/src/hunspell.c++ +++ b/libs/hunspell/src/hunspell.c++ @@ -85,6 +85,9 @@  #include <limits>  #include <string> +#define MAXWORDLEN 176 +#define MAXWORDUTF8LEN (MAXWORDLEN * 3) +  Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key) {    encoding = NULL;    csconv = NULL; @@ -158,14 +161,16 @@ int Hunspell::add_dic(const char* dpath, const char* key) {  // set the capitalization type  // return the length of the "cleaned" (and UTF-8 encoded) word -int Hunspell::cleanword2(char* dest, +size_t Hunspell::cleanword2(std::string& dest, +                         std::vector<w_char>& dest_utf,                           const char* src, -                         w_char* dest_utf,                           int* nc,                           int* pcaptype, -                         int* pabbrev) { -  unsigned char* p = (unsigned char*)dest; -  const unsigned char* q = (const unsigned char*)src; +                         size_t* pabbrev) { +  dest.clear(); +  dest_utf.clear(); + +  const char* q = src;    // first skip over any leading blanks    while ((*q != '\0') && (*q == ' ')) @@ -173,7 +178,7 @@ int Hunspell::cleanword2(char* dest,    // now strip off any trailing periods (recording their presence)    *pabbrev = 0; -  int nl = strlen((const char*)q); +  int nl = strlen(q);    while ((nl > 0) && (*(q + nl - 1) == '.')) {      nl--;      (*pabbrev)++; @@ -182,35 +187,26 @@ int Hunspell::cleanword2(char* dest,    // if no characters are left it can't be capitalized    if (nl <= 0) {      *pcaptype = NOCAP; -    *p = '\0';      return 0;    } -  strncpy(dest, 
(char*)q, nl); -  *(dest + nl) = '\0'; -  nl = strlen(dest); +  dest.append(q, nl); +  nl = dest.size();    if (utf8) { -    *nc = u8_u16(dest_utf, MAXWORDLEN, dest); -    // don't check too long words -    if (*nc >= MAXWORDLEN) -      return 0; -    if (*nc == -1) {  // big Unicode character (non BMP area) -      *pcaptype = NOCAP; -      return nl; -    } -    *pcaptype = get_captype_utf8(dest_utf, *nc, langnum); +    *nc = u8_u16(dest_utf, dest); +    *pcaptype = get_captype_utf8(dest_utf, langnum);    } else { -    *pcaptype = get_captype(dest, nl, csconv); +    *pcaptype = get_captype(dest, csconv);      *nc = nl;    }    return nl;  } -int Hunspell::cleanword(char* dest, +void Hunspell::cleanword(std::string& dest,                          const char* src,                          int* pcaptype,                          int* pabbrev) { -  unsigned char* p = (unsigned char*)dest; +  dest.clear();    const unsigned char* q = (const unsigned char*)src;    int firstcap = 0; @@ -229,8 +225,7 @@ int Hunspell::cleanword(char* dest,    // if no characters are left it can't be capitalized    if (nl <= 0) {      *pcaptype = NOCAP; -    *p = '\0'; -    return 0; +    return;    }    // now determine the capitalization type of the first nl letters @@ -245,27 +240,25 @@ int Hunspell::cleanword(char* dest,          ncap++;        if (csconv[(*q)].cupper == csconv[(*q)].clower)          nneutral++; -      *p++ = *q++; +      dest.push_back(*q++);        nl--;      }      // remember to terminate the destination string -    *p = '\0'; -    firstcap = csconv[(unsigned char)(*dest)].ccase; +    firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;    } else { -    unsigned short idx; -    w_char t[MAXWORDLEN]; -    nc = u8_u16(t, MAXWORDLEN, src); -    for (int i = 0; i < nc; i++) { -      idx = (t[i].h << 8) + t[i].l; +    std::vector<w_char> t; +    u8_u16(t, src); +    for (size_t i = 0; i < t.size(); ++i) { +      unsigned short idx = (t[i].h << 8) + t[i].l;       
 unsigned short low = unicodetolower(idx, langnum);        if (idx != low)          ncap++;        if (unicodetoupper(idx, langnum) == low)          nneutral++;      } -    u16_u8(dest, MAXWORDUTF8LEN, t, nc); +    u16_u8(dest, t);      if (ncap) { -      idx = (t[0].h << 8) + t[0].l; +      unsigned short idx = (t[0].h << 8) + t[0].l;        firstcap = (idx != unicodetolower(idx, langnum));      }    } @@ -282,117 +275,60 @@ int Hunspell::cleanword(char* dest,    } else {      *pcaptype = HUHCAP;    } -  return strlen(dest);  } -void Hunspell::mkallcap(char* p) { +void Hunspell::mkallcap(std::string& u8) {    if (utf8) { -    w_char u[MAXWORDLEN]; -    int nc = u8_u16(u, MAXWORDLEN, p); -    unsigned short idx; -    for (int i = 0; i < nc; i++) { -      idx = (u[i].h << 8) + u[i].l; -      if (idx != unicodetoupper(idx, langnum)) { -        u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); -        u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); -      } -    } -    u16_u8(p, MAXWORDUTF8LEN, u, nc); +    std::vector<w_char> u16; +    u8_u16(u16, u8); +    ::mkallcap_utf(u16, langnum); +    u16_u8(u8, u16);    } else { -    while (*p != '\0') { -      *p = csconv[((unsigned char)*p)].cupper; -      p++; -    } -  } -} - -int Hunspell::mkallcap2(char* p, w_char* u, int nc) { -  if (utf8) { -    unsigned short idx; -    for (int i = 0; i < nc; i++) { -      idx = (u[i].h << 8) + u[i].l; -      unsigned short up = unicodetoupper(idx, langnum); -      if (idx != up) { -        u[i].h = (unsigned char)(up >> 8); -        u[i].l = (unsigned char)(up & 0x00FF); -      } -    } -    u16_u8(p, MAXWORDUTF8LEN, u, nc); -    return strlen(p); -  } else { -    while (*p != '\0') { -      *p = csconv[((unsigned char)*p)].cupper; -      p++; -    } -  } -  return nc; -} - -void Hunspell::mkallsmall(char* p) { -  while (*p != '\0') { -    *p = csconv[((unsigned char)*p)].clower; -    p++; +    ::mkallcap(u8, csconv);    }  } -int 
Hunspell::mkallsmall2(char* p, w_char* u, int nc) { +int Hunspell::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {    if (utf8) { -    unsigned short idx; -    for (int i = 0; i < nc; i++) { -      idx = (u[i].h << 8) + u[i].l; -      unsigned short low = unicodetolower(idx, langnum); -      if (idx != low) { -        u[i].h = (unsigned char)(low >> 8); -        u[i].l = (unsigned char)(low & 0x00FF); -      } -    } -    u16_u8(p, MAXWORDUTF8LEN, u, nc); -    return strlen(p); +    ::mkallsmall_utf(u16, langnum); +    u16_u8(u8, u16);    } else { -    while (*p != '\0') { -      *p = csconv[((unsigned char)*p)].clower; -      p++; -    } +    ::mkallsmall(u8, csconv);    } -  return nc; +  return u8.size();  }  // convert UTF-8 sharp S codes to latin 1 -char* Hunspell::sharps_u8_l1(char* dest, char* source) { -  char* p = dest; -  *p = *source; -  for (p++, source++; *(source - 1); p++, source++) { -    *p = *source; -    if (*source == '\x9F') -      *--p = '\xDF'; -  } +std::string Hunspell::sharps_u8_l1(const std::string& source) { +  std::string dest(source); +  mystrrep(dest, "\xC3\x9F", "\xDF");    return dest;  }  // recursive search for right ss - sharp s permutations -hentry* Hunspell::spellsharps(char* base, -                              char* pos, +hentry* Hunspell::spellsharps(std::string& base, +                              size_t n_pos,                                int n,                                int repnum, -                              char* tmp,                                int* info,                                char** root) { -  pos = strstr(pos, "ss"); -  if (pos && (n < MAXSHARPS)) { -    *pos = '\xC3'; -    *(pos + 1) = '\x9F'; -    hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root); +  size_t pos = base.find("ss", n_pos); +  if (pos != std::string::npos && (n < MAXSHARPS)) { +    base[pos] = '\xC3'; +    base[pos + 1] = '\x9F'; +    hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, 
info, root);      if (h)        return h; -    *pos = 's'; -    *(pos + 1) = 's'; -    h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root); +    base[pos] = 's'; +    base[pos + 1] = 's'; +    h = spellsharps(base, pos + 2, n + 1, repnum, info, root);      if (h)        return h;    } else if (repnum > 0) {      if (utf8) -      return checkword(base, info, root); -    return checkword(sharps_u8_l1(tmp, base), info, root); +      return checkword(base.c_str(), info, root); +    std::string tmp(sharps_u8_l1(base)); +    return checkword(tmp.c_str(), info, root);    }    return NULL;  } @@ -403,7 +339,7 @@ int Hunspell::is_keepcase(const hentry* rv) {  }  /* insert a word to the beginning of the suggestion array and return ns */ -int Hunspell::insert_sug(char*** slst, char* word, int ns) { +int Hunspell::insert_sug(char*** slst, const char* word, int ns) {    if (!*slst)      return ns;    char* dup = mystrdup(word); @@ -421,11 +357,6 @@ int Hunspell::insert_sug(char*** slst, char* word, int ns) {  int Hunspell::spell(const char* word, int* info, char** root) {    struct hentry* rv = NULL; -  // need larger vector. For example, Turkish capital letter I converted a -  // 2-byte UTF-8 character (dotless i) by mkallsmall. -  char cw[MAXWORDUTF8LEN]; -  char wspace[MAXWORDUTF8LEN]; -  w_char unicw[MAXWORDLEN];    int info2 = 0;    if (!info) @@ -437,7 +368,6 @@ int Hunspell::spell(const char* word, int* info, char** root) {    if (strcmp(word, SPELL_XML) == 0)      return 1;    int nc = strlen(word); -  int wl2 = 0;    if (utf8) {      if (nc >= MAXWORDUTF8LEN)        return 0; @@ -445,19 +375,26 @@ int Hunspell::spell(const char* word, int* info, char** root) {      if (nc >= MAXWORDLEN)        return 0;    } -  int captype = 0; -  int abbv = 0; -  int wl = 0; +  int captype = NOCAP; +  size_t abbv = 0; +  size_t wl = 0; + +  std::string scw; +  std::vector<w_char> sunicw;    // input conversion    RepList* rl = (pAMgr) ? 
pAMgr->get_iconvtable() : NULL; -  int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0; -  if (convstatus < 0) -    return 0; -  else if (convstatus > 0) -    wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); -  else -    wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); +  { +    std::string wspace; + +    int convstatus = rl ? rl->conv(word, wspace) : 0; +    if (convstatus < 0) +      return 0; +    else if (convstatus > 0) +      wl = cleanword2(scw, sunicw, wspace.c_str(), &nc, &captype, &abbv); +    else +      wl = cleanword2(scw, sunicw, word, &nc, &captype, &abbv); +  }  #ifdef MOZILLA_CLIENT    // accept the abbreviated words without dots @@ -474,12 +411,12 @@ int Hunspell::spell(const char* word, int* info, char** root) {    // "..", "--" etc.)    enum { NBEGIN, NNUM, NSEP };    int nstate = NBEGIN; -  int i; +  size_t i;    for (i = 0; (i < wl); i++) { -    if ((cw[i] <= '9') && (cw[i] >= '0')) { +    if ((scw[i] <= '9') && (scw[i] >= '0')) {        nstate = NNUM; -    } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) { +    } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {        if ((nstate == NSEP) || (i == 0))          break;        nstate = NSEP; @@ -496,75 +433,75 @@ int Hunspell::spell(const char* word, int* info, char** root) {        *info += SPELL_ORIGCAP;      /* FALLTHROUGH */      case NOCAP: -      rv = checkword(cw, info, root); +      rv = checkword(scw.c_str(), info, root);        if ((abbv) && !(rv)) { -        memcpy(wspace, cw, wl); -        *(wspace + wl) = '.'; -        *(wspace + wl + 1) = '\0'; -        rv = checkword(wspace, info, root); +        std::string u8buffer(scw); +        u8buffer.push_back('.'); +        rv = checkword(u8buffer.c_str(), info, root);        }        break;      case ALLCAP: {        *info += SPELL_ORIGCAP; -      rv = checkword(cw, info, root); +      rv = checkword(scw.c_str(), info, root);        if (rv)          break;        if (abbv) { -    
    memcpy(wspace, cw, wl); -        *(wspace + wl) = '.'; -        *(wspace + wl + 1) = '\0'; -        rv = checkword(wspace, info, root); +        std::string u8buffer(scw); +        u8buffer.push_back('.'); +        rv = checkword(u8buffer.c_str(), info, root);          if (rv)            break;        }        // Spec. prefix handling for Catalan, French, Italian:        // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia). -      if (pAMgr && strchr(cw, '\'')) { -        mkallsmall2(cw, unicw, nc); -        // There are no really sane circumstances where this could fail, -        // but anyway... -        if (char* apostrophe = strchr(cw, '\'')) { +      size_t apos = pAMgr ? scw.find('\'') : std::string::npos; +      if (apos != std::string::npos) { +        mkallsmall2(scw, sunicw); +        //conversion may result in string with different len to pre-mkallsmall2 +        //so re-scan +        if (apos != std::string::npos && apos < scw.size() - 1) { +          std::string part1 = scw.substr(0, apos+1); +          std::string part2 = scw.substr(apos+1);            if (utf8) { -            w_char tmpword[MAXWORDLEN]; -            *apostrophe = '\0'; -            wl2 = u8_u16(tmpword, MAXWORDLEN, cw); -            *apostrophe = '\''; -            if (wl2 >= 0 && wl2 < nc) { -              mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1); -              rv = checkword(cw, info, root); -              if (rv) -                break; -            } +            std::vector<w_char> part1u, part2u; +            u8_u16(part1u, part1); +            u8_u16(part2u, part2); +            mkinitcap2(part2, part2u); +            scw = part1 + part2; +            sunicw = part1u; +            sunicw.insert(sunicw.end(), part2u.begin(), part2u.end()); +            rv = checkword(scw.c_str(), info, root); +            if (rv) +              break;            } else { -            mkinitcap2(apostrophe + 1, unicw, nc); -            rv = checkword(cw, info, 
root); +            mkinitcap2(part2, sunicw); +            scw = part1 + part2; +            rv = checkword(scw.c_str(), info, root);              if (rv)                break;            } +          mkinitcap2(scw, sunicw); +          rv = checkword(scw.c_str(), info, root); +          if (rv) +            break;          } -        mkinitcap2(cw, unicw, nc); -        rv = checkword(cw, info, root); -        if (rv) -          break;        } -      if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) { -        char tmpword[MAXWORDUTF8LEN]; -        wl = mkallsmall2(cw, unicw, nc); -        memcpy(wspace, cw, (wl + 1)); -        rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); +      if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) { + +        mkallsmall2(scw, sunicw); +        std::string u8buffer(scw); +        rv = spellsharps(u8buffer, 0, 0, 0, info, root);          if (!rv) { -          wl2 = mkinitcap2(cw, unicw, nc); -          rv = spellsharps(cw, cw, 0, 0, tmpword, info, root); +          mkinitcap2(scw, sunicw); +          rv = spellsharps(scw, 0, 0, 0, info, root);          }          if ((abbv) && !(rv)) { -          *(wspace + wl) = '.'; -          *(wspace + wl + 1) = '\0'; -          rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); +          u8buffer.push_back('.'); +          rv = spellsharps(u8buffer, 0, 0, 0, info, root);            if (!rv) { -            memcpy(wspace, cw, wl2); -            *(wspace + wl2) = '.'; -            *(wspace + wl2 + 1) = '\0'; -            rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); +            u8buffer = std::string(scw); +            u8buffer.push_back('.'); +            rv = spellsharps(u8buffer, 0, 0, 0, info, root);            }          }          if (rv) @@ -572,13 +509,14 @@ int Hunspell::spell(const char* word, int* info, char** root) {        }      }      case INITCAP: { +        *info += SPELL_ORIGCAP; -      wl = 
mkallsmall2(cw, unicw, nc); -      memcpy(wspace, cw, (wl + 1)); -      wl2 = mkinitcap2(cw, unicw, nc); +      mkallsmall2(scw, sunicw); +      std::string u8buffer(scw); +      mkinitcap2(scw, sunicw);        if (captype == INITCAP)          *info += SPELL_INITCAP; -      rv = checkword(cw, info, root); +      rv = checkword(scw.c_str(), info, root);        if (captype == INITCAP)          *info -= SPELL_INITCAP;        // forbid bad capitalization @@ -593,18 +531,16 @@ int Hunspell::spell(const char* word, int* info, char** root) {        if (rv)          break; -      rv = checkword(wspace, info, root); +      rv = checkword(u8buffer.c_str(), info, root);        if (abbv && !rv) { -        *(wspace + wl) = '.'; -        *(wspace + wl + 1) = '\0'; -        rv = checkword(wspace, info, root); +        u8buffer.push_back('.'); +        rv = checkword(u8buffer.c_str(), info, root);          if (!rv) { -          memcpy(wspace, cw, wl2); -          *(wspace + wl2) = '.'; -          *(wspace + wl2 + 1) = '\0'; +          u8buffer = scw; +          u8buffer.push_back('.');            if (captype == INITCAP)              *info += SPELL_INITCAP; -          rv = checkword(wspace, info, root); +          rv = checkword(u8buffer.c_str(), info, root);            if (captype == INITCAP)              *info -= SPELL_INITCAP;            if (rv && is_keepcase(rv) && (captype == ALLCAP)) @@ -617,8 +553,8 @@ int Hunspell::spell(const char* word, int* info, char** root) {             // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed             // in INITCAP form, too.             
!(pAMgr->get_checksharps() && -             ((utf8 && strstr(wspace, "\xC3\x9F")) || -              (!utf8 && strchr(wspace, '\xDF')))))) +             ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) || +              (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))          rv = NULL;        break;      } @@ -637,67 +573,66 @@ int Hunspell::spell(const char* word, int* info, char** root) {    // recursive breaking at break points    if (wordbreak) { -    char* s; -    char r; +      int nbr = 0; -    wl = strlen(cw); +    wl = scw.size();      int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;      // calculate break points for recursion limit      for (int j = 0; j < numbreak; j++) { -      s = cw; -      do { -        s = (char*)strstr(s, wordbreak[j]); -        if (s) { -          nbr++; -          s++; -        } -      } while (s); +      size_t len = strlen(wordbreak[j]); +      size_t pos = 0; +      while ((pos = scw.find(wordbreak[j], pos, len)) != std::string::npos) { +        ++nbr; +        pos += len; +      }      }      if (nbr >= 10)        return 0;      // check boundary patterns (^begin and end$)      for (int j = 0; j < numbreak; j++) { -      int plen = strlen(wordbreak[j]); +      size_t plen = strlen(wordbreak[j]);        if (plen == 1 || plen > wl)          continue; +        if (wordbreak[j][0] == '^' && -          strncmp(cw, wordbreak[j] + 1, plen - 1) == 0 && spell(cw + plen - 1)) +          scw.compare(0, plen - 1, wordbreak[j] + 1, plen -1) == 0 && spell(scw.c_str() + plen - 1))          return 1; +        if (wordbreak[j][plen - 1] == '$' && -          strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) { -        r = cw[wl - plen + 1]; -        cw[wl - plen + 1] = '\0'; -        if (spell(cw)) +          scw.compare(wl - plen + 1, plen - 1, wordbreak[j], plen - 1) == 0) { +        char r = scw[wl - plen + 1]; +        scw[wl - plen + 1] = '\0'; +        if (spell(scw.c_str()))            return 1; -        
cw[wl - plen + 1] = r; +        scw[wl - plen + 1] = r;        }      }      // other patterns      for (int j = 0; j < numbreak; j++) { -      int plen = strlen(wordbreak[j]); -      s = (char*)strstr(cw, wordbreak[j]); -      if (s && (s > cw) && (s < cw + wl - plen)) { -        if (!spell(s + plen)) +      size_t plen = strlen(wordbreak[j]); +      size_t found = scw.find(wordbreak[j]); +      if ((found > 0) && (found < wl - plen)) { +        if (!spell(scw.c_str() + found + plen))            continue; -        r = *s; -        *s = '\0'; +        char r = scw[found]; +        scw[found] = '\0';          // examine 2 sides of the break point -        if (spell(cw)) +        if (spell(scw.c_str()))            return 1; -        *s = r; +        scw[found] = r;          // LANG_hu: spec. dash rule          if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) { -          r = s[1]; -          s[1] = '\0'; -          if (spell(cw)) +          r = scw[found + 1]; +          scw[found + 1] = '\0'; +          if (spell(scw.c_str()))              return 1;  // check the first part with dash -          s[1] = r; +          scw[found + 1] = r;          } -        // end of LANG speficic region +        // end of LANG specific region        }      }    } @@ -716,10 +651,9 @@ struct hentry* Hunspell::checkword(const char* w, int* info, char** root) {    if (ignoredchars != NULL) {      w2.assign(w);      if (utf8) { -      int ignoredchars_utf16_len; -      unsigned short* ignoredchars_utf16 = -          pAMgr->get_ignore_utf16(&ignoredchars_utf16_len); -      remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len); +      const std::vector<w_char>& ignoredchars_utf16 = +          pAMgr->get_ignore_utf16(); +      remove_ignored_chars_utf(w2, ignoredchars_utf16);      } else {        remove_ignored_chars(w2, ignoredchars);      } @@ -802,37 +736,40 @@ struct hentry* Hunspell::checkword(const char* w, int* info, char** root) {          return NULL;     
   }        if (root) { -        *root = mystrdup(he->word); -        if (*root && complexprefixes) { +        std::string word_root(he->word); +        if (complexprefixes) {            if (utf8) -            reverseword_utf(*root); +            reverseword_utf(word_root);            else -            reverseword(*root); +            reverseword(word_root);          } +        *root = mystrdup(word_root.c_str());        }        // try check compound word      } else if (pAMgr->get_compound()) { -      he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info); +      struct hentry* rwords[100];  // buffer for COMPOUND pattern checking +      he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);        // LANG_hu section: `moving rule' with last dash        if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {          char* dup = mystrdup(word);          if (!dup)            return NULL;          dup[len - 1] = '\0'; -        he = pAMgr->compound_check(dup, len - 1, -5, 0, 100, 0, NULL, 1, 0, +        he = pAMgr->compound_check(dup, len - 1, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0,                                     info);          free(dup);        } -      // end of LANG speficic region +      // end of LANG specific region        if (he) {          if (root) { -          *root = mystrdup(he->word); -          if (*root && complexprefixes) { +          std::string word_root(he->word); +          if (complexprefixes) {              if (utf8) -              reverseword_utf(*root); +              reverseword_utf(word_root);              else -              reverseword(*root); +              reverseword(word_root);            } +          *root = mystrdup(word_root.c_str());          }          if (info)            *info += SPELL_COMPOUND; @@ -845,11 +782,8 @@ struct hentry* Hunspell::checkword(const char* w, int* info, char** root) {  int Hunspell::suggest(char*** slst, const char* word) {    int 
onlycmpdsug = 0; -  char cw[MAXWORDUTF8LEN]; -  char wspace[MAXWORDUTF8LEN];    if (!pSMgr || maxdic == 0)      return 0; -  w_char unicw[MAXWORDLEN];    *slst = NULL;    // process XML input of the simplified API (see manual)    if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) { @@ -863,130 +797,132 @@ int Hunspell::suggest(char*** slst, const char* word) {      if (nc >= MAXWORDLEN)        return 0;    } -  int captype = 0; -  int abbv = 0; -  int wl = 0; +  int captype = NOCAP; +  size_t abbv = 0; +  size_t wl = 0; + +  std::string scw; +  std::vector<w_char> sunicw;    // input conversion    RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; -  int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0; -  if (convstatus < 0) -    return 0; -  else if (convstatus > 0) -    wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); -  else -    wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); +  { +    std::string wspace; + +    int convstatus = rl ? rl->conv(word, wspace) : 0; +    if (convstatus < 0) +      return 0; +    else if (convstatus > 0) +      wl = cleanword2(scw, sunicw, wspace.c_str(), &nc, &captype, &abbv); +    else +      wl = cleanword2(scw, sunicw, word, &nc, &captype, &abbv); + +    if (wl == 0) +      return 0; +  } -  if (wl == 0) -    return 0;    int ns = 0;    int capwords = 0;    // check capitalized form for FORCEUCASE    if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {      int info = SPELL_ORIGCAP; -    char** wlst; -    if (checkword(cw, &info, NULL)) { -      if (*slst) { -        wlst = *slst; -      } else { -        wlst = (char**)malloc(MAXSUGGESTION * sizeof(char*)); -        if (wlst == NULL) -          return -1; -        *slst = wlst; -        for (int i = 0; i < MAXSUGGESTION; i++) { -          wlst[i] = NULL; -        } +    if (checkword(scw.c_str(), &info, NULL)) { +      std::string form(scw); +      mkinitcap(form); + +      char** wlst = (char**)malloc(MAXSUGGESTION * 
sizeof(char*)); +      if (wlst == NULL) +        return -1; +      *slst = wlst; +      wlst[0] = mystrdup(form.c_str()); +      for (int i = 1; i < MAXSUGGESTION; ++i) { +        wlst[i] = NULL;        } -      wlst[0] = mystrdup(cw); -      mkinitcap(wlst[0]); +        return 1;      }    }    switch (captype) {      case NOCAP: { -      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); +      ns = pSMgr->suggest(slst, scw.c_str(), ns, &onlycmpdsug);        break;      }      case INITCAP: {        capwords = 1; -      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); +      ns = pSMgr->suggest(slst, scw.c_str(), ns, &onlycmpdsug);        if (ns == -1)          break; -      memcpy(wspace, cw, (wl + 1)); -      mkallsmall2(wspace, unicw, nc); -      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); +      std::string wspace(scw); +      mkallsmall2(wspace, sunicw); +      ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);        break;      }      case HUHINITCAP:        capwords = 1;      case HUHCAP: { -      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); +      ns = pSMgr->suggest(slst, scw.c_str(), ns, &onlycmpdsug);        if (ns != -1) { -        int prevns;          // something.The -> something. 
The -        char* dot = strchr(cw, '.'); -        if (dot && (dot > cw)) { +        size_t dot_pos = scw.find('.'); +        if (dot_pos != std::string::npos) { +          std::string postdot = scw.substr(dot_pos + 1);            int captype_;            if (utf8) { -            w_char w_[MAXWORDLEN]; -            int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1); -            captype_ = get_captype_utf8(w_, wl_, langnum); -          } else -            captype_ = get_captype(dot + 1, strlen(dot + 1), csconv); +            std::vector<w_char> postdotu; +            u8_u16(postdotu, postdot); +            captype_ = get_captype_utf8(postdotu, langnum); +          } else { +            captype_ = get_captype(postdot, csconv); +          }            if (captype_ == INITCAP) { -            char* st = mystrdup(cw); -            if (st) { -              char* newst = (char*)realloc(st, wl + 2); -              if (newst == NULL) -                free(st); -              st = newst; -            } -            if (st) { -              st[(dot - cw) + 1] = ' '; -              strcpy(st + (dot - cw) + 2, dot + 1); -              ns = insert_sug(slst, st, ns); -              free(st); -            } +            std::string str(scw); +            str.insert(dot_pos + 1, 1, ' '); +            ns = insert_sug(slst, str.c_str(), ns);            }          } + +        std::string wspace; +          if (captype == HUHINITCAP) {            // TheOpenOffice.org -> The OpenOffice.org -          memcpy(wspace, cw, (wl + 1)); -          mkinitsmall2(wspace, unicw, nc); -          ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); +          wspace = scw; +          mkinitsmall2(wspace, sunicw); +          ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);          } -        memcpy(wspace, cw, (wl + 1)); -        mkallsmall2(wspace, unicw, nc); -        if (spell(wspace)) -          ns = insert_sug(slst, wspace, ns); -        prevns = ns; -        ns = pSMgr->suggest(slst, 
wspace, ns, &onlycmpdsug); +        wspace = scw; +        mkallsmall2(wspace, sunicw); +        if (spell(wspace.c_str())) +          ns = insert_sug(slst, wspace.c_str(), ns); +        int prevns = ns; +        ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);          if (captype == HUHINITCAP) { -          mkinitcap2(wspace, unicw, nc); -          if (spell(wspace)) -            ns = insert_sug(slst, wspace, ns); -          ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); +          mkinitcap2(wspace, sunicw); +          if (spell(wspace.c_str())) +            ns = insert_sug(slst, wspace.c_str(), ns); +          ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);          }          // aNew -> "a New" (instead of "a new")          for (int j = prevns; j < ns; j++) {            char* space = strchr((*slst)[j], ' ');            if (space) { -            int slen = strlen(space + 1); +            size_t slen = strlen(space + 1);              // different case after space (need capitalisation) -            if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) { -              w_char w[MAXWORDLEN]; -              int wc = 0; -              char* r = (*slst)[j]; +            if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) { +              std::string first((*slst)[j], space + 1); +              std::string second(space + 1); +              std::vector<w_char> w;                if (utf8) -                wc = u8_u16(w, MAXWORDLEN, space + 1); -              mkinitcap2(space + 1, w, wc); +                u8_u16(w, second); +              mkinitcap2(second, w);                // set as first suggestion +              char* r = (*slst)[j];                for (int k = j; k > 0; k--)                  (*slst)[k] = (*slst)[k - 1]; -              (*slst)[0] = r; +              free(r); +              (*slst)[0] = mystrdup((first + second).c_str());              }            }          } @@ -995,35 +931,30 @@ int 
Hunspell::suggest(char*** slst, const char* word) {      }      case ALLCAP: { -      memcpy(wspace, cw, (wl + 1)); -      mkallsmall2(wspace, unicw, nc); -      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); +      std::string wspace(scw); +      mkallsmall2(wspace, sunicw); +      ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);        if (ns == -1)          break; -      if (pAMgr && pAMgr->get_keepcase() && spell(wspace)) -        ns = insert_sug(slst, wspace, ns); -      mkinitcap2(wspace, unicw, nc); -      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); +      if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str())) +        ns = insert_sug(slst, wspace.c_str(), ns); +      mkinitcap2(wspace, sunicw); +      ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);        for (int j = 0; j < ns; j++) { -        mkallcap((*slst)[j]); +        std::string form((*slst)[j]); +        mkallcap(form); +          if (pAMgr && pAMgr->get_checksharps()) { -          char* pos;            if (utf8) { -            pos = strstr((*slst)[j], "\xC3\x9F"); -            while (pos) { -              *pos = 'S'; -              *(pos + 1) = 'S'; -              pos = strstr(pos + 2, "\xC3\x9F"); -            } +            mystrrep(form, "\xC3\x9F", "SS");            } else { -            pos = strchr((*slst)[j], '\xDF'); -            while (pos) { -              (*slst)[j] = (char*)realloc((*slst)[j], strlen((*slst)[j]) + 2); -              mystrrep((*slst)[j], "\xDF", "SS"); -              pos = strchr((*slst)[j], '\xDF'); -            } +            mystrrep(form, "\xDF", "SS");            }          } + +        free((*slst)[j]); +        (*slst)[j] = mystrdup(form.c_str()); +        }        break;      } @@ -1035,11 +966,10 @@ int Hunspell::suggest(char*** slst, const char* word) {        char* pos = strchr((*slst)[j], '-');        if (pos) {          int info; -        char w[MAXWORDUTF8LEN];          *pos = '\0'; -        strcpy(w, 
(*slst)[j]); -        strcat(w, pos + 1); -        (void)spell(w, &info, NULL); +        std::string w((*slst)[j]); +        w.append(pos + 1); +        (void)spell(w.c_str(), &info, NULL);          if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {            *pos = ' ';          } else @@ -1054,64 +984,67 @@ int Hunspell::suggest(char*** slst, const char* word) {        (*slst)) {      switch (captype) {        case NOCAP: { -        ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic); +        ns = pSMgr->ngsuggest(*slst, scw.c_str(), ns, pHMgr, maxdic);          break;        }        case HUHINITCAP:          capwords = 1;        case HUHCAP: { -        memcpy(wspace, cw, (wl + 1)); -        mkallsmall2(wspace, unicw, nc); -        ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); +        std::string wspace(scw); +        mkallsmall2(wspace, sunicw); +        ns = pSMgr->ngsuggest(*slst, wspace.c_str(), ns, pHMgr, maxdic);          break;        }        case INITCAP: {          capwords = 1; -        memcpy(wspace, cw, (wl + 1)); -        mkallsmall2(wspace, unicw, nc); -        ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); +        std::string wspace(scw); +        mkallsmall2(wspace, sunicw); +        ns = pSMgr->ngsuggest(*slst, wspace.c_str(), ns, pHMgr, maxdic);          break;        }        case ALLCAP: { -        memcpy(wspace, cw, (wl + 1)); -        mkallsmall2(wspace, unicw, nc); +        std::string wspace(scw); +        mkallsmall2(wspace, sunicw);          int oldns = ns; -        ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); -        for (int j = oldns; j < ns; j++) -          mkallcap((*slst)[j]); +        ns = pSMgr->ngsuggest(*slst, wspace.c_str(), ns, pHMgr, maxdic); +        for (int j = oldns; j < ns; j++) { +          std::string form((*slst)[j]); +          mkallcap(form); +          free((*slst)[j]); +          (*slst)[j] = mystrdup(form.c_str()); +        }          break;        }      }    }    
// try dash suggestion (Afo-American -> Afro-American) -  if (char* pos = strchr(cw, '-')) { -    char* ppos = cw; +  size_t dash_pos = scw.find('-'); +  if (dash_pos != std::string::npos) {      int nodashsug = 1; -    char** nlst = NULL; -    int nn = 0; -    int last = 0; -    if (*slst) { -      for (int j = 0; j < ns && nodashsug == 1; j++) { -        if (strchr((*slst)[j], '-')) -          nodashsug = 0; -      } +    for (int j = 0; j < ns && nodashsug == 1; j++) { +      if (strchr((*slst)[j], '-')) +        nodashsug = 0;      } + +    size_t prev_pos = 0; +    bool last = false; +      while (nodashsug && !last) { -      if (*pos == '\0') +      if (dash_pos == scw.size())          last = 1; -      else -        *pos = '\0'; -      if (!spell(ppos)) { -        nn = suggest(&nlst, ppos); +      std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos); +      if (!spell(chunk.c_str())) { +        char** nlst = NULL; +        int nn = suggest(&nlst, chunk.c_str());          for (int j = nn - 1; j >= 0; j--) { -          strncpy(wspace, cw, ppos - cw); -          strcpy(wspace + (ppos - cw), nlst[j]); +          std::string wspace = scw.substr(0, prev_pos); +          wspace.append(nlst[j]);            if (!last) { -            strcat(wspace, "-"); -            strcat(wspace, pos + 1); +            wspace.append("-"); +            wspace.append(scw.substr(dash_pos + 1));            } -          ns = insert_sug(slst, wspace, ns); +          ns = insert_sug(slst, wspace.c_str(), ns);            free(nlst[j]);          }          if (nlst != NULL) @@ -1119,29 +1052,34 @@ int Hunspell::suggest(char*** slst, const char* word) {          nodashsug = 0;        }        if (!last) { -        *pos = '-'; -        ppos = pos + 1; -        pos = strchr(ppos, '-'); +        prev_pos = dash_pos + 1; +        dash_pos = scw.find('-', prev_pos);        } -      if (!pos) -        pos = cw + strlen(cw); +      if (dash_pos == std::string::npos) +        dash_pos = 
scw.size();      }    }    // word reversing wrapper for complex prefixes    if (complexprefixes) {      for (int j = 0; j < ns; j++) { +      std::string root((*slst)[j]); +      free((*slst)[j]);        if (utf8) -        reverseword_utf((*slst)[j]); +        reverseword_utf(root);        else -        reverseword((*slst)[j]); +        reverseword(root); +      (*slst)[j] = mystrdup(root.c_str());      }    }    // capitalize    if (capwords)      for (int j = 0; j < ns; j++) { -      mkinitcap((*slst)[j]); +      std::string form((*slst)[j]); +      free((*slst)[j]); +      mkinitcap(form); +      (*slst)[j] = mystrdup(form.c_str());      }    // expand suggestions with dot(s) @@ -1160,25 +1098,23 @@ int Hunspell::suggest(char*** slst, const char* word) {          int l = 0;          for (int j = 0; j < ns; j++) {            if (!strchr((*slst)[j], ' ') && !spell((*slst)[j])) { -            char s[MAXSWUTF8L]; -            w_char w[MAXSWL]; -            int len; +            std::string s; +            std::vector<w_char> w;              if (utf8) { -              len = u8_u16(w, MAXSWL, (*slst)[j]); +              u8_u16(w, (*slst)[j]);              } else { -              strcpy(s, (*slst)[j]); -              len = strlen(s); +              s = (*slst)[j];              } -            mkallsmall2(s, w, len); +            mkallsmall2(s, w);              free((*slst)[j]); -            if (spell(s)) { -              (*slst)[l] = mystrdup(s); +            if (spell(s.c_str())) { +              (*slst)[l] = mystrdup(s.c_str());                if ((*slst)[l])                  l++;              } else { -              mkinitcap2(s, w, len); -              if (spell(s)) { -                (*slst)[l] = mystrdup(s); +              mkinitcap2(s, w); +              if (spell(s.c_str())) { +                (*slst)[l] = mystrdup(s.c_str());                  if ((*slst)[l])                    l++;                } @@ -1211,9 +1147,10 @@ int Hunspell::suggest(char*** slst, 
const char* word) {    // output conversion    rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;    for (int j = 0; rl && j < ns; j++) { -    if (rl->conv((*slst)[j], wspace, MAXWORDUTF8LEN) > 0) { +    std::string wspace; +    if (rl->conv((*slst)[j], wspace) > 0) {        free((*slst)[j]); -      (*slst)[j] = mystrdup(wspace); +      (*slst)[j] = mystrdup(wspace.c_str());      }    } @@ -1233,151 +1170,25 @@ char* Hunspell::get_dic_encoding() {    return encoding;  } -#ifdef HUNSPELL_EXPERIMENTAL -// XXX UTF-8 support is OK? -int Hunspell::suggest_auto(char*** slst, const char* word) { -  char cw[MAXWORDUTF8LEN]; -  char wspace[MAXWORDUTF8LEN]; -  if (!pSMgr || maxdic == 0) -    return 0; -  w_char unicw[MAXWORDLEN]; -  int nc = strlen(word); -  if (utf8) { -    if (nc >= MAXWORDUTF8LEN) -      return 0; -  } else { -    if (nc >= MAXWORDLEN) -      return 0; -  } -  int captype = 0; -  int abbv = 0; -  int wl = 0; - -  // input conversion -  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; -  int convstatus = rl ? 
rl->conv(word, wspace) : 0; -  if (convstatus < 0) -    return 0; -  else if (convstatus > 0) -    wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); -  else -    wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - -  if (wl == 0) -    return 0; -  int ns = 0; -  *slst = NULL;  // HU, nsug in pSMgr->suggest - -  switch (captype) { -    case NOCAP: { -      ns = pSMgr->suggest_auto(slst, cw, ns); -      if (ns > 0) -        break; -      break; -    } - -    case INITCAP: { -      memcpy(wspace, cw, (wl + 1)); -      mkallsmall2(wspace, unicw, nc); -      ns = pSMgr->suggest_auto(slst, wspace, ns); -      for (int j = 0; j < ns; j++) -        mkinitcap((*slst)[j]); -      ns = pSMgr->suggest_auto(slst, cw, ns); -      break; -    } - -    case HUHINITCAP: -    case HUHCAP: { -      ns = pSMgr->suggest_auto(slst, cw, ns); -      if (ns == 0) { -        memcpy(wspace, cw, (wl + 1)); -        mkallsmall2(wspace, unicw, nc); -        ns = pSMgr->suggest_auto(slst, wspace, ns); -      } -      break; -    } - -    case ALLCAP: { -      memcpy(wspace, cw, (wl + 1)); -      mkallsmall2(wspace, unicw, nc); -      ns = pSMgr->suggest_auto(slst, wspace, ns); - -      mkinitcap(wspace); -      ns = pSMgr->suggest_auto(slst, wspace, ns); - -      for (int j = 0; j < ns; j++) -        mkallcap((*slst)[j]); -      break; -    } -  } - -  // word reversing wrapper for complex prefixes -  if (complexprefixes) { -    for (int j = 0; j < ns; j++) { -      if (utf8) -        reverseword_utf((*slst)[j]); -      else -        reverseword((*slst)[j]); -    } -  } - -  // expand suggestions with dot(s) -  if (abbv && pAMgr && pAMgr->get_sugswithdots()) { -    for (int j = 0; j < ns; j++) { -      (*slst)[j] = (char*)realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv); -      strcat((*slst)[j], word + strlen(word) - abbv); -    } -  } - -  // LANG_hu section: replace '-' with ' ' in Hungarian -  if (langnum == LANG_hu) { -    for (int j = 0; j < ns; j++) { -      char* pos = 
strchr((*slst)[j], '-'); -      if (pos) { -        int info; -        char w[MAXWORDUTF8LEN]; -        *pos = '\0'; -        strcpy(w, (*slst)[j]); -        strcat(w, pos + 1); -        spell(w, &info, NULL); -        if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { -          *pos = ' '; -        } else -          *pos = '-'; -      } -    } -  } - -  // output conversion -  rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; -  for (int j = 0; rl && j < ns; j++) { -    if (rl->conv((*slst)[j], wspace) > 0) { -      free((*slst)[j]); -      (*slst)[j] = mystrdup(wspace); -    } -  } - -  // END OF LANG_hu section -  return ns; -} -#endif -  int Hunspell::stem(char*** slst, char** desc, int n) { -  char result[MAXLNLEN]; -  char result2[MAXLNLEN]; + +  std::string result2;    *slst = NULL;    if (n == 0)      return 0; -  *result2 = '\0';    for (int i = 0; i < n; i++) { -    *result = '\0'; + +    std::string result; +      // add compound word parts (except the last one)      char* s = (char*)desc[i];      char* part = strstr(s, MORPH_PART);      if (part) {        char* nextpart = strstr(part + 1, MORPH_PART);        while (nextpart) { -        copy_field(result + strlen(result), part, MORPH_PART); +        std::string field; +        copy_field(field, part, MORPH_PART); +        result.append(field);          part = nextpart;          nextpart = strstr(part + 1, MORPH_PART);        } @@ -1404,22 +1215,28 @@ int Hunspell::stem(char*** slst, char** desc, int n) {            int genl = line_tok(sg, &gen, MSEP_REC);            free(sg);            for (int j = 0; j < genl; j++) { -            sprintf(result2 + strlen(result2), "%c%s%s", MSEP_REC, result, -                    gen[j]); +            result2.push_back(MSEP_REC); +            result2.append(result); +            result2.append(gen[j]);            }            freelist(&gen, genl);          }        } else { -        sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result); +        
result2.push_back(MSEP_REC); +        result2.append(result);          if (strstr(pl[k], MORPH_SURF_PFX)) { -          copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX); +          std::string field; +          copy_field(field, pl[k], MORPH_SURF_PFX); +          result2.append(field);          } -        copy_field(result2 + strlen(result2), pl[k], MORPH_STEM); +        std::string field; +        copy_field(field, pl[k], MORPH_STEM); +        result2.append(field);        }      }      freelist(&pl, pln);    } -  int sln = line_tok(result2, slst, MSEP_REC); +  int sln = line_tok(result2.c_str(), slst, MSEP_REC);    return uniqlist(*slst, sln);  } @@ -1431,148 +1248,43 @@ int Hunspell::stem(char*** slst, const char* word) {    return pln2;  } -#ifdef HUNSPELL_EXPERIMENTAL -int Hunspell::suggest_pos_stems(char*** slst, const char* word) { -  char cw[MAXWORDUTF8LEN]; -  char wspace[MAXWORDUTF8LEN]; -  if (!pSMgr || maxdic == 0) -    return 0; -  w_char unicw[MAXWORDLEN]; -  int nc = strlen(word); -  if (utf8) { -    if (nc >= MAXWORDUTF8LEN) -      return 0; -  } else { -    if (nc >= MAXWORDLEN) -      return 0; -  } -  int captype = 0; -  int abbv = 0; -  int wl = 0; - -  // input conversion -  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; -  int convstatus = rl ? 
rl->conv(word, wspace) : 0; -  if (convstatus < 0) -    return 0; -  else if (convstatus > 0) -    wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); -  else -    wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - -  if (wl == 0) -    return 0; - -  int ns = 0;  // ns=0 = normalized input - -  *slst = NULL;  // HU, nsug in pSMgr->suggest - -  switch (captype) { -    case HUHCAP: -    case NOCAP: { -      ns = pSMgr->suggest_pos_stems(slst, cw, ns); - -      if ((abbv) && (ns == 0)) { -        memcpy(wspace, cw, wl); -        *(wspace + wl) = '.'; -        *(wspace + wl + 1) = '\0'; -        ns = pSMgr->suggest_pos_stems(slst, wspace, ns); -      } - -      break; -    } - -    case INITCAP: { -      ns = pSMgr->suggest_pos_stems(slst, cw, ns); - -      if (ns == 0 || ((*slst)[0][0] == '#')) { -        memcpy(wspace, cw, (wl + 1)); -        mkallsmall2(wspace, unicw, nc); -        ns = pSMgr->suggest_pos_stems(slst, wspace, ns); -      } - -      break; -    } - -    case ALLCAP: { -      ns = pSMgr->suggest_pos_stems(slst, cw, ns); -      if (ns != 0) -        break; - -      memcpy(wspace, cw, (wl + 1)); -      mkallsmall2(wspace, unicw, nc); -      ns = pSMgr->suggest_pos_stems(slst, wspace, ns); - -      if (ns == 0) { -        mkinitcap(wspace); -        ns = pSMgr->suggest_pos_stems(slst, wspace, ns); -      } -      break; -    } -  } - -  // output conversion -  rl = (pAMgr) ? 
pAMgr->get_oconvtable() : NULL; -  for (int j = 0; rl && j < ns; j++) { -    if (rl->conv((*slst)[j], wspace) > 0) { -      free((*slst)[j]); -      (*slst)[j] = mystrdup(wspace); -    } -  } - -  return ns; -} -#endif  // END OF HUNSPELL_EXPERIMENTAL CODE -  const char* Hunspell::get_wordchars() {    return pAMgr->get_wordchars();  } -unsigned short* Hunspell::get_wordchars_utf16(int* len) { -  return pAMgr->get_wordchars_utf16(len); +const std::vector<w_char>& Hunspell::get_wordchars_utf16() { +  return pAMgr->get_wordchars_utf16();  } -void Hunspell::mkinitcap(char* p) { -  if (!utf8) { -    if (*p != '\0') -      *p = csconv[((unsigned char)*p)].cupper; +void Hunspell::mkinitcap(std::string& u8) { +  if (utf8) { +    std::vector<w_char> u16; +    u8_u16(u16, u8); +    ::mkinitcap_utf(u16, langnum); +    u16_u8(u8, u16);    } else { -    int len; -    w_char u[MAXWORDLEN]; -    len = u8_u16(u, MAXWORDLEN, p); -    unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum); -    u[0].h = (unsigned char)(i >> 8); -    u[0].l = (unsigned char)(i & 0x00FF); -    u16_u8(p, MAXWORDUTF8LEN, u, len); +    ::mkinitcap(u8, csconv);    }  } -int Hunspell::mkinitcap2(char* p, w_char* u, int nc) { -  if (!utf8) { -    if (*p != '\0') -      *p = csconv[((unsigned char)*p)].cupper; -  } else if (nc > 0) { -    unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum); -    u[0].h = (unsigned char)(i >> 8); -    u[0].l = (unsigned char)(i & 0x00FF); -    u16_u8(p, MAXWORDUTF8LEN, u, nc); -    return strlen(p); +int Hunspell::mkinitcap2(std::string& u8, std::vector<w_char>& u16) { +  if (utf8) { +    ::mkinitcap_utf(u16, langnum); +    u16_u8(u8, u16); +  } else { +    ::mkinitcap(u8, csconv);    } -  return nc; +  return u8.size();  } -int Hunspell::mkinitsmall2(char* p, w_char* u, int nc) { -  if (!utf8) { -    if (*p != '\0') -      *p = csconv[((unsigned char)*p)].clower; -  } else if (nc > 0) { -    unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, 
langnum); -    u[0].h = (unsigned char)(i >> 8); -    u[0].l = (unsigned char)(i & 0x00FF); -    u16_u8(p, MAXWORDUTF8LEN, u, nc); -    return strlen(p); +int Hunspell::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) { +  if (utf8) { +    ::mkinitsmall_utf(u16, langnum); +    u16_u8(u8, u16); +  } else { +    ::mkinitsmall(u8, csconv);    } -  return nc; +  return u8.size();  }  int Hunspell::add(const char* word) { @@ -1601,20 +1313,16 @@ struct cs_info* Hunspell::get_csconv() {    return csconv;  } -void Hunspell::cat_result(char* result, char* st) { +void Hunspell::cat_result(std::string& result, char* st) {    if (st) { -    if (*result) -      mystrcat(result, "\n", MAXLNLEN); -    mystrcat(result, st, MAXLNLEN); +    if (!result.empty()) +      result.append("\n"); +    result.append(st);      free(st);    }  }  int Hunspell::analyze(char*** slst, const char* word) { -  char cw[MAXWORDUTF8LEN]; -  char wspace[MAXWORDUTF8LEN]; -  w_char unicw[MAXWORDLEN]; -  int wl2 = 0;    *slst = NULL;    if (!pSMgr || maxdic == 0)      return 0; @@ -1626,48 +1334,52 @@ int Hunspell::analyze(char*** slst, const char* word) {      if (nc >= MAXWORDLEN)        return 0;    } -  int captype = 0; -  int abbv = 0; -  int wl = 0; +  int captype = NOCAP; +  size_t abbv = 0; +  size_t wl = 0; + +  std::string scw; +  std::vector<w_char> sunicw;    // input conversion    RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; -  int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0; -  if (convstatus < 0) -    return 0; -  else if (convstatus > 0) -    wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); -  else -    wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); +  { +    std::string wspace; + +    int convstatus = rl ? 
rl->conv(word, wspace) : 0; +    if (convstatus < 0) +      return 0; +    else if (convstatus > 0) +      wl = cleanword2(scw, sunicw, wspace.c_str(), &nc, &captype, &abbv); +    else +      wl = cleanword2(scw, sunicw, word, &nc, &captype, &abbv); +  }    if (wl == 0) {      if (abbv) { +      scw.clear();        for (wl = 0; wl < abbv; wl++) -        cw[wl] = '.'; -      cw[wl] = '\0'; +        scw.push_back('.');        abbv = 0;      } else        return 0;    } -  char result[MAXLNLEN]; -  char* st = NULL; - -  *result = '\0'; +  std::string result; -  int n = 0; -  int n2 = 0; -  int n3 = 0; +  size_t n = 0; +  size_t n2 = 0; +  size_t n3 = 0;    // test numbers    // LANG_hu section: set dash information for suggestions    if (langnum == LANG_hu) { -    while ((n < wl) && (((cw[n] <= '9') && (cw[n] >= '0')) || -                        (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) { +    while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) || +                        (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {        n++; -      if ((cw[n] == '.') || (cw[n] == ',')) { +      if ((scw[n] == '.') || (scw[n] == ',')) {          if (((n2 == 0) && (n > 3)) || -            ((n2 > 0) && ((cw[n - 1] == '.') || (cw[n - 1] == ',')))) +            ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))            break;          n2++;          n3 = n; @@ -1676,21 +1388,21 @@ int Hunspell::analyze(char*** slst, const char* word) {      if ((n == wl) && (n3 > 0) && (n - n3 > 3))        return 0; -    if ((n == wl) || ((n > 0) && ((cw[n] == '%') || (cw[n] == '\xB0')) && -                      checkword(cw + n, NULL, NULL))) { -      mystrcat(result, cw, MAXLNLEN); -      result[n - 1] = '\0'; +    if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) && +                      checkword(scw.c_str() + n, NULL, NULL))) { +      result.append(scw); +      result.resize(n - 1);        if (n == wl) -        cat_result(result, 
pSMgr->suggest_morph(cw + n - 1)); +        cat_result(result, pSMgr->suggest_morph(scw.c_str() + n - 1));        else { -        char sign = cw[n]; -        cw[n] = '\0'; -        cat_result(result, pSMgr->suggest_morph(cw + n - 1)); -        mystrcat(result, "+", MAXLNLEN);  // XXX SPEC. MORPHCODE -        cw[n] = sign; -        cat_result(result, pSMgr->suggest_morph(cw + n)); +        char sign = scw[n]; +        scw[n] = '\0'; +        cat_result(result, pSMgr->suggest_morph(scw.c_str() + n - 1)); +        result.push_back('+');  // XXX SPEC. MORPHCODE +        scw[n] = sign; +        cat_result(result, pSMgr->suggest_morph(scw.c_str() + n));        } -      return line_tok(result, slst, MSEP_REC); +      return line_tok(result.c_str(), slst, MSEP_REC);      }    }    // END OF LANG_hu section @@ -1699,64 +1411,58 @@ int Hunspell::analyze(char*** slst, const char* word) {      case HUHCAP:      case HUHINITCAP:      case NOCAP: { -      cat_result(result, pSMgr->suggest_morph(cw)); +      cat_result(result, pSMgr->suggest_morph(scw.c_str()));        if (abbv) { -        memcpy(wspace, cw, wl); -        *(wspace + wl) = '.'; -        *(wspace + wl + 1) = '\0'; -        cat_result(result, pSMgr->suggest_morph(wspace)); +        std::string u8buffer(scw); +        u8buffer.push_back('.'); +        cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));        }        break;      }      case INITCAP: { -      wl = mkallsmall2(cw, unicw, nc); -      memcpy(wspace, cw, (wl + 1)); -      wl2 = mkinitcap2(cw, unicw, nc); -      cat_result(result, pSMgr->suggest_morph(wspace)); -      cat_result(result, pSMgr->suggest_morph(cw)); +      wl = mkallsmall2(scw, sunicw); +      std::string u8buffer(scw); +      mkinitcap2(scw, sunicw); +      cat_result(result, pSMgr->suggest_morph(u8buffer.c_str())); +      cat_result(result, pSMgr->suggest_morph(scw.c_str()));        if (abbv) { -        *(wspace + wl) = '.'; -        *(wspace + wl + 1) = '\0'; -        
cat_result(result, pSMgr->suggest_morph(wspace)); +        u8buffer.push_back('.'); +        cat_result(result, pSMgr->suggest_morph(u8buffer.c_str())); -        memcpy(wspace, cw, wl2); -        *(wspace + wl2) = '.'; -        *(wspace + wl2 + 1) = '\0'; +        u8buffer = scw; +        u8buffer.push_back('.'); -        cat_result(result, pSMgr->suggest_morph(wspace)); +        cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));        }        break;      }      case ALLCAP: { -      cat_result(result, pSMgr->suggest_morph(cw)); +      cat_result(result, pSMgr->suggest_morph(scw.c_str()));        if (abbv) { -        memcpy(wspace, cw, wl); -        *(wspace + wl) = '.'; -        *(wspace + wl + 1) = '\0'; -        cat_result(result, pSMgr->suggest_morph(cw)); +        std::string u8buffer(scw); +        u8buffer.push_back('.'); +        cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));        } -      wl = mkallsmall2(cw, unicw, nc); -      memcpy(wspace, cw, (wl + 1)); -      wl2 = mkinitcap2(cw, unicw, nc); +      mkallsmall2(scw, sunicw); +      std::string u8buffer(scw); +      mkinitcap2(scw, sunicw); -      cat_result(result, pSMgr->suggest_morph(wspace)); -      cat_result(result, pSMgr->suggest_morph(cw)); +      cat_result(result, pSMgr->suggest_morph(u8buffer.c_str())); +      cat_result(result, pSMgr->suggest_morph(scw.c_str()));        if (abbv) { -        *(wspace + wl) = '.'; -        *(wspace + wl + 1) = '\0'; -        cat_result(result, pSMgr->suggest_morph(wspace)); +        u8buffer.push_back('.'); +        cat_result(result, pSMgr->suggest_morph(u8buffer.c_str())); -        memcpy(wspace, cw, wl2); -        *(wspace + wl2) = '.'; -        *(wspace + wl2 + 1) = '\0'; +        u8buffer = scw; +        u8buffer.push_back('.'); -        cat_result(result, pSMgr->suggest_morph(wspace)); +        cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));        }        break;      }    } -  if (*result) { +  if (!result.empty()) 
{      // word reversing wrapper for complex prefixes      if (complexprefixes) {        if (utf8) @@ -1764,95 +1470,94 @@ int Hunspell::analyze(char*** slst, const char* word) {        else          reverseword(result);      } -    return line_tok(result, slst, MSEP_REC); +    return line_tok(result.c_str(), slst, MSEP_REC);    }    // compound word with dash (HU) I18n -  char* dash = NULL; -  int nresult = 0;    // LANG_hu section: set dash information for suggestions -  if (langnum == LANG_hu) -    dash = (char*)strchr(cw, '-'); -  if ((langnum == LANG_hu) && dash) { -    *dash = '\0'; + +  size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos; +  int nresult = 0; +  if (dash_pos != std::string::npos) { +    std::string part1 = scw.substr(0, dash_pos); +    std::string part2 = scw.substr(dash_pos+1); +      // examine 2 sides of the dash -    if (dash[1] == '\0') {  // base word ending with dash -      if (spell(cw)) { -        char* p = pSMgr->suggest_morph(cw); +    if (part2.empty()) {  // base word ending with dash +      if (spell(part1.c_str())) { +        char* p = pSMgr->suggest_morph(part1.c_str());          if (p) {            int ret = line_tok(p, slst, MSEP_REC);            free(p);            return ret;          }        } -    } else if ((dash[1] == 'e') && (dash[2] == '\0')) {  // XXX (HU) -e hat. -      if (spell(cw) && (spell("-e"))) { -        st = pSMgr->suggest_morph(cw); +    } else if (part2.size() == 1 && part2[0] == 'e') {  // XXX (HU) -e hat. +      if (spell(part1.c_str()) && (spell("-e"))) { +        char* st = pSMgr->suggest_morph(part1.c_str());          if (st) { -          mystrcat(result, st, MAXLNLEN); +          result.append(st);            free(st);          } -        mystrcat(result, "+", MAXLNLEN);  // XXX spec. separator in MORPHCODE +        result.push_back('+');  // XXX spec. 
separator in MORPHCODE          st = pSMgr->suggest_morph("-e");          if (st) { -          mystrcat(result, st, MAXLNLEN); +          result.append(st);            free(st);          } -        return line_tok(result, slst, MSEP_REC); +        return line_tok(result.c_str(), slst, MSEP_REC);        }      } else {        // first word ending with dash: word- XXX ??? -      char r2 = *(dash + 1); -      dash[0] = '-'; -      dash[1] = '\0'; -      nresult = spell(cw); -      dash[1] = r2; -      dash[0] = '\0'; -      if (nresult && spell(dash + 1) && -          ((strlen(dash + 1) > 1) || ((dash[1] > '0') && (dash[1] < '9')))) { -        st = pSMgr->suggest_morph(cw); +      part1.push_back(' '); +      nresult = spell(part1.c_str()); +      part1.erase(part1.size() - 1); +      if (nresult && spell(part2.c_str()) && +          ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) { +        char* st = pSMgr->suggest_morph(part1.c_str());          if (st) { -          mystrcat(result, st, MAXLNLEN); +          result.append(st);            free(st); -          mystrcat(result, "+", MAXLNLEN);  // XXX spec. separator in MORPHCODE +          result.push_back('+');  // XXX spec. 
separator in MORPHCODE          } -        st = pSMgr->suggest_morph(dash + 1); +        st = pSMgr->suggest_morph(part2.c_str());          if (st) { -          mystrcat(result, st, MAXLNLEN); +          result.append(st);            free(st);          } -        return line_tok(result, slst, MSEP_REC); +        return line_tok(result.c_str(), slst, MSEP_REC);        }      }      // affixed number in correct word -    if (nresult && (dash > cw) && -        (((*(dash - 1) <= '9') && (*(dash - 1) >= '0')) || -         (*(dash - 1) == '.'))) { -      *dash = '-'; +    if (nresult && (dash_pos > 0) && +        (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) || +         (scw[dash_pos - 1] == '.'))) {        n = 1; -      if (*(dash - n) == '.') +      if (scw[dash_pos - n] == '.')          n++;        // search first not a number character to left from dash -      while (((dash - n) >= cw) && ((*(dash - n) == '0') || (n < 3)) && +      while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&               (n < 6)) {          n++;        } -      if ((dash - n) < cw) +      if (dash_pos < n)          n--;        // numbers: valami1000000-hoz        // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,        // 56-hoz, 6-hoz        for (; n >= 1; n--) { -        if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && -            checkword(dash - n, NULL, NULL)) { -          mystrcat(result, cw, MAXLNLEN); -          result[dash - cw - n] = '\0'; -          st = pSMgr->suggest_morph(dash - n); +        if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') { +            continue; +        } +        std::string chunk = scw.substr(dash_pos - n); +        if (checkword(chunk.c_str(), NULL, NULL)) { +          result.append(chunk); +          char* st = pSMgr->suggest_morph(chunk.c_str());            if (st) { -            mystrcat(result, st, MAXLNLEN); +            result.append(st);              free(st);            } -          return 
line_tok(result, slst, MSEP_REC); +          return line_tok(result.c_str(), slst, MSEP_REC);          }        }      } @@ -1866,30 +1571,33 @@ int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {      return 0;    char** pl2;    int pl2n = analyze(&pl2, word); -  int captype = 0; +  int captype = NOCAP;    int abbv = 0; -  char cw[MAXWORDUTF8LEN]; +  std::string cw;    cleanword(cw, word, &captype, &abbv); -  char result[MAXLNLEN]; -  *result = '\0'; +  std::string result;    for (int i = 0; i < pln; i++) {      cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));    }    freelist(&pl2, pl2n); -  if (*result) { +  if (!result.empty()) {      // allcap      if (captype == ALLCAP)        mkallcap(result);      // line split -    int linenum = line_tok(result, slst, MSEP_REC); +    int linenum = line_tok(result.c_str(), slst, MSEP_REC);      // capitalize      if (captype == INITCAP || captype == HUHINITCAP) { -      for (int j = 0; j < linenum; j++) -        mkinitcap((*slst)[j]); +      for (int j = 0; j < linenum; j++) { +        std::string form((*slst)[j]); +        free((*slst)[j]); +        mkinitcap(form); +        (*slst)[j] = mystrdup(form.c_str()); +      }      }      // temporary filtering of prefix related errors (eg. 
@@ -1923,22 +1631,21 @@ int Hunspell::generate(char*** slst, const char* word, const char* pattern) {  }  // minimal XML parser functions -int Hunspell::get_xml_par(char* dest, const char* par, int max) { -  char* d = dest; +std::string Hunspell::get_xml_par(const char* par) { +  std::string dest;    if (!par) -    return 0; +    return dest;    char end = *par; -  char* dmax = dest + max;    if (end == '>')      end = '<';    else if (end != '\'' && end != '"')      return 0;  // bad XML -  for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) -    *d = *par; -  *d = '\0'; +  for (par++; *par != '\0' && *par != end; ++par) { +    dest.push_back(*par); +  }    mystrrep(dest, "&lt;", "<");    mystrrep(dest, "&amp;", "&"); -  return (int)(d - dest); +  return dest;  }  int Hunspell::get_langnum() const { @@ -1967,18 +1674,17 @@ const char* Hunspell::get_xml_pos(const char* s, const char* attr) {  int Hunspell::check_xml_par(const char* q,                              const char* attr,                              const char* value) { -  char cw[MAXWORDUTF8LEN]; -  if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) && -      strcmp(cw, value) == 0) +  std::string cw = get_xml_par(get_xml_pos(q, attr)); +  if (cw == value)      return 1;    return 0;  } -int Hunspell::get_xml_list(char*** slst, char* list, const char* tag) { -  int n = 0; -  char* p; +int Hunspell::get_xml_list(char*** slst, const char* list, const char* tag) {    if (!list)      return 0; +  int n = 0; +  const char* p;    for (p = list; ((p = strstr(p, tag)) != NULL); p++)      n++;    if (n == 0) @@ -1987,25 +1693,20 @@ int Hunspell::get_xml_list(char*** slst, char* list, const char* tag) {    if (!*slst)      return 0;    for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) { -    int l = strlen(p); -    (*slst)[n] = (char*)malloc(l + 1); -    if (!(*slst)[n]) -      return n; -    if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) { -      free((*slst)[n]); +    
std::string cw = get_xml_par(p + strlen(tag) - 1); +    if (cw.empty()) {        break;      } +    (*slst)[n] = mystrdup(cw.c_str());    }    return n;  }  int Hunspell::spellml(char*** slst, const char* word) { -  char *q, *q2; -  char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN]; -  q = (char*)strstr(word, "<query"); +  const char* q = strstr(word, "<query");    if (!q)      return 0;  // bad XML input -  q2 = strchr(q, '>'); +  const char* q2 = strchr(q, '>');    if (!q2)      return 0;  // bad XML input    q2 = strstr(q2, "<word"); @@ -2013,8 +1714,9 @@ int Hunspell::spellml(char*** slst, const char* word) {      return 0;  // bad XML input    if (check_xml_par(q, "type=", "analyze")) {      int n = 0; -    if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) -      n = analyze(slst, cw); +    std::string cw = get_xml_par(strchr(q2, '>')); +    if (!cw.empty()) +      n = analyze(slst, cw.c_str());      if (n == 0)        return 0;      // convert the result to <code><a>ana1</a><a>ana2</a></code> format @@ -2036,22 +1738,25 @@ int Hunspell::spellml(char*** slst, const char* word) {      (*slst)[0] = mystrdup(r.c_str());      return 1;    } else if (check_xml_par(q, "type=", "stem")) { -    if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) -      return stem(slst, cw); +    std::string cw = get_xml_par(strchr(q2, '>')); +    if (!cw.empty()) +      return stem(slst, cw.c_str());    } else if (check_xml_par(q, "type=", "generate")) { -    int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1); -    if (n == 0) +    std::string cw = get_xml_par(strchr(q2, '>')); +    if (cw.empty())        return 0; -    char* q3 = strstr(q2 + 1, "<word"); +    const char* q3 = strstr(q2 + 1, "<word");      if (q3) { -      if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) { -        return generate(slst, cw, cw2); +      std::string cw2 = get_xml_par(strchr(q3, '>')); +      if (!cw2.empty()) { +        return generate(slst, cw.c_str(), cw2.c_str());      
  }      } else {        if ((q2 = strstr(q2 + 1, "<code")) != NULL) {          char** slst2; -        if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>")) != 0) { -          int n2 = generate(slst, cw, slst2, n); +        int n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"); +        if (n != 0) { +          int n2 = generate(slst, cw.c_str(), slst2, n);            freelist(&slst2, n);            return uniqlist(*slst, n2);          } @@ -2062,182 +1767,6 @@ int Hunspell::spellml(char*** slst, const char* word) {    return 0;  } -#ifdef HUNSPELL_EXPERIMENTAL -// XXX is UTF-8 support OK? -char* Hunspell::morph_with_correction(const char* word) { -  char cw[MAXWORDUTF8LEN]; -  char wspace[MAXWORDUTF8LEN]; -  if (!pSMgr || maxdic == 0) -    return NULL; -  w_char unicw[MAXWORDLEN]; -  int nc = strlen(word); -  if (utf8) { -    if (nc >= MAXWORDUTF8LEN) -      return NULL; -  } else { -    if (nc >= MAXWORDLEN) -      return NULL; -  } -  int captype = 0; -  int abbv = 0; -  int wl = 0; - -  // input conversion -  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; -  int convstatus = rl ? 
rl->conv(word, wspace) : 0; -  if (convstatus < 0) -    return 0; -  else if (convstatus > 0) -    wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); -  else -    wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - -  if (wl == 0) -    return NULL; - -  char result[MAXLNLEN]; -  char* st = NULL; - -  *result = '\0'; - -  switch (captype) { -    case NOCAP: { -      st = pSMgr->suggest_morph_for_spelling_error(cw); -      if (st) { -        mystrcat(result, st, MAXLNLEN); -        free(st); -      } -      if (abbv) { -        memcpy(wspace, cw, wl); -        *(wspace + wl) = '.'; -        *(wspace + wl + 1) = '\0'; -        st = pSMgr->suggest_morph_for_spelling_error(wspace); -        if (st) { -          if (*result) -            mystrcat(result, "\n", MAXLNLEN); -          mystrcat(result, st, MAXLNLEN); -          free(st); -        } -      } -      break; -    } -    case INITCAP: { -      memcpy(wspace, cw, (wl + 1)); -      mkallsmall2(wspace, unicw, nc); -      st = pSMgr->suggest_morph_for_spelling_error(wspace); -      if (st) { -        mystrcat(result, st, MAXLNLEN); -        free(st); -      } -      st = pSMgr->suggest_morph_for_spelling_error(cw); -      if (st) { -        if (*result) -          mystrcat(result, "\n", MAXLNLEN); -        mystrcat(result, st, MAXLNLEN); -        free(st); -      } -      if (abbv) { -        memcpy(wspace, cw, wl); -        *(wspace + wl) = '.'; -        *(wspace + wl + 1) = '\0'; -        mkallsmall2(wspace, unicw, nc); -        st = pSMgr->suggest_morph_for_spelling_error(wspace); -        if (st) { -          if (*result) -            mystrcat(result, "\n", MAXLNLEN); -          mystrcat(result, st, MAXLNLEN); -          free(st); -        } -        mkinitcap(wspace); -        st = pSMgr->suggest_morph_for_spelling_error(wspace); -        if (st) { -          if (*result) -            mystrcat(result, "\n", MAXLNLEN); -          mystrcat(result, st, MAXLNLEN); -          free(st); -        } -      
} -      break; -    } -    case HUHCAP: { -      st = pSMgr->suggest_morph_for_spelling_error(cw); -      if (st) { -        mystrcat(result, st, MAXLNLEN); -        free(st); -      } -      memcpy(wspace, cw, (wl + 1)); -      mkallsmall2(wspace, unicw, nc); -      st = pSMgr->suggest_morph_for_spelling_error(wspace); -      if (st) { -        if (*result) -          mystrcat(result, "\n", MAXLNLEN); -        mystrcat(result, st, MAXLNLEN); -        free(st); -      } -      break; -    } -    case ALLCAP: { -      memcpy(wspace, cw, (wl + 1)); -      st = pSMgr->suggest_morph_for_spelling_error(wspace); -      if (st) { -        mystrcat(result, st, MAXLNLEN); -        free(st); -      } -      mkallsmall2(wspace, unicw, nc); -      st = pSMgr->suggest_morph_for_spelling_error(wspace); -      if (st) { -        if (*result) -          mystrcat(result, "\n", MAXLNLEN); -        mystrcat(result, st, MAXLNLEN); -        free(st); -      } -      mkinitcap(wspace); -      st = pSMgr->suggest_morph_for_spelling_error(wspace); -      if (st) { -        if (*result) -          mystrcat(result, "\n", MAXLNLEN); -        mystrcat(result, st, MAXLNLEN); -        free(st); -      } -      if (abbv) { -        memcpy(wspace, cw, (wl + 1)); -        *(wspace + wl) = '.'; -        *(wspace + wl + 1) = '\0'; -        if (*result) -          mystrcat(result, "\n", MAXLNLEN); -        st = pSMgr->suggest_morph_for_spelling_error(wspace); -        if (st) { -          mystrcat(result, st, MAXLNLEN); -          free(st); -        } -        mkallsmall2(wspace, unicw, nc); -        st = pSMgr->suggest_morph_for_spelling_error(wspace); -        if (st) { -          if (*result) -            mystrcat(result, "\n", MAXLNLEN); -          mystrcat(result, st, MAXLNLEN); -          free(st); -        } -        mkinitcap(wspace); -        st = pSMgr->suggest_morph_for_spelling_error(wspace); -        if (st) { -          if (*result) -            mystrcat(result, "\n", MAXLNLEN); -      
    mystrcat(result, st, MAXLNLEN); -          free(st); -        } -      } -      break; -    } -  } - -  if (*result) -    return mystrdup(result); -  return NULL; -} - -#endif  // END OF HUNSPELL_EXPERIMENTAL CODE -  Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {    return (Hunhandle*)(new Hunspell(affpath, dpath));  } @@ -2333,10 +1862,9 @@ int Hunspell::suffix_suggest(char*** slst, const char* root_word) {    if (ignoredchars != NULL) {      w2.assign(root_word);      if (utf8) { -      int ignoredchars_utf16_len; -      unsigned short* ignoredchars_utf16 = -          pAMgr->get_ignore_utf16(&ignoredchars_utf16_len); -      remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len); +      const std::vector<w_char>& ignoredchars_utf16 = +          pAMgr->get_ignore_utf16(); +      remove_ignored_chars_utf(w2, ignoredchars_utf16);      } else {        remove_ignored_chars(w2, ignoredchars);      } diff --git a/libs/hunspell/src/hunspell.hxx b/libs/hunspell/src/hunspell.hxx index 259d44f86c..3bcf75e39c 100644 --- a/libs/hunspell/src/hunspell.hxx +++ b/libs/hunspell/src/hunspell.hxx @@ -77,6 +77,7 @@  #include "affixmgr.hxx"  #include "suggestmgr.hxx"  #include "langnum.hxx" +#include <vector>  #define SPELL_XML "<?xml?>" @@ -215,7 +216,7 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {    /* get extra word characters definied in affix file for tokenization */    const char* get_wordchars(); -  unsigned short* get_wordchars_utf16(int* len); +  const std::vector<w_char>& get_wordchars_utf16();    struct cs_info* get_csconv();    const char* get_version(); @@ -229,45 +230,32 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {    {  	  return pAMgr->get_try_string();    } -/* experimental and deprecated functions */ - -#ifdef HUNSPELL_EXPERIMENTAL -  /* suffix is an affix flag string, similarly in dictionary files */ -  int put_word_suffix(const char* word, const char* suffix); -  char* morph_with_correction(const char* word); - -  /* spec. 
suggestions */ -  int suggest_auto(char*** slst, const char* word); -  int suggest_pos_stems(char*** slst, const char* word); -#endif   private: -  int cleanword(char*, const char*, int* pcaptype, int* pabbrev); -  int cleanword2(char*, -                 const char*, -                 w_char*, -                 int* w_len, -                 int* pcaptype, -                 int* pabbrev); -  void mkinitcap(char*); -  int mkinitcap2(char* p, w_char* u, int nc); -  int mkinitsmall2(char* p, w_char* u, int nc); -  void mkallcap(char*); -  int mkallcap2(char* p, w_char* u, int nc); -  void mkallsmall(char*); -  int mkallsmall2(char* p, w_char* u, int nc); +  void cleanword(std::string& dest, const char*, int* pcaptype, int* pabbrev); +  size_t cleanword2(std::string& dest, +                    std::vector<w_char>& dest_u, +                    const char*, +                    int* w_len, +                    int* pcaptype, +                    size_t* pabbrev); +  void mkinitcap(std::string& u8); +  int mkinitcap2(std::string& u8, std::vector<w_char>& u16); +  int mkinitsmall2(std::string& u8, std::vector<w_char>& u16); +  void mkallcap(std::string& u8); +  int mkallsmall2(std::string& u8, std::vector<w_char>& u16);    struct hentry* checkword(const char*, int* info, char** root); -  char* sharps_u8_l1(char* dest, char* source); +  std::string sharps_u8_l1(const std::string& source);    hentry* -  spellsharps(char* base, char*, int, int, char* tmp, int* info, char** root); +  spellsharps(std::string& base, size_t start_pos, int, int, int* info, char** root);    int is_keepcase(const hentry* rv); -  int insert_sug(char*** slst, char* word, int ns); -  void cat_result(char* result, char* st); +  int insert_sug(char*** slst, const char* word, int ns); +  void cat_result(std::string& result, char* st);    char* stem_description(const char* desc);    int spellml(char*** slst, const char* word); -  int get_xml_par(char* dest, const char* par, int maxl); +  std::string 
get_xml_par(const char* par);    const char* get_xml_pos(const char* s, const char* attr); -  int get_xml_list(char*** slst, char* list, const char* tag); +  int get_xml_list(char*** slst, const char* list, const char* tag);    int check_xml_par(const char* q, const char* attr, const char* value);  }; diff --git a/libs/hunspell/src/phonet.c++ b/libs/hunspell/src/phonet.c++ index 2b4d2ae504..17350e74a7 100644 --- a/libs/hunspell/src/phonet.c++ +++ b/libs/hunspell/src/phonet.c++ @@ -66,33 +66,30 @@ static int myisalpha(char ch) {    return 1;  } +/*  Do phonetic transformation.                        */  /*  phonetic transcription algorithm                   */  /*  see: http://aspell.net/man-html/Phonetic-Code.html */  /*  convert string to uppercase before this call       */ -int phonet(const char* inword, char* target, int len, phonetable& parms) { -  /**       Do phonetic transformation.       **/ -  /**  "len" = length of "inword" incl. '\0'. **/ +std::string phonet(const std::string& inword, phonetable& parms) { -  /**  result:  >= 0:  length of "target"    **/ -  /**            otherwise:  error            **/ - -  int i, j, k = 0, n, p, z; +  int i, k = 0, p, z;    int k0, n0, p0 = -333, z0; -  char c, c0; +  char c;    const char* s;    typedef unsigned char uchar; -  char word[MAXPHONETUTF8LEN + 1]; -  if (len == -1) -    len = strlen(inword); + +  size_t len = inword.size();    if (len > MAXPHONETUTF8LEN) -    return 0; -  strncpy(word, inword, MAXPHONETUTF8LEN); +    return std::string(); +  char word[MAXPHONETUTF8LEN + 1]; +  strncpy(word, inword.c_str(), MAXPHONETUTF8LEN);    word[MAXPHONETUTF8LEN] = '\0'; +  std::string target;    /**  check word  **/ -  i = j = z = 0; +  i = z = 0;    while ((c = word[i]) != '\0') { -    n = parms.hash[(uchar)c]; +    int n = parms.hash[(uchar)c];      z0 = 0;      if (n >= 0) { @@ -141,7 +138,7 @@ int phonet(const char* inword, char* target, int len, phonetable& parms) {               (!myisalpha(word[i + k0])))) {   
         /**  search for followup rules, if:     **/            /**  parms.followup and k > 1  and  NO '-' in searchstring **/ -          c0 = word[i + k - 1]; +          char c0 = word[i + k - 1];            n0 = parms.hash[(uchar)c0];            //            if (parms.followup  &&  k > 1  &&  n0 >= 0 @@ -216,9 +213,9 @@ int phonet(const char* inword, char* target, int len, phonetable& parms) {                     : 0;            if (p0 == 1 && z == 0) {              /**  rule with '<' is used  **/ -            if (j > 0 && *s != '\0' && -                (target[j - 1] == c || target[j - 1] == *s)) { -              j--; +            if (!target.empty() && *s != '\0' && +                (target[target.size()-1] == c || target[target.size()-1] == *s)) { +              target.erase(target.size() - 1);              }              z0 = 1;              z = 1; @@ -236,10 +233,9 @@ int phonet(const char* inword, char* target, int len, phonetable& parms) {            } else { /** no '<' rule used **/              i += k - 1;              z = 0; -            while (*s != '\0' && *(s + 1) != '\0' && j < len) { -              if (j == 0 || target[j - 1] != *s) { -                target[j] = *s; -                j++; +            while (*s != '\0' && *(s + 1) != '\0' && target.size() < len) { +              if (target.empty() || target[target.size()-1] != *s) { +                target.push_back(*s);                }                s++;              } @@ -248,8 +244,7 @@ int phonet(const char* inword, char* target, int len, phonetable& parms) {              if (parms.rules[n][0] != '\0' &&                  strstr(parms.rules[n] + 1, "^^") != NULL) {                if (c != '\0') { -                target[j] = c; -                j++; +                target.push_back(c);                }                strmove(&word[0], &word[0] + i + 1);                i = 0; @@ -262,15 +257,11 @@ int phonet(const char* inword, char* target, int len, phonetable& parms) {        } /**  end of 
while (parms.rules[n][0] == c)  **/      }   /**  end of if (n >= 0)  **/      if (z0 == 0) { -      //        if (k && (assert(p0!=-333),!p0) &&  j < len &&  c != '\0' -      //           && (!parms.collapse_result  ||  j == 0  ||  target[j-1] != -      //           c)){ -      if (k && !p0 && j < len && c != '\0' && -          (1 || j == 0 || target[j - 1] != c)) { +      if (k && !p0 && target.size() < len && c != '\0' && +          (1 || target.empty() || target[target.size()-1] != c)) {          /**  condense only double letters  **/ -        target[j] = c; +        target.push_back(c);          /// printf("\n setting \n"); -        j++;        }        i++; @@ -279,7 +270,5 @@ int phonet(const char* inword, char* target, int len, phonetable& parms) {      }    } /**  end of   while ((c = word[i]) != '\0')  **/ -  target[j] = '\0'; -  return (j); - +  return target;  } /**  end of function "phonet"  **/ diff --git a/libs/hunspell/src/phonet.hxx b/libs/hunspell/src/phonet.hxx index cb0dbed3fb..eb9fd0c628 100644 --- a/libs/hunspell/src/phonet.hxx +++ b/libs/hunspell/src/phonet.hxx @@ -46,9 +46,7 @@ struct phonetable {  LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable& parms); -LIBHUNSPELL_DLL_EXPORTED int phonet(const char* inword, -                                    char* target, -                                    int len, -                                    phonetable& phone); +LIBHUNSPELL_DLL_EXPORTED std::string phonet(const std::string& inword, +                                            phonetable& phone);  #endif diff --git a/libs/hunspell/src/replist.c++ b/libs/hunspell/src/replist.c++ index ace6c4aaf8..b3e6b37d20 100644 --- a/libs/hunspell/src/replist.c++ +++ b/libs/hunspell/src/replist.c++ @@ -151,7 +151,7 @@ int RepList::add(char* pat1, char* pat2) {  }  int RepList::conv(const char* word, char* dest, size_t destsize) { -  int stl = 0; +  size_t stl = 0;    int change = 0;    for (size_t i = 0; i < strlen(word); i++) {      int n = 
near(word + i); @@ -173,3 +173,21 @@ int RepList::conv(const char* word, char* dest, size_t destsize) {    dest[stl] = '\0';    return change;  } + +bool RepList::conv(const char* word, std::string& dest) { +  dest.clear(); + +  bool change = false; +  for (size_t i = 0; i < strlen(word); i++) { +    int n = near(word + i); +    int l = match(word + i, n); +    if (l) { +      dest.append(dat[n]->pattern2); +      i += l - 1; +      change = true; +    } else { +      dest.push_back(word[i]); +    } +  } +  return change; +} diff --git a/libs/hunspell/src/replist.hxx b/libs/hunspell/src/replist.hxx index 319eb03fb0..59366e9e02 100644 --- a/libs/hunspell/src/replist.hxx +++ b/libs/hunspell/src/replist.hxx @@ -79,6 +79,9 @@  #include "w_char.hxx" +#include <string> +#include <vector> +  class LIBHUNSPELL_DLL_EXPORTED RepList {   private:    RepList(const RepList&); @@ -100,5 +103,6 @@ class LIBHUNSPELL_DLL_EXPORTED RepList {    int near(const char* word);    int match(const char* word, int n);    int conv(const char* word, char* dest, size_t destsize); +  bool conv(const char* word, std::string& dest);  };  #endif diff --git a/libs/hunspell/src/suggestmgr.c++ b/libs/hunspell/src/suggestmgr.c++ index 4269a1181a..17becd7582 100644 --- a/libs/hunspell/src/suggestmgr.c++ +++ b/libs/hunspell/src/suggestmgr.c++ @@ -125,11 +125,11 @@ SuggestMgr::SuggestMgr(const char* tryme, int maxn, AffixMgr* aptr) {    if (ckey) {      if (utf8) { -      w_char t[MAXSWL]; -      ckeyl = u8_u16(t, MAXSWL, ckey); +      std::vector<w_char> t; +      ckeyl = u8_u16(t, ckey);        ckey_utf = (w_char*)malloc(ckeyl * sizeof(w_char));        if (ckey_utf) -        memcpy(ckey_utf, t, ckeyl * sizeof(w_char)); +        memcpy(ckey_utf, &t[0], ckeyl * sizeof(w_char));        else          ckeyl = 0;      } else { @@ -142,11 +142,11 @@ SuggestMgr::SuggestMgr(const char* tryme, int maxn, AffixMgr* aptr) {      if (ctry)        ctryl = strlen(ctry);      if (ctry && utf8) { -      w_char t[MAXSWL]; 
-      ctryl = u8_u16(t, MAXSWL, tryme); +      std::vector<w_char> t; +      ctryl = u8_u16(t, tryme);        ctry_utf = (w_char*)malloc(ctryl * sizeof(w_char));        if (ctry_utf) -        memcpy(ctry_utf, t, ctryl * sizeof(w_char)); +        memcpy(ctry_utf, &t[0], ctryl * sizeof(w_char));        else          ctryl = 0;      } @@ -213,7 +213,7 @@ int SuggestMgr::suggest(char*** slst,                          int* onlycompoundsug) {    int nocompoundtwowords = 0;    char** wlst; -  w_char word_utf[MAXSWL]; +  std::vector<w_char> word_utf;    int wl = 0;    int nsugorig = nsug;    std::string w2; @@ -242,7 +242,7 @@ int SuggestMgr::suggest(char*** slst,    }    if (utf8) { -    wl = u8_u16(word_utf, MAXSWL, word); +    wl = u8_u16(word_utf, word);      if (wl == -1) {        *slst = wlst;        return nsug; @@ -257,7 +257,7 @@ int SuggestMgr::suggest(char*** slst,      // suggestions for an uppercase word (html -> HTML)      if ((nsug < maxSug) && (nsug > -1)) { -      nsug = (utf8) ? capchars_utf(wlst, word_utf, wl, nsug, cpdsuggest) +      nsug = (utf8) ? capchars_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)                      : capchars(wlst, word, nsug, cpdsuggest);      } @@ -280,56 +280,56 @@ int SuggestMgr::suggest(char*** slst,      // did we swap the order of chars by mistake      if ((nsug < maxSug) && (nsug > -1) &&          (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { -      nsug = (utf8) ? swapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) +      nsug = (utf8) ? swapchar_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)                      : swapchar(wlst, word, nsug, cpdsuggest);      }      // did we swap the order of non adjacent chars by mistake      if ((nsug < maxSug) && (nsug > -1) &&          (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { -      nsug = (utf8) ? longswapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) +      nsug = (utf8) ? 
longswapchar_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)                      : longswapchar(wlst, word, nsug, cpdsuggest);      }      // did we just hit the wrong key in place of a good char (case and keyboard)      if ((nsug < maxSug) && (nsug > -1) &&          (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { -      nsug = (utf8) ? badcharkey_utf(wlst, word_utf, wl, nsug, cpdsuggest) +      nsug = (utf8) ? badcharkey_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)                      : badcharkey(wlst, word, nsug, cpdsuggest);      }      // did we add a char that should not be there      if ((nsug < maxSug) && (nsug > -1) &&          (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { -      nsug = (utf8) ? extrachar_utf(wlst, word_utf, wl, nsug, cpdsuggest) +      nsug = (utf8) ? extrachar_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)                      : extrachar(wlst, word, nsug, cpdsuggest);      }      // did we forgot a char      if ((nsug < maxSug) && (nsug > -1) &&          (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { -      nsug = (utf8) ? forgotchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) +      nsug = (utf8) ? forgotchar_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)                      : forgotchar(wlst, word, nsug, cpdsuggest);      }      // did we move a char      if ((nsug < maxSug) && (nsug > -1) &&          (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { -      nsug = (utf8) ? movechar_utf(wlst, word_utf, wl, nsug, cpdsuggest) +      nsug = (utf8) ? movechar_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)                      : movechar(wlst, word, nsug, cpdsuggest);      }      // did we just hit the wrong key in place of a good char      if ((nsug < maxSug) && (nsug > -1) &&          (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { -      nsug = (utf8) ? badchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) +      nsug = (utf8) ? 
badchar_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)                      : badchar(wlst, word, nsug, cpdsuggest);      }      // did we double two characters      if ((nsug < maxSug) && (nsug > -1) &&          (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { -      nsug = (utf8) ? doubletwochars_utf(wlst, word_utf, wl, nsug, cpdsuggest) +      nsug = (utf8) ? doubletwochars_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)                      : doubletwochars(wlst, word, nsug, cpdsuggest);      } @@ -357,88 +357,17 @@ int SuggestMgr::suggest(char*** slst,    return nsug;  } -// generate suggestions for a word with typical mistake -//    pass in address of array of char * pointers -#ifdef HUNSPELL_EXPERIMENTAL -int SuggestMgr::suggest_auto(char*** slst, const char* w, int nsug) { -  int nocompoundtwowords = 0; -  char** wlst; -  int oldSug; - -  char w2[MAXWORDUTF8LEN]; -  const char* word = w; - -  // word reversing wrapper for complex prefixes -  if (complexprefixes) { -    strcpy(w2, w); -    if (utf8) -      reverseword_utf(w2); -    else -      reverseword(w2); -    word = w2; -  } - -  if (*slst) { -    wlst = *slst; -  } else { -    wlst = (char**)malloc(maxSug * sizeof(char*)); -    if (wlst == NULL) -      return -1; -  } - -  for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0); -       cpdsuggest++) { -    // limit compound suggestion -    if (cpdsuggest > 0) -      oldSug = nsug; - -    // perhaps we made a typical fault of spelling -    if ((nsug < maxSug) && (nsug > -1)) -      nsug = replchars(wlst, word, nsug, cpdsuggest); - -    // perhaps we made chose the wrong char from a related set -    if ((nsug < maxSug) && (nsug > -1) && -        (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) -      nsug = mapchars(wlst, word, nsug, cpdsuggest); - -    if ((cpdsuggest == 0) && (nsug > 0)) -      nocompoundtwowords = 1; - -    // perhaps we forgot to hit space and two words ran together - -    if ((nsug < maxSug) && (nsug > -1) && -        
(!cpdsuggest || (nsug < oldSug + maxcpdsugs)) && -        check_forbidden(word, strlen(word))) { -      nsug = twowords(wlst, word, nsug, cpdsuggest); -    } - -  }  // repeating ``for'' statement compounding support - -  if (nsug < 0) { -    for (int i = 0; i < maxSug; i++) -      if (wlst[i] != NULL) -        free(wlst[i]); -    free(wlst); -    return -1; -  } - -  *slst = wlst; -  return nsug; -} -#endif  // END OF HUNSPELL_EXPERIMENTAL CODE -  // suggestions for an uppercase word (html -> HTML)  int SuggestMgr::capchars_utf(char** wlst,                               const w_char* word,                               int wl,                               int ns,                               int cpdsuggest) { -  char candidate[MAXSWUTF8L]; -  w_char candidate_utf[MAXSWL]; -  memcpy(candidate_utf, word, wl * sizeof(w_char)); -  mkallcap_utf(candidate_utf, wl, langnum); -  u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); -  return testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, +  std::vector<w_char> candidate_utf(word, word + wl); +  mkallcap_utf(candidate_utf, langnum); +  std::string candidate; +  u16_u8(candidate, candidate_utf); +  return testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,                   NULL);  } @@ -449,7 +378,7 @@ int SuggestMgr::capchars(char** wlst,                           int cpdsuggest) {    std::string candidate(word);    mkallcap(candidate, csconv); -  return testsug(wlst, candidate.data(), candidate.size(), ns, cpdsuggest, NULL, +  return testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,                   NULL);  } @@ -458,10 +387,9 @@ int SuggestMgr::mapchars(char** wlst,                           const char* word,                           int ns,                           int cpdsuggest) { -  char candidate[MAXSWUTF8L]; +  std::string candidate;    clock_t timelimit;    int timer; -  candidate[0] = '\0';    int wl = strlen(word);    if (wl < 2 || !pAMgr) @@ 
-474,14 +402,13 @@ int SuggestMgr::mapchars(char** wlst,    timelimit = clock();    timer = MINTIMER; -  return map_related(word, (char*)&candidate, 0, 0, wlst, cpdsuggest, ns, +  return map_related(word, candidate, 0, wlst, cpdsuggest, ns,                       maptable, nummap, &timer, &timelimit);  }  int SuggestMgr::map_related(const char* word, -                            char* candidate, +                            std::string& candidate,                              int wn, -                            int cn,                              char** wlst,                              int cpdsuggest,                              int ns, @@ -491,17 +418,15 @@ int SuggestMgr::map_related(const char* word,                              clock_t* timelimit) {    if (*(word + wn) == '\0') {      int cwrd = 1; -    *(candidate + cn) = '\0'; -    int wl = strlen(candidate);      for (int m = 0; m < ns; m++) { -      if (strcmp(candidate, wlst[m]) == 0) { +      if (candidate == wlst[m]) {          cwrd = 0;          break;        }      } -    if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { +    if ((cwrd) && checkword(candidate.c_str(), candidate.size(), cpdsuggest, timer, timelimit)) {        if (ns < maxSug) { -        wlst[ns] = mystrdup(candidate); +        wlst[ns] = mystrdup(candidate.c_str());          if (wlst[ns] == NULL)            return -1;          ns++; @@ -515,9 +440,11 @@ int SuggestMgr::map_related(const char* word,        int len = strlen(maptable[j].set[k]);        if (strncmp(maptable[j].set[k], word + wn, len) == 0) {          in_map = 1; +        size_t cn = candidate.size();          for (int l = 0; l < maptable[j].len; l++) { -          strcpy(candidate + cn, maptable[j].set[l]); -          ns = map_related(word, candidate, wn + len, strlen(candidate), wlst, +          candidate.resize(cn); +          candidate.append(maptable[j].set[l]); +          ns = map_related(word, candidate, wn + len, wlst,                         
    cpdsuggest, ns, maptable, nummap, timer, timelimit);            if (!(*timer))              return ns; @@ -526,8 +453,8 @@ int SuggestMgr::map_related(const char* word,      }    }    if (!in_map) { -    *(candidate + cn) = *(word + wn); -    ns = map_related(word, candidate, wn + 1, cn + 1, wlst, cpdsuggest, ns, +    candidate.push_back(*(word + wn)); +    ns = map_related(word, candidate, wn + 1, wlst, cpdsuggest, ns,                       maptable, nummap, timer, timelimit);    }    return ns; @@ -539,9 +466,7 @@ int SuggestMgr::replchars(char** wlst,                            const char* word,                            int ns,                            int cpdsuggest) { -  char candidate[MAXSWUTF8L]; -  const char* r; -  int lenr, lenp; +  std::string candidate;    int wl = strlen(word);    if (wl < 2 || !pAMgr)      return ns; @@ -550,45 +475,42 @@ int SuggestMgr::replchars(char** wlst,    if (reptable == NULL)      return ns;    for (int i = 0; i < numrep; i++) { -    r = word; -    lenr = strlen(reptable[i].pattern2); -    lenp = strlen(reptable[i].pattern); +    const char* r = word;      // search every occurence of the pattern in the word      while ((r = strstr(r, reptable[i].pattern)) != NULL &&             (!reptable[i].end || strlen(r) == strlen(reptable[i].pattern)) &&             (!reptable[i].start || r == word)) { -      strcpy(candidate, word); -      if (r - word + lenr + strlen(r + lenp) >= MAXSWUTF8L) -        break; -      strcpy(candidate + (r - word), reptable[i].pattern2); -      strcpy(candidate + (r - word) + lenr, r + lenp); -      ns = testsug(wlst, candidate, wl - lenp + lenr, ns, cpdsuggest, NULL, +      candidate.assign(word); +      candidate.resize(r - word); +      candidate.append(reptable[i].pattern2); +      int lenp = strlen(reptable[i].pattern); +      candidate.append(r + lenp); +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,                     NULL);        if (ns == -1)         
 return -1;        // check REP suggestions with space -      char* sp = strchr(candidate, ' '); -      if (sp) { -        char* prev = candidate; -        while (sp) { -          *sp = '\0'; -          if (checkword(prev, strlen(prev), 0, NULL, NULL)) { +      size_t sp = candidate.find(' '); +      if (sp != std::string::npos) { +        size_t prev = 0; +        while (sp != std::string::npos) { +          std::string prev_chunk = candidate.substr(prev, sp - prev); +          if (checkword(prev_chunk.c_str(), prev_chunk.size(), 0, NULL, NULL)) {              int oldns = ns; -            *sp = ' '; -            ns = testsug(wlst, sp + 1, strlen(sp + 1), ns, cpdsuggest, NULL, +            std::string post_chunk = candidate.substr(sp + 1); +            ns = testsug(wlst, post_chunk.c_str(), post_chunk.size(), ns, cpdsuggest, NULL,                           NULL);              if (ns == -1)                return -1;              if (oldns < ns) {                free(wlst[ns - 1]); -              wlst[ns - 1] = mystrdup(candidate); +              wlst[ns - 1] = mystrdup(candidate.c_str());                if (!wlst[ns - 1])                  return -1;              }            } -          *sp = ' ';            prev = sp + 1; -          sp = strchr(prev, ' '); +          sp = candidate.find(' ', prev);          }        }        r++;  // search for the next letter @@ -603,7 +525,6 @@ int SuggestMgr::doubletwochars(char** wlst,                                 const char* word,                                 int ns,                                 int cpdsuggest) { -  char candidate[MAXSWUTF8L];    int state = 0;    int wl = strlen(word);    if (wl < 5 || !pAMgr) @@ -612,9 +533,9 @@ int SuggestMgr::doubletwochars(char** wlst,      if (word[i] == word[i - 2]) {        state++;        if (state == 3) { -        strcpy(candidate, word); -        strcpy(candidate + i - 1, word + i + 1); -        ns = testsug(wlst, candidate, wl - 2, ns, cpdsuggest, NULL, NULL); +        
std::string candidate(word, word + i - 1); +        candidate.insert(candidate.end(), word + i + 1, word + wl); +        ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);          if (ns == -1)            return -1;          state = 0; @@ -633,20 +554,18 @@ int SuggestMgr::doubletwochars_utf(char** wlst,                                     int wl,                                     int ns,                                     int cpdsuggest) { -  w_char candidate_utf[MAXSWL]; -  char candidate[MAXSWUTF8L];    int state = 0;    if (wl < 5 || !pAMgr)      return ns;    for (int i = 2; i < wl; i++) { -    if (w_char_eq(word[i], word[i - 2])) { +    if (word[i] == word[i - 2]) {        state++;        if (state == 3) { -        memcpy(candidate_utf, word, (i - 1) * sizeof(w_char)); -        memcpy(candidate_utf + i - 1, word + i + 1, -               (wl - i - 1) * sizeof(w_char)); -        u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl - 2); -        ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, +        std::vector<w_char> candidate_utf(word, word + i - 1); +        candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl); +        std::string candidate; +        u16_u8(candidate, candidate_utf); +        ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,                       NULL);          if (ns == -1)            return -1; @@ -665,19 +584,16 @@ int SuggestMgr::badcharkey(char** wlst,                             const char* word,                             int ns,                             int cpdsuggest) { -  char tmpc; -  char candidate[MAXSWUTF8L]; -  int wl = strlen(word); -  strcpy(candidate, word); +  std::string candidate(word); +    // swap out each char one by one and try uppercase and neighbor    // keyboard chars in its place to see if that makes a good word - -  for (int i = 0; i < wl; i++) { -    tmpc = candidate[i]; +  for (size_t i = 0; i < 
candidate.size(); ++i) { +    char tmpc = candidate[i];      // check with uppercase letters      candidate[i] = csconv[((unsigned char)tmpc)].cupper;      if (tmpc != candidate[i]) { -      ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);        if (ns == -1)          return -1;        candidate[i] = tmpc; @@ -689,13 +605,13 @@ int SuggestMgr::badcharkey(char** wlst,      while (loc) {        if ((loc > ckey) && (*(loc - 1) != '|')) {          candidate[i] = *(loc - 1); -        ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); +        ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);          if (ns == -1)            return -1;        }        if ((*(loc + 1) != '|') && (*(loc + 1) != '\0')) {          candidate[i] = *(loc + 1); -        ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); +        ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);          if (ns == -1)            return -1;        } @@ -713,19 +629,17 @@ int SuggestMgr::badcharkey_utf(char** wlst,                                 int wl,                                 int ns,                                 int cpdsuggest) { -  w_char tmpc; -  w_char candidate_utf[MAXSWL]; -  char candidate[MAXSWUTF8L]; -  memcpy(candidate_utf, word, wl * sizeof(w_char)); +  std::string candidate; +  std::vector<w_char> candidate_utf(word, word + wl);    // swap out each char one by one and try all the tryme    // chars in its place to see if that makes a good word    for (int i = 0; i < wl; i++) { -    tmpc = candidate_utf[i]; +    w_char tmpc = candidate_utf[i];      // check with uppercase letters -    mkallcap_utf(candidate_utf + i, 1, langnum); -    if (!w_char_eq(tmpc, candidate_utf[i])) { -      u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); -      ns = testsug(wlst, candidate, strlen(candidate), ns, 
cpdsuggest, NULL, +    candidate_utf[i] = upper_utf(candidate_utf[i], 1); +    if (tmpc != candidate_utf[i]) { +      u16_u8(candidate, candidate_utf); +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,                     NULL);        if (ns == -1)          return -1; @@ -735,28 +649,28 @@ int SuggestMgr::badcharkey_utf(char** wlst,      if (!ckey)        continue;      w_char* loc = ckey_utf; -    while ((loc < (ckey_utf + ckeyl)) && !w_char_eq(*loc, tmpc)) +    while ((loc < (ckey_utf + ckeyl)) && *loc != tmpc)        loc++;      while (loc < (ckey_utf + ckeyl)) { -      if ((loc > ckey_utf) && !w_char_eq(*(loc - 1), W_VLINE)) { +      if ((loc > ckey_utf) && *(loc - 1) != W_VLINE) {          candidate_utf[i] = *(loc - 1); -        u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); -        ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, +        u16_u8(candidate, candidate_utf); +        ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,                       NULL);          if (ns == -1)            return -1;        } -      if (((loc + 1) < (ckey_utf + ckeyl)) && !w_char_eq(*(loc + 1), W_VLINE)) { +      if (((loc + 1) < (ckey_utf + ckeyl)) && (*(loc + 1) != W_VLINE)) {          candidate_utf[i] = *(loc + 1); -        u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); -        ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, +        u16_u8(candidate, candidate_utf); +        ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,                       NULL);          if (ns == -1)            return -1;        }        do {          loc++; -      } while ((loc < (ckey_utf + ckeyl)) && !w_char_eq(*loc, tmpc)); +      } while ((loc < (ckey_utf + ckeyl)) && *loc != tmpc);      }      candidate_utf[i] = tmpc;    } @@ -765,26 +679,23 @@ int SuggestMgr::badcharkey_utf(char** wlst,  // error is wrong char in place of correct one  int 
SuggestMgr::badchar(char** wlst, const char* word, int ns, int cpdsuggest) { -  char tmpc; -  char candidate[MAXSWUTF8L]; +  std::string candidate(word);    clock_t timelimit = clock();    int timer = MINTIMER; -  int wl = strlen(word); -  strcpy(candidate, word);    // swap out each char one by one and try all the tryme    // chars in its place to see if that makes a good word    for (int j = 0; j < ctryl; j++) { -    for (int i = wl - 1; i >= 0; i--) { -      tmpc = candidate[i]; +    for (std::string::reverse_iterator aI = candidate.rbegin(), aEnd = candidate.rend(); aI != aEnd; ++aI) { +      char tmpc = *aI;        if (ctry[j] == tmpc)          continue; -      candidate[i] = ctry[j]; -      ns = testsug(wlst, candidate, wl, ns, cpdsuggest, &timer, &timelimit); +      *aI = ctry[j]; +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, &timer, &timelimit);        if (ns == -1)          return -1;        if (!timer)          return ns; -      candidate[i] = tmpc; +      *aI = tmpc;      }    }    return ns; @@ -796,22 +707,20 @@ int SuggestMgr::badchar_utf(char** wlst,                              int wl,                              int ns,                              int cpdsuggest) { -  w_char tmpc; -  w_char candidate_utf[MAXSWL]; -  char candidate[MAXSWUTF8L]; +  std::vector<w_char> candidate_utf(word, word + wl); +  std::string candidate;    clock_t timelimit = clock();    int timer = MINTIMER; -  memcpy(candidate_utf, word, wl * sizeof(w_char));    // swap out each char one by one and try all the tryme    // chars in its place to see if that makes a good word    for (int j = 0; j < ctryl; j++) {      for (int i = wl - 1; i >= 0; i--) { -      tmpc = candidate_utf[i]; -      if (w_char_eq(tmpc, ctry_utf[j])) +      w_char tmpc = candidate_utf[i]; +      if (tmpc == ctry_utf[j])          continue;        candidate_utf[i] = ctry_utf[j]; -      u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); -      ns = testsug(wlst, candidate, 
strlen(candidate), ns, cpdsuggest, &timer, +      u16_u8(candidate, candidate_utf); +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, &timer,                     &timelimit);        if (ns == -1)          return -1; @@ -829,24 +738,20 @@ int SuggestMgr::extrachar_utf(char** wlst,                                int wl,                                int ns,                                int cpdsuggest) { -  char candidate[MAXSWUTF8L]; -  w_char candidate_utf[MAXSWL]; -  w_char* p; -  w_char tmpc = W_VLINE;  // not used value, only for VCC warning message -  if (wl < 2) +  std::vector<w_char> candidate_utf(word, word + wl); +  if (candidate_utf.size() < 2)      return ns;    // try omitting one char of word at a time -  memcpy(candidate_utf, word, wl * sizeof(w_char)); -  for (p = candidate_utf + wl - 1; p >= candidate_utf; p--) { -    w_char tmpc2 = *p; -    if (p < candidate_utf + wl - 1) -      *p = tmpc; -    u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl - 1); -    ns = -        testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); +  for (size_t i = 0; i < candidate_utf.size(); ++i) { +    size_t index = candidate_utf.size() - 1 - i; +    w_char tmpc = candidate_utf[index]; +    candidate_utf.erase(candidate_utf.begin() + index); +    std::string candidate; +    u16_u8(candidate, candidate_utf); +    ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);      if (ns == -1)        return -1; -    tmpc = tmpc2; +    candidate_utf.insert(candidate_utf.begin() + index, tmpc);    }    return ns;  } @@ -856,21 +761,18 @@ int SuggestMgr::extrachar(char** wlst,                            const char* word,                            int ns,                            int cpdsuggest) { -  char tmpc = '\0'; -  char candidate[MAXSWUTF8L]; -  char* p; -  int wl = strlen(word); -  if (wl < 2) +  std::string candidate(word); +  if (candidate.size() < 2)      return ns;    // try omitting one char 
of word at a time -  strcpy(candidate, word); -  for (p = candidate + wl - 1; p >= candidate; p--) { -    char tmpc2 = *p; -    *p = tmpc; -    ns = testsug(wlst, candidate, wl - 1, ns, cpdsuggest, NULL, NULL); +  for (size_t i = 0; i < candidate.size(); ++i) { +    size_t index = candidate.size() - 1 - i; +    char tmpc = candidate[index]; +    candidate.erase(candidate.begin() + index); +    ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);      if (ns == -1)        return -1; -    tmpc = tmpc2; +    candidate.insert(candidate.begin() + index, tmpc);    }    return ns;  } @@ -880,23 +782,22 @@ int SuggestMgr::forgotchar(char** wlst,                             const char* word,                             int ns,                             int cpdsuggest) { -  char candidate[MAXSWUTF8L + 4]; -  char* p; +  std::string candidate(word);    clock_t timelimit = clock();    int timer = MINTIMER; -  int wl = strlen(word); +    // try inserting a tryme character before every letter (and the null    // terminator) -  for (int i = 0; i < ctryl; i++) { -    strcpy(candidate, word); -    for (p = candidate + wl; p >= candidate; p--) { -      *(p + 1) = *p; -      *p = ctry[i]; -      ns = testsug(wlst, candidate, wl + 1, ns, cpdsuggest, &timer, &timelimit); +  for (int k = 0; k < ctryl; ++k) { +    for (size_t i = 0; i <= candidate.size(); ++i) { +      size_t index = candidate.size() - i; +      candidate.insert(candidate.begin() + index, ctry[k]); +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, &timer, &timelimit);        if (ns == -1)          return -1;        if (!timer)          return ns; +      candidate.erase(candidate.begin() + index);      }    }    return ns; @@ -908,25 +809,25 @@ int SuggestMgr::forgotchar_utf(char** wlst,                                 int wl,                                 int ns,                                 int cpdsuggest) { -  w_char candidate_utf[MAXSWL + 1]; -  char 
candidate[MAXSWUTF8L + 4]; -  w_char* p; +  std::vector<w_char> candidate_utf(word, word + wl);    clock_t timelimit = clock();    int timer = MINTIMER; +    // try inserting a tryme character at the end of the word and before every    // letter -  for (int i = 0; i < ctryl; i++) { -    memcpy(candidate_utf, word, wl * sizeof(w_char)); -    for (p = candidate_utf + wl; p >= candidate_utf; p--) { -      *(p + 1) = *p; -      *p = ctry_utf[i]; -      u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl + 1); -      ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, &timer, +  for (int k = 0; k < ctryl; ++k) { +    for (size_t i = 0; i <= candidate_utf.size(); ++i) { +      size_t index = candidate_utf.size() - i; +      candidate_utf.insert(candidate_utf.begin() + index, ctry_utf[k]); +      std::string candidate; +      u16_u8(candidate, candidate_utf); +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, &timer,                     &timelimit);        if (ns == -1)          return -1;        if (!timer)          return ns; +      candidate_utf.erase(candidate_utf.begin() + index);      }    }    return ns; @@ -937,8 +838,6 @@ int SuggestMgr::twowords(char** wlst,                           const char* word,                           int ns,                           int cpdsuggest) { -  char candidate[MAXSWUTF8L]; -  char* p;    int c1, c2;    int forbidden = 0;    int cwrd; @@ -950,10 +849,12 @@ int SuggestMgr::twowords(char** wlst,    if (langnum == LANG_hu)      forbidden = check_forbidden(word, wl); +  char* candidate = (char*)malloc(wl + 2);    strcpy(candidate + 1, word); +    // split the string into two pieces after every char    // if both pieces are good words make them a suggestion -  for (p = candidate + 1; p[1] != '\0'; p++) { +  for (char* p = candidate + 1; p[1] != '\0'; p++) {      p[-1] = *p;      // go to end of the UTF-8 character      while (utf8 && ((p[1] & 0xc0) == 0x80)) { @@ -988,12 +889,16 @@ int 
SuggestMgr::twowords(char** wlst,          if (ns < maxSug) {            if (cwrd) {              wlst[ns] = mystrdup(candidate); -            if (wlst[ns] == NULL) +            if (wlst[ns] == NULL) { +              free(candidate);                return -1; +            }              ns++;            } -        } else +        } else { +          free(candidate);            return ns; +        }          // add two word suggestion with dash, if TRY string contains          // "a" or "-"          // NOTE: cwrd doesn't modified for REP twoword sugg. @@ -1009,16 +914,21 @@ int SuggestMgr::twowords(char** wlst,            if (ns < maxSug) {              if (cwrd) {                wlst[ns] = mystrdup(candidate); -              if (wlst[ns] == NULL) +              if (wlst[ns] == NULL) { +                free(candidate);                  return -1; +              }                ns++;              } -          } else +          } else { +            free(candidate);              return ns; +          }          }        }      }    } +  free(candidate);    return ns;  } @@ -1027,42 +937,40 @@ int SuggestMgr::swapchar(char** wlst,                           const char* word,                           int ns,                           int cpdsuggest) { -  char candidate[MAXSWUTF8L]; -  char* p; -  char tmpc; -  int wl = strlen(word); +  std::string candidate(word); +  if (candidate.size() < 2) +    return ns; +    // try swapping adjacent chars one by one -  strcpy(candidate, word); -  for (p = candidate; p[1] != 0; p++) { -    tmpc = *p; -    *p = p[1]; -    p[1] = tmpc; -    ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); +  for (size_t i = 0; i < candidate.size() - 1; ++i) { +    std::swap(candidate[i], candidate[i+1]); +    ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);      if (ns == -1)        return -1; -    p[1] = *p; -    *p = tmpc; +    std::swap(candidate[i], candidate[i+1]);    } +    // try double swaps 
for short words    // ahev -> have, owudl -> would -  if (wl == 4 || wl == 5) { +  if (candidate.size() == 4 || candidate.size() == 5) {      candidate[0] = word[1];      candidate[1] = word[0];      candidate[2] = word[2]; -    candidate[wl - 2] = word[wl - 1]; -    candidate[wl - 1] = word[wl - 2]; -    ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); +    candidate[candidate.size() - 2] = word[candidate.size() - 1]; +    candidate[candidate.size() - 1] = word[candidate.size() - 2]; +    ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);      if (ns == -1)        return -1; -    if (wl == 5) { +    if (candidate.size() == 5) {        candidate[0] = word[0];        candidate[1] = word[2];        candidate[2] = word[1]; -      ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);        if (ns == -1)          return -1;      }    } +    return ns;  } @@ -1072,44 +980,39 @@ int SuggestMgr::swapchar_utf(char** wlst,                               int wl,                               int ns,                               int cpdsuggest) { -  w_char candidate_utf[MAXSWL]; -  char candidate[MAXSWUTF8L]; -  w_char* p; -  w_char tmpc; -  int len = 0; +  std::vector<w_char> candidate_utf(word, word + wl); +  if (candidate_utf.size() < 2) +    return ns; + +  std::string candidate;    // try swapping adjacent chars one by one -  memcpy(candidate_utf, word, wl * sizeof(w_char)); -  for (p = candidate_utf; p < (candidate_utf + wl - 1); p++) { -    tmpc = *p; -    *p = p[1]; -    p[1] = tmpc; -    u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); -    if (len == 0) -      len = strlen(candidate); -    ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL); +  for (size_t i = 0; i < candidate_utf.size() - 1; ++i) { +    std::swap(candidate_utf[i], candidate_utf[i+1]); +    u16_u8(candidate, candidate_utf); +    ns = 
testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);      if (ns == -1)        return -1; -    p[1] = *p; -    *p = tmpc; +    std::swap(candidate_utf[i], candidate_utf[i+1]);    } +    // try double swaps for short words    // ahev -> have, owudl -> would, suodn -> sound -  if (wl == 4 || wl == 5) { +  if (candidate_utf.size() == 4 || candidate_utf.size() == 5) {      candidate_utf[0] = word[1];      candidate_utf[1] = word[0];      candidate_utf[2] = word[2]; -    candidate_utf[wl - 2] = word[wl - 1]; -    candidate_utf[wl - 1] = word[wl - 2]; -    u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); -    ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL); +    candidate_utf[candidate_utf.size() - 2] = word[candidate_utf.size() - 1]; +    candidate_utf[candidate_utf.size() - 1] = word[candidate_utf.size() - 2]; +    u16_u8(candidate, candidate_utf); +    ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);      if (ns == -1)        return -1; -    if (wl == 5) { +    if (candidate_utf.size() == 5) {        candidate_utf[0] = word[0];        candidate_utf[1] = word[2];        candidate_utf[2] = word[1]; -      u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); -      ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL); +      u16_u8(candidate, candidate_utf); +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);        if (ns == -1)          return -1;      } @@ -1122,24 +1025,16 @@ int SuggestMgr::longswapchar(char** wlst,                               const char* word,                               int ns,                               int cpdsuggest) { -  char candidate[MAXSWUTF8L]; -  char* p; -  char* q; -  char tmpc; -  int wl = strlen(word); +  std::string candidate(word);    // try swapping not adjacent chars one by one -  strcpy(candidate, word); -  for (p = candidate; *p != 0; p++) { -    for (q = candidate; *q != 0; q++) { -      if 
(abs((int)(p - q)) > 1) { -        tmpc = *p; -        *p = *q; -        *q = tmpc; -        ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); +  for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) { +    for (std::string::iterator q = candidate.begin(); q < candidate.end(); ++q) { +      if (abs(std::distance(q, p)) > 1) { +        std::swap(*p, *q); +        ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);          if (ns == -1)            return -1; -        *q = *p; -        *p = tmpc; +        std::swap(*p, *q);        }      }    } @@ -1152,26 +1047,19 @@ int SuggestMgr::longswapchar_utf(char** wlst,                                   int wl,                                   int ns,                                   int cpdsuggest) { -  w_char candidate_utf[MAXSWL]; -  char candidate[MAXSWUTF8L]; -  w_char* p; -  w_char* q; -  w_char tmpc; +  std::vector<w_char> candidate_utf(word, word + wl);    // try swapping not adjacent chars -  memcpy(candidate_utf, word, wl * sizeof(w_char)); -  for (p = candidate_utf; p < (candidate_utf + wl); p++) { -    for (q = candidate_utf; q < (candidate_utf + wl); q++) { -      if (abs((int)(p - q)) > 1) { -        tmpc = *p; -        *p = *q; -        *q = tmpc; -        u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); -        ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, +  for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { +    for (std::vector<w_char>::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) { +      if (abs(std::distance(q, p)) > 1) { +        std::swap(*p, *q); +        std::string candidate; +        u16_u8(candidate, candidate_utf); +        ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,                       NULL);          if (ns == -1)            return -1; -        *q = *p; -        *p = tmpc; +        std::swap(*p, 
*q);        }      }    } @@ -1183,40 +1071,35 @@ int SuggestMgr::movechar(char** wlst,                           const char* word,                           int ns,                           int cpdsuggest) { -  char candidate[MAXSWUTF8L]; -  char* p; -  char* q; -  char tmpc; +  std::string candidate(word); +  if (candidate.size() < 2) +    return ns; -  int wl = strlen(word);    // try moving a char -  strcpy(candidate, word); -  for (p = candidate; *p != 0; p++) { -    for (q = p + 1; (*q != 0) && ((q - p) < 10); q++) { -      tmpc = *(q - 1); -      *(q - 1) = *q; -      *q = tmpc; -      if ((q - p) < 2) +  for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) { +    for (std::string::iterator q = p + 1; q < candidate.end() && std::distance(p, q) < 10; ++q) { +      std::swap(*q, *(q - 1)); +      if (std::distance(p, q) < 2)          continue;  // omit swap char -      ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);        if (ns == -1)          return -1;      } -    strcpy(candidate, word); +    std::copy(word, word + candidate.size(), candidate.begin());    } -  for (p = candidate + wl - 1; p > candidate; p--) { -    for (q = p - 1; (q >= candidate) && ((p - q) < 10); q--) { -      tmpc = *(q + 1); -      *(q + 1) = *q; -      *q = tmpc; -      if ((p - q) < 2) + +  for (std::string::reverse_iterator p = candidate.rbegin(), pEnd = candidate.rend() - 1; p != pEnd; ++p) { +    for (std::string::reverse_iterator q = p + 1, qEnd = candidate.rend(); q != qEnd && std::distance(p, q) < 10; ++q) { +      std::swap(*q, *(q - 1)); +      if (std::distance(p, q) < 2)          continue;  // omit swap char -      ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);        if (ns == -1)          return -1;      } -    strcpy(candidate, word); 
+    std::copy(word, word + candidate.size(), candidate.begin());    } +    return ns;  } @@ -1226,49 +1109,47 @@ int SuggestMgr::movechar_utf(char** wlst,                               int wl,                               int ns,                               int cpdsuggest) { -  w_char candidate_utf[MAXSWL]; -  char candidate[MAXSWUTF8L]; -  w_char* p; -  w_char* q; -  w_char tmpc; +  std::vector<w_char> candidate_utf(word, word + wl); +  if (candidate_utf.size() < 2) +    return ns; +    // try moving a char -  memcpy(candidate_utf, word, wl * sizeof(w_char)); -  for (p = candidate_utf; p < (candidate_utf + wl); p++) { -    for (q = p + 1; (q < (candidate_utf + wl)) && ((q - p) < 10); q++) { -      tmpc = *(q - 1); -      *(q - 1) = *q; -      *q = tmpc; -      if ((q - p) < 2) +  for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { +    for (std::vector<w_char>::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) { +      std::swap(*q, *(q - 1)); +      if (std::distance(p, q) < 2)          continue;  // omit swap char -      u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); -      ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, +      std::string candidate; +      u16_u8(candidate, candidate_utf); +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,                     NULL);        if (ns == -1)          return -1;      } -    memcpy(candidate_utf, word, wl * sizeof(w_char)); +    std::copy(word, word + candidate_utf.size(), candidate_utf.begin());    } -  for (p = candidate_utf + wl - 1; p > candidate_utf; p--) { -    for (q = p - 1; (q >= candidate_utf) && ((p - q) < 10); q--) { -      tmpc = *(q + 1); -      *(q + 1) = *q; -      *q = tmpc; -      if ((p - q) < 2) + +  for (std::vector<w_char>::iterator p = candidate_utf.begin() + candidate_utf.size() - 1; p > candidate_utf.begin(); --p) { +    for (std::vector<w_char>::iterator q = p 
- 1; q >= candidate_utf.begin() && std::distance(q, p) < 10; --q) { +      std::swap(*q, *(q + 1)); +      if (std::distance(q, p) < 2)          continue;  // omit swap char -      u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); -      ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, +      std::string candidate; +      u16_u8(candidate, candidate_utf); +      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,                     NULL);        if (ns == -1)          return -1;      } -    memcpy(candidate_utf, word, wl * sizeof(w_char)); +    std::copy(word, word + candidate_utf.size(), candidate_utf.begin());    } +    return ns;  }  // generate a set of suggestions for very poorly spelled words  int SuggestMgr::ngsuggest(char** wlst, -                          char* w, +                          const char* w,                            int ns,                            HashMgr** pHMgr,                            int md) { @@ -1295,7 +1176,6 @@ int SuggestMgr::ngsuggest(char** wlst,    int low = NGRAM_LOWERING;    std::string w2; -  char f[MAXSWUTF8L];    const char* word = w;    // word reversing wrapper for complex prefixes @@ -1308,10 +1188,9 @@ int SuggestMgr::ngsuggest(char** wlst,      word = w2.c_str();    } -  char mw[MAXSWUTF8L]; -  w_char u8[MAXSWL]; +  std::vector<w_char> u8;    int nc = strlen(word); -  int n = (utf8) ? u8_u16(u8, MAXSWL, word) : nc; +  int n = (utf8) ? u8_u16(u8, word) : nc;    // set character based ngram suggestion for words with non-BMP Unicode    // characters @@ -1325,21 +1204,20 @@ int SuggestMgr::ngsuggest(char** wlst,    struct hentry* hp = NULL;    int col = -1;    phonetable* ph = (pAMgr) ? 
pAMgr->get_phonetable() : NULL; -  char target[MAXSWUTF8L]; +  std::string target;    std::string candidate;    if (ph) {      if (utf8) {        std::vector<w_char> _w; -      int _wl = u8_u16(_w, word); -      mkallcap_utf(_w, _wl, langnum); +      u8_u16(_w, word); +      mkallcap_utf(_w, langnum);        u16_u8(candidate, _w);      } else {        candidate.assign(word);        if (!nonbmp)          mkallcap(candidate, csconv);      } -    phonet(candidate.c_str(), target, nc, -           *ph);  // XXX phonet() is 8-bit (nc, not n) +    target = phonet(candidate, *ph);  // XXX phonet() is 8-bit (nc, not n)    }    FLAG forbiddenword = pAMgr ? pAMgr->get_forbiddenword() : FLAG_NULL; @@ -1361,27 +1239,27 @@ int SuggestMgr::ngsuggest(char** wlst,             leftcommonsubstring(word, HENTRY_WORD(hp));        // check special pronounciation +      std::string f;        if ((hp->var & H_OPT_PHON) &&            copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) {          int sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) + -                  +leftcommonsubstring(word, f); +                  +leftcommonsubstring(word, f.c_str());          if (sc2 > sc)            sc = sc2;        }        int scphon = -20000;        if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) { -        char target2[MAXSWUTF8L];          if (utf8) {            std::vector<w_char> _w; -          int _wl = u8_u16(_w, HENTRY_WORD(hp)); -          mkallcap_utf(_w, _wl, langnum); +          u8_u16(_w, HENTRY_WORD(hp)); +          mkallcap_utf(_w, langnum);            u16_u8(candidate, _w);          } else {            candidate.assign(HENTRY_WORD(hp));            mkallcap(candidate, csconv);          } -        phonet(candidate.c_str(), target2, -1, *ph); +        std::string target2 = phonet(candidate, *ph);          scphon = 2 * ngram(3, target, target2, NGRAM_LONGER_WORSE);        } @@ -1415,14 +1293,17 @@ int SuggestMgr::ngsuggest(char** wlst,    int thresh = 0;    for (int sp = 1; sp < 4; sp++) 
{      if (utf8) { -      for (int k = sp; k < n; k += 4) -        *((unsigned short*)u8 + k) = '*'; -      u16_u8(mw, MAXSWUTF8L, u8, n); +      for (int k = sp; k < n; k += 4) { +        u8[k].l = '*'; +        u8[k].h = 0; +      } +      std::string mw; +      u16_u8(mw, u8);        thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);      } else { -      strcpy(mw, word); +      std::string mw(word);        for (int k = sp; k < n; k += 4) -        *(mw + k) = '*'; +        mw[k] = '*';        thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);      }    } @@ -1454,11 +1335,14 @@ int SuggestMgr::ngsuggest(char** wlst,    for (i = 0; i < MAX_ROOTS; i++) {      if (roots[i]) {        struct hentry* rp = roots[i]; + +      std::string f; +      const char *field = NULL; +      if ((rp->var & H_OPT_PHON) && copy_field(f, HENTRY_DATA(rp), MORPH_PHON)) +          field = f.c_str();        int nw = pAMgr->expand_rootword(            glst, MAX_WORDS, HENTRY_WORD(rp), rp->blen, rp->astr, rp->alen, word, -          nc, -          ((rp->var & H_OPT_PHON) ? 
copy_field(f, HENTRY_DATA(rp), MORPH_PHON) -                                  : NULL)); +          nc, field);        for (int k = 0; k < nw; k++) {          sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH + low) + @@ -1524,7 +1408,7 @@ int SuggestMgr::ngsuggest(char** wlst,        if (utf8) {          std::vector<w_char> _w;          len = u8_u16(_w, guess[i]); -        mkallsmall_utf(_w, len, langnum); +        mkallsmall_utf(_w, langnum);          u16_u8(gl, _w);        } else {          gl.assign(guess[i]); @@ -1578,7 +1462,7 @@ int SuggestMgr::ngsuggest(char** wlst,          if (utf8) {            std::vector<w_char> _w;            len = u8_u16(_w, rootsphon[i]); -          mkallsmall_utf(_w, len, langnum); +          mkallsmall_utf(_w, langnum);            u16_u8(gl, _w);          } else {            gl.assign(rootsphon[i]); @@ -1707,7 +1591,8 @@ int SuggestMgr::checkword(const char* word,    if (pAMgr) {      if (cpdsuggest == 1) {        if (pAMgr->get_compound()) { -        rv = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 1, +        struct hentry* rwords[100];  // buffer for COMPOUND pattern checking +        rv = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 1,                                     0);  // EXT          if (rv &&              (!(rv2 = pAMgr->lookup(word)) || !rv2->astr || @@ -1790,51 +1675,6 @@ int SuggestMgr::check_forbidden(const char* word, int len) {    return 0;  } -#ifdef HUNSPELL_EXPERIMENTAL -// suggest possible stems -int SuggestMgr::suggest_pos_stems(char*** slst, const char* w, int nsug) { -  char** wlst; - -  struct hentry* rv = NULL; - -  char w2[MAXSWUTF8L]; -  const char* word = w; - -  // word reversing wrapper for complex prefixes -  if (complexprefixes) { -    strcpy(w2, w); -    if (utf8) -      reverseword_utf(w2); -    else -      reverseword(w2); -    word = w2; -  } - -  int wl = strlen(word); - -  if (*slst) { -    wlst = *slst; -  } else { -    wlst = 
(char**)calloc(maxSug, sizeof(char*)); -    if (wlst == NULL) -      return -1; -  } - -  rv = pAMgr->suffix_check(word, wl, 0, NULL, wlst, maxSug, &nsug); - -  // delete dash from end of word -  if (nsug > 0) { -    for (int j = 0; j < nsug; j++) { -      if (wlst[j][strlen(wlst[j]) - 1] == '-') -        wlst[j][strlen(wlst[j]) - 1] = '\0'; -    } -  } - -  *slst = wlst; -  return nsug; -} -#endif  // END OF HUNSPELL_EXPERIMENTAL CODE -  char* SuggestMgr::suggest_morph(const char* w) {    char result[MAXLNLEN];    char* r = (char*)result; @@ -1887,33 +1727,15 @@ char* SuggestMgr::suggest_morph(const char* w) {      free(st);    } -  if (pAMgr->get_compound() && (*result == '\0')) -    pAMgr->compound_check_morph(word, strlen(word), 0, 0, 100, 0, NULL, 0, &r, +  if (pAMgr->get_compound() && (*result == '\0')) { +    struct hentry* rwords[100];  // buffer for COMPOUND pattern checking +    pAMgr->compound_check_morph(word, strlen(word), 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, &r,                                  NULL); +  }    return (*result) ? 
mystrdup(line_uniq(result, MSEP_REC)) : NULL;  } -#ifdef HUNSPELL_EXPERIMENTAL -char* SuggestMgr::suggest_morph_for_spelling_error(const char* word) { -  char* p = NULL; -  char** wlst = (char**)calloc(maxSug, sizeof(char*)); -  if (!**wlst) -    return NULL; -  // we will use only the first suggestion -  for (int i = 0; i < maxSug - 1; i++) -    wlst[i] = ""; -  int ns = suggest(&wlst, word, maxSug - 1, NULL); -  if (ns == maxSug) { -    p = suggest_morph(wlst[maxSug - 1]); -    free(wlst[maxSug - 1]); -  } -  if (wlst) -    free(wlst); -  return p; -} -#endif  // END OF HUNSPELL_EXPERIMENTAL CODE -  /* affixation */  char* SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) {    char result[MAXLNLEN]; @@ -1973,23 +1795,24 @@ char* SuggestMgr::suggest_gen(char** desc, int n, const char* pattern) {    if (n == 0 || !pAMgr)      return NULL; -  char result[MAXLNLEN]; -  char result2[MAXLNLEN]; +  std::string result2;    std::string newpattern; -  *result2 = '\0';    struct hentry* rv = NULL;    // search affixed forms with and without derivational suffixes    while (1) {      for (int k = 0; k < n; k++) { -      *result = '\0'; +      std::string result; +        // add compound word parts (except the last one)        char* s = (char*)desc[k];        char* part = strstr(s, MORPH_PART);        if (part) {          char* nextpart = strstr(part + 1, MORPH_PART);          while (nextpart) { -          copy_field(result + strlen(result), part, MORPH_PART); +          std::string field; +          copy_field(field, part, MORPH_PART); +          result.append(field);            part = nextpart;            nextpart = strstr(part + 1, MORPH_PART);          } @@ -2030,16 +1853,14 @@ char* SuggestMgr::suggest_gen(char** desc, int n, const char* pattern) {                free(sg);                sg = NULL;                for (int j = 0; j < genl; j++) { +                result2.push_back(MSEP_REC); +                result2.append(result);                  if 
(strstr(pl[i], MORPH_SURF_PFX)) { -                  int r2l = strlen(result2); -                  result2[r2l] = MSEP_REC; -                  strcpy(result2 + r2l + 1, result); -                  copy_field(result2 + strlen(result2), pl[i], MORPH_SURF_PFX); -                  mystrcat(result2, gen[j], MAXLNLEN); -                } else { -                  sprintf(result2 + strlen(result2), "%c%s%s", MSEP_REC, result, -                          gen[j]); +                  std::string field; +                  copy_field(field, pl[i], MORPH_SURF_PFX); +                  result2.append(field);                  } +                result2.append(gen[j]);                }                freelist(&gen, genl);              } @@ -2050,14 +1871,14 @@ char* SuggestMgr::suggest_gen(char** desc, int n, const char* pattern) {        freelist(&pl, pln);      } -    if (*result2 || !strstr(pattern, MORPH_DERI_SFX)) +    if (!result2.empty() || !strstr(pattern, MORPH_DERI_SFX))        break;      newpattern.assign(pattern);      mystrrep(newpattern, MORPH_DERI_SFX, MORPH_TERM_SFX);      pattern = newpattern.c_str();    } -  return (*result2 ? mystrdup(result2) : NULL); +  return (!result2.empty() ? 
mystrdup(result2.c_str()) : NULL);  }  // generate an n-gram score comparing s1 and s2 @@ -2080,7 +1901,7 @@ int SuggestMgr::ngram(int n,        return 0;      // lowering dictionary word      if (opt & NGRAM_LOWERING) -      mkallsmall_utf(su2, l2, langnum); +      mkallsmall_utf(su2, langnum);      for (int j = 1; j <= n; j++) {        ns = 0;        for (int i = 0; i <= (l1 - j); i++) { @@ -2147,25 +1968,20 @@ int SuggestMgr::ngram(int n,  // length of the left common substring of s1 and (decapitalised) s2  int SuggestMgr::leftcommonsubstring(const char* s1, const char* s2) {    if (utf8) { -    w_char su1[MAXSWL]; -    w_char su2[MAXSWL]; -    su1[0].l = su2[0].l = su1[0].h = su2[0].h = 0; +    std::vector<w_char> su1; +    std::vector<w_char> su2; +    int l1 = u8_u16(su1, s1); +    int l2 = u8_u16(su2, s2);      // decapitalize dictionary word      if (complexprefixes) { -      int l1 = u8_u16(su1, MAXSWL, s1); -      int l2 = u8_u16(su2, MAXSWL, s2); -      if (*((short*)su1 + l1 - 1) == *((short*)su2 + l2 - 1)) +      if (su1[l1 - 1] == su2[l2 - 1])          return 1;      } else { -      int i; -      u8_u16(su1, 1, s1); -      u8_u16(su2, 1, s2); -      unsigned short idx = (su2->h << 8) + su2->l; -      unsigned short otheridx = (su1->h << 8) + su1->l; +      unsigned short idx = su2.empty() ? 0 : (su2[0].h << 8) + su2[0].l; +      unsigned short otheridx = su1.empty() ? 
0 : (su1[0].h << 8) + su1[0].l;        if (otheridx != idx && (otheridx != unicodetolower(idx, langnum)))          return 0; -      int l1 = u8_u16(su1, MAXSWL, s1); -      int l2 = u8_u16(su2, MAXSWL, s2); +      int i;        for (i = 1; (i < l1) && (i < l2) && (su1[i].l == su2[i].l) &&                    (su1[i].h == su2[i].h);             i++) @@ -2176,9 +1992,9 @@ int SuggestMgr::leftcommonsubstring(const char* s1, const char* s2) {      if (complexprefixes) {        int l1 = strlen(s1);        int l2 = strlen(s2); -      if (*(s2 + l1 - 1) == *(s2 + l2 - 1)) +      if (l1 <= l2 && s2[l1 - 1] == s2[l2 - 1])          return 1; -    } else { +    } else if (csconv) {        const char* olds = s1;        // decapitalise dictionary word        if ((*s1 != *s2) && (*s1 != csconv[((unsigned char)*s2)].clower)) @@ -2201,22 +2017,22 @@ int SuggestMgr::commoncharacterpositions(const char* s1,    int diffpos[2];    *is_swap = 0;    if (utf8) { -    w_char su1[MAXSWL]; -    w_char su2[MAXSWL]; -    int l1 = u8_u16(su1, MAXSWL, s1); -    int l2 = u8_u16(su2, MAXSWL, s2); +    std::vector<w_char> su1; +    std::vector<w_char> su2; +    int l1 = u8_u16(su1, s1); +    int l2 = u8_u16(su2, s2);      if (l1 <= 0 || l2 <= 0)        return 0;      // decapitalize dictionary word      if (complexprefixes) { -      mkallsmall_utf(su2 + l2 - 1, 1, langnum); +      su2[l2 - 1] = lower_utf(su2[l2 - 1], langnum);      } else { -      mkallsmall_utf(su2, 1, langnum); +      su2[0] = lower_utf(su2[0], langnum);      }      for (int i = 0; (i < l1) && (i < l2); i++) { -      if (((short*)su1)[i] == ((short*)su2)[i]) { +      if (su1[i] == su2[i]) {          num++;        } else {          if (diff < 2) @@ -2225,8 +2041,8 @@ int SuggestMgr::commoncharacterpositions(const char* s1,        }      }      if ((diff == 2) && (l1 == l2) && -        (((short*)su1)[diffpos[0]] == ((short*)su2)[diffpos[1]]) && -        (((short*)su1)[diffpos[1]] == ((short*)su2)[diffpos[0]])) +        
(su1[diffpos[0]] == su2[diffpos[1]]) && +        (su1[diffpos[1]] == su2[diffpos[0]]))        *is_swap = 1;    } else {      size_t i; @@ -2257,8 +2073,8 @@ int SuggestMgr::commoncharacterpositions(const char* s1,  int SuggestMgr::mystrlen(const char* word) {    if (utf8) { -    w_char w[MAXSWL]; -    return u8_u16(w, MAXSWL, word); +    std::vector<w_char> w; +    return u8_u16(w, word);    } else      return strlen(word);  } @@ -2297,15 +2113,15 @@ void SuggestMgr::lcs(const char* s,                       int* l2,                       char** result) {    int n, m; -  w_char su[MAXSWL]; -  w_char su2[MAXSWL]; +  std::vector<w_char> su; +  std::vector<w_char> su2;    char* b;    char* c;    int i;    int j;    if (utf8) { -    m = u8_u16(su, MAXSWL, s); -    n = u8_u16(su2, MAXSWL, s2); +    m = u8_u16(su, s); +    n = u8_u16(su2, s2);    } else {      m = strlen(s);      n = strlen(s2); @@ -2326,8 +2142,8 @@ void SuggestMgr::lcs(const char* s,      c[j] = 0;    for (i = 1; i <= m; i++) {      for (j = 1; j <= n; j++) { -      if (((utf8) && (*((short*)su + i - 1) == *((short*)su2 + j - 1))) || -          ((!utf8) && ((*(s + i - 1)) == (*(s2 + j - 1))))) { +      if (((utf8) && (su[i - 1] == su2[j - 1])) || +          ((!utf8) && (s[i - 1] == s2[j - 1]))) {          c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j - 1] + 1;          b[i * (n + 1) + j] = LCS_UPLEFT;        } else if (c[(i - 1) * (n + 1) + j] >= c[i * (n + 1) + j - 1]) { diff --git a/libs/hunspell/src/suggestmgr.hxx b/libs/hunspell/src/suggestmgr.hxx index c8762f81ef..675d98eb8f 100644 --- a/libs/hunspell/src/suggestmgr.hxx +++ b/libs/hunspell/src/suggestmgr.hxx @@ -74,8 +74,6 @@  #ifndef _SUGGESTMGR_HXX_  #define _SUGGESTMGR_HXX_ -#define MAXSWL 100 -#define MAXSWUTF8L (MAXSWL * 4)  #define MAX_ROOTS 100  #define MAX_WORDS 100  #define MAX_GUESS 200 @@ -132,7 +130,7 @@ class LIBHUNSPELL_DLL_EXPORTED SuggestMgr {    ~SuggestMgr();    int suggest(char*** slst, const char* word, int nsug, int* 
onlycmpdsug); -  int ngsuggest(char** wlst, char* word, int ns, HashMgr** pHMgr, int md); +  int ngsuggest(char** wlst, const char* word, int ns, HashMgr** pHMgr, int md);    int suggest_auto(char*** slst, const char* word, int nsug);    int suggest_stems(char*** slst, const char* word, int nsug);    int suggest_pos_stems(char*** slst, const char* word, int nsug); @@ -177,8 +175,7 @@ class LIBHUNSPELL_DLL_EXPORTED SuggestMgr {    int mapchars(char**, const char*, int, int);    int map_related(const char*, -                  char*, -                  int, +                  std::string&,                    int,                    char** wlst,                    int, diff --git a/libs/hunspell/src/w_char.hxx b/libs/hunspell/src/w_char.hxx index 9de7989f4f..336c454f79 100644 --- a/libs/hunspell/src/w_char.hxx +++ b/libs/hunspell/src/w_char.hxx @@ -42,13 +42,27 @@  #define __WCHARHXX__  #ifndef GCC -typedef struct { +struct w_char {  #else -typedef struct __attribute__((packed)) { +struct __attribute__((packed)) w_char {  #endif    unsigned char l;    unsigned char h; -} w_char; + +  friend bool operator<(const w_char a, const w_char b) { +    unsigned short a_idx = (a.h << 8) + a.l; +    unsigned short b_idx = (b.h << 8) + b.l; +    return a_idx < b_idx; +  } + +  friend bool operator==(const w_char a, const w_char b) { +    return (((a).l == (b).l) && ((a).h == (b).h)); +  } + +  friend bool operator!=(const w_char a, const w_char b) { +    return !(a == b);; +  } +};  // two character arrays  struct replentry { diff --git a/plugins/SpellChecker/src/Version.h b/plugins/SpellChecker/src/Version.h index 237d49359b..cf5c959244 100644 --- a/plugins/SpellChecker/src/Version.h +++ b/plugins/SpellChecker/src/Version.h @@ -1,14 +1,14 @@  #define __MAJOR_VERSION           0
  #define __MINOR_VERSION           2
  #define __RELEASE_NUM             6
 -#define __BUILD_NUM               3
 +#define __BUILD_NUM               4
  #include <stdver.h>
  #define __PLUGIN_NAME             "Spell checker"
  #define __FILENAME                "SpellChecker.dll"
  #define __DESCRIPTION             "Spell checker for the message windows. Uses Hunspell to do the checking."
 -#define __AUTHOR                  "Ricardo Pescuma Domenecci, FREAK_THEMIGHTY"
 +#define __AUTHOR                  "Ricardo Pescuma Domenecci, Wishmaster"
  #define __AUTHOREMAIL             "pescuma@miranda-im.org"
  #define __AUTHORWEB               "http://miranda-ng.org/p/SpellChecker/"
  #define __COPYRIGHT               "© 2006-2010 Ricardo Pescuma Domenecci"
 diff --git a/plugins/SpellChecker/src/dictionary.cpp b/plugins/SpellChecker/src/dictionary.cpp index c726a9b92a..371b934d01 100644 --- a/plugins/SpellChecker/src/dictionary.cpp +++ b/plugins/SpellChecker/src/dictionary.cpp @@ -534,9 +534,8 @@ public:  		if (mir_strcmp(dic_enc, "UTF-8") == 0) {  			codePage = CP_UTF8; - -			int wcs_len; -			hwordchars = fromHunspell((char *)hunspell->get_wordchars_utf16(&wcs_len)); +			const std::vector<w_char> wordchars_utf16 = hunspell->get_wordchars_utf16(); +			hwordchars = fromHunspell((char *)&wordchars_utf16[0]);  		}  		else {  | 
