diff options
author | Tobias Weimer <wishmaster51@googlemail.com> | 2016-05-13 19:32:21 +0000 |
---|---|---|
committer | Tobias Weimer <wishmaster51@googlemail.com> | 2016-05-13 19:32:21 +0000 |
commit | 37c98eaad76b7f1bf86c75fe2c32cf6aa11f7c6f (patch) | |
tree | 32aede144aa0cd0f2dd058b003cdbd534a2f969c /libs/hunspell/src/hunspell.c++ | |
parent | e73bb3845517a31fa795e8d2174fcc8572835b33 (diff) |
SpellChecker: Updated hunspell to 1.4.1
git-svn-id: http://svn.miranda-ng.org/main/trunk@16828 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
Diffstat (limited to 'libs/hunspell/src/hunspell.c++')
-rw-r--r-- | libs/hunspell/src/hunspell.c++ | 1554 |
1 files changed, 541 insertions, 1013 deletions
diff --git a/libs/hunspell/src/hunspell.c++ b/libs/hunspell/src/hunspell.c++ index 726c72931a..f7c1581087 100644 --- a/libs/hunspell/src/hunspell.c++ +++ b/libs/hunspell/src/hunspell.c++ @@ -85,6 +85,9 @@ #include <limits> #include <string> +#define MAXWORDLEN 176 +#define MAXWORDUTF8LEN (MAXWORDLEN * 3) + Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key) { encoding = NULL; csconv = NULL; @@ -158,14 +161,16 @@ int Hunspell::add_dic(const char* dpath, const char* key) { // set the capitalization type // return the length of the "cleaned" (and UTF-8 encoded) word -int Hunspell::cleanword2(char* dest, +size_t Hunspell::cleanword2(std::string& dest, + std::vector<w_char>& dest_utf, const char* src, - w_char* dest_utf, int* nc, int* pcaptype, - int* pabbrev) { - unsigned char* p = (unsigned char*)dest; - const unsigned char* q = (const unsigned char*)src; + size_t* pabbrev) { + dest.clear(); + dest_utf.clear(); + + const char* q = src; // first skip over any leading blanks while ((*q != '\0') && (*q == ' ')) @@ -173,7 +178,7 @@ int Hunspell::cleanword2(char* dest, // now strip off any trailing periods (recording their presence) *pabbrev = 0; - int nl = strlen((const char*)q); + int nl = strlen(q); while ((nl > 0) && (*(q + nl - 1) == '.')) { nl--; (*pabbrev)++; @@ -182,35 +187,26 @@ int Hunspell::cleanword2(char* dest, // if no characters are left it can't be capitalized if (nl <= 0) { *pcaptype = NOCAP; - *p = '\0'; return 0; } - strncpy(dest, (char*)q, nl); - *(dest + nl) = '\0'; - nl = strlen(dest); + dest.append(q, nl); + nl = dest.size(); if (utf8) { - *nc = u8_u16(dest_utf, MAXWORDLEN, dest); - // don't check too long words - if (*nc >= MAXWORDLEN) - return 0; - if (*nc == -1) { // big Unicode character (non BMP area) - *pcaptype = NOCAP; - return nl; - } - *pcaptype = get_captype_utf8(dest_utf, *nc, langnum); + *nc = u8_u16(dest_utf, dest); + *pcaptype = get_captype_utf8(dest_utf, langnum); } else { - *pcaptype = get_captype(dest, nl, 
csconv); + *pcaptype = get_captype(dest, csconv); *nc = nl; } return nl; } -int Hunspell::cleanword(char* dest, +void Hunspell::cleanword(std::string& dest, const char* src, int* pcaptype, int* pabbrev) { - unsigned char* p = (unsigned char*)dest; + dest.clear(); const unsigned char* q = (const unsigned char*)src; int firstcap = 0; @@ -229,8 +225,7 @@ int Hunspell::cleanword(char* dest, // if no characters are left it can't be capitalized if (nl <= 0) { *pcaptype = NOCAP; - *p = '\0'; - return 0; + return; } // now determine the capitalization type of the first nl letters @@ -245,27 +240,25 @@ int Hunspell::cleanword(char* dest, ncap++; if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++; - *p++ = *q++; + dest.push_back(*q++); nl--; } // remember to terminate the destination string - *p = '\0'; - firstcap = csconv[(unsigned char)(*dest)].ccase; + firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase; } else { - unsigned short idx; - w_char t[MAXWORDLEN]; - nc = u8_u16(t, MAXWORDLEN, src); - for (int i = 0; i < nc; i++) { - idx = (t[i].h << 8) + t[i].l; + std::vector<w_char> t; + u8_u16(t, src); + for (size_t i = 0; i < t.size(); ++i) { + unsigned short idx = (t[i].h << 8) + t[i].l; unsigned short low = unicodetolower(idx, langnum); if (idx != low) ncap++; if (unicodetoupper(idx, langnum) == low) nneutral++; } - u16_u8(dest, MAXWORDUTF8LEN, t, nc); + u16_u8(dest, t); if (ncap) { - idx = (t[0].h << 8) + t[0].l; + unsigned short idx = (t[0].h << 8) + t[0].l; firstcap = (idx != unicodetolower(idx, langnum)); } } @@ -282,117 +275,60 @@ int Hunspell::cleanword(char* dest, } else { *pcaptype = HUHCAP; } - return strlen(dest); } -void Hunspell::mkallcap(char* p) { +void Hunspell::mkallcap(std::string& u8) { if (utf8) { - w_char u[MAXWORDLEN]; - int nc = u8_u16(u, MAXWORDLEN, p); - unsigned short idx; - for (int i = 0; i < nc; i++) { - idx = (u[i].h << 8) + u[i].l; - if (idx != unicodetoupper(idx, langnum)) { - u[i].h = (unsigned char)(unicodetoupper(idx, 
langnum) >> 8); - u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); - } - } - u16_u8(p, MAXWORDUTF8LEN, u, nc); + std::vector<w_char> u16; + u8_u16(u16, u8); + ::mkallcap_utf(u16, langnum); + u16_u8(u8, u16); } else { - while (*p != '\0') { - *p = csconv[((unsigned char)*p)].cupper; - p++; - } - } -} - -int Hunspell::mkallcap2(char* p, w_char* u, int nc) { - if (utf8) { - unsigned short idx; - for (int i = 0; i < nc; i++) { - idx = (u[i].h << 8) + u[i].l; - unsigned short up = unicodetoupper(idx, langnum); - if (idx != up) { - u[i].h = (unsigned char)(up >> 8); - u[i].l = (unsigned char)(up & 0x00FF); - } - } - u16_u8(p, MAXWORDUTF8LEN, u, nc); - return strlen(p); - } else { - while (*p != '\0') { - *p = csconv[((unsigned char)*p)].cupper; - p++; - } - } - return nc; -} - -void Hunspell::mkallsmall(char* p) { - while (*p != '\0') { - *p = csconv[((unsigned char)*p)].clower; - p++; + ::mkallcap(u8, csconv); } } -int Hunspell::mkallsmall2(char* p, w_char* u, int nc) { +int Hunspell::mkallsmall2(std::string& u8, std::vector<w_char>& u16) { if (utf8) { - unsigned short idx; - for (int i = 0; i < nc; i++) { - idx = (u[i].h << 8) + u[i].l; - unsigned short low = unicodetolower(idx, langnum); - if (idx != low) { - u[i].h = (unsigned char)(low >> 8); - u[i].l = (unsigned char)(low & 0x00FF); - } - } - u16_u8(p, MAXWORDUTF8LEN, u, nc); - return strlen(p); + ::mkallsmall_utf(u16, langnum); + u16_u8(u8, u16); } else { - while (*p != '\0') { - *p = csconv[((unsigned char)*p)].clower; - p++; - } + ::mkallsmall(u8, csconv); } - return nc; + return u8.size(); } // convert UTF-8 sharp S codes to latin 1 -char* Hunspell::sharps_u8_l1(char* dest, char* source) { - char* p = dest; - *p = *source; - for (p++, source++; *(source - 1); p++, source++) { - *p = *source; - if (*source == '\x9F') - *--p = '\xDF'; - } +std::string Hunspell::sharps_u8_l1(const std::string& source) { + std::string dest(source); + mystrrep(dest, "\xC3\x9F", "\xDF"); return dest; } // recursive 
search for right ss - sharp s permutations -hentry* Hunspell::spellsharps(char* base, - char* pos, +hentry* Hunspell::spellsharps(std::string& base, + size_t n_pos, int n, int repnum, - char* tmp, int* info, char** root) { - pos = strstr(pos, "ss"); - if (pos && (n < MAXSHARPS)) { - *pos = '\xC3'; - *(pos + 1) = '\x9F'; - hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root); + size_t pos = base.find("ss", n_pos); + if (pos != std::string::npos && (n < MAXSHARPS)) { + base[pos] = '\xC3'; + base[pos + 1] = '\x9F'; + hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root); if (h) return h; - *pos = 's'; - *(pos + 1) = 's'; - h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root); + base[pos] = 's'; + base[pos + 1] = 's'; + h = spellsharps(base, pos + 2, n + 1, repnum, info, root); if (h) return h; } else if (repnum > 0) { if (utf8) - return checkword(base, info, root); - return checkword(sharps_u8_l1(tmp, base), info, root); + return checkword(base.c_str(), info, root); + std::string tmp(sharps_u8_l1(base)); + return checkword(tmp.c_str(), info, root); } return NULL; } @@ -403,7 +339,7 @@ int Hunspell::is_keepcase(const hentry* rv) { } /* insert a word to the beginning of the suggestion array and return ns */ -int Hunspell::insert_sug(char*** slst, char* word, int ns) { +int Hunspell::insert_sug(char*** slst, const char* word, int ns) { if (!*slst) return ns; char* dup = mystrdup(word); @@ -421,11 +357,6 @@ int Hunspell::insert_sug(char*** slst, char* word, int ns) { int Hunspell::spell(const char* word, int* info, char** root) { struct hentry* rv = NULL; - // need larger vector. For example, Turkish capital letter I converted a - // 2-byte UTF-8 character (dotless i) by mkallsmall. 
- char cw[MAXWORDUTF8LEN]; - char wspace[MAXWORDUTF8LEN]; - w_char unicw[MAXWORDLEN]; int info2 = 0; if (!info) @@ -437,7 +368,6 @@ int Hunspell::spell(const char* word, int* info, char** root) { if (strcmp(word, SPELL_XML) == 0) return 1; int nc = strlen(word); - int wl2 = 0; if (utf8) { if (nc >= MAXWORDUTF8LEN) return 0; @@ -445,19 +375,26 @@ int Hunspell::spell(const char* word, int* info, char** root) { if (nc >= MAXWORDLEN) return 0; } - int captype = 0; - int abbv = 0; - int wl = 0; + int captype = NOCAP; + size_t abbv = 0; + size_t wl = 0; + + std::string scw; + std::vector<w_char> sunicw; // input conversion RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0; - if (convstatus < 0) - return 0; - else if (convstatus > 0) - wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); - else - wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); + { + std::string wspace; + + int convstatus = rl ? rl->conv(word, wspace) : 0; + if (convstatus < 0) + return 0; + else if (convstatus > 0) + wl = cleanword2(scw, sunicw, wspace.c_str(), &nc, &captype, &abbv); + else + wl = cleanword2(scw, sunicw, word, &nc, &captype, &abbv); + } #ifdef MOZILLA_CLIENT // accept the abbreviated words without dots @@ -474,12 +411,12 @@ int Hunspell::spell(const char* word, int* info, char** root) { // "..", "--" etc.) 
enum { NBEGIN, NNUM, NSEP }; int nstate = NBEGIN; - int i; + size_t i; for (i = 0; (i < wl); i++) { - if ((cw[i] <= '9') && (cw[i] >= '0')) { + if ((scw[i] <= '9') && (scw[i] >= '0')) { nstate = NNUM; - } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) { + } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) { if ((nstate == NSEP) || (i == 0)) break; nstate = NSEP; @@ -496,75 +433,75 @@ int Hunspell::spell(const char* word, int* info, char** root) { *info += SPELL_ORIGCAP; /* FALLTHROUGH */ case NOCAP: - rv = checkword(cw, info, root); + rv = checkword(scw.c_str(), info, root); if ((abbv) && !(rv)) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - rv = checkword(wspace, info, root); + std::string u8buffer(scw); + u8buffer.push_back('.'); + rv = checkword(u8buffer.c_str(), info, root); } break; case ALLCAP: { *info += SPELL_ORIGCAP; - rv = checkword(cw, info, root); + rv = checkword(scw.c_str(), info, root); if (rv) break; if (abbv) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - rv = checkword(wspace, info, root); + std::string u8buffer(scw); + u8buffer.push_back('.'); + rv = checkword(u8buffer.c_str(), info, root); if (rv) break; } // Spec. prefix handling for Catalan, French, Italian: // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia). - if (pAMgr && strchr(cw, '\'')) { - mkallsmall2(cw, unicw, nc); - // There are no really sane circumstances where this could fail, - // but anyway... - if (char* apostrophe = strchr(cw, '\'')) { + size_t apos = pAMgr ? 
scw.find('\'') : std::string::npos; + if (apos != std::string::npos) { + mkallsmall2(scw, sunicw); + //conversion may result in string with different len to pre-mkallsmall2 + //so re-scan + if (apos != std::string::npos && apos < scw.size() - 1) { + std::string part1 = scw.substr(0, apos+1); + std::string part2 = scw.substr(apos+1); if (utf8) { - w_char tmpword[MAXWORDLEN]; - *apostrophe = '\0'; - wl2 = u8_u16(tmpword, MAXWORDLEN, cw); - *apostrophe = '\''; - if (wl2 >= 0 && wl2 < nc) { - mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1); - rv = checkword(cw, info, root); - if (rv) - break; - } + std::vector<w_char> part1u, part2u; + u8_u16(part1u, part1); + u8_u16(part2u, part2); + mkinitcap2(part2, part2u); + scw = part1 + part2; + sunicw = part1u; + sunicw.insert(sunicw.end(), part2u.begin(), part2u.end()); + rv = checkword(scw.c_str(), info, root); + if (rv) + break; } else { - mkinitcap2(apostrophe + 1, unicw, nc); - rv = checkword(cw, info, root); + mkinitcap2(part2, sunicw); + scw = part1 + part2; + rv = checkword(scw.c_str(), info, root); if (rv) break; } + mkinitcap2(scw, sunicw); + rv = checkword(scw.c_str(), info, root); + if (rv) + break; } - mkinitcap2(cw, unicw, nc); - rv = checkword(cw, info, root); - if (rv) - break; } - if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) { - char tmpword[MAXWORDUTF8LEN]; - wl = mkallsmall2(cw, unicw, nc); - memcpy(wspace, cw, (wl + 1)); - rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); + if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) { + + mkallsmall2(scw, sunicw); + std::string u8buffer(scw); + rv = spellsharps(u8buffer, 0, 0, 0, info, root); if (!rv) { - wl2 = mkinitcap2(cw, unicw, nc); - rv = spellsharps(cw, cw, 0, 0, tmpword, info, root); + mkinitcap2(scw, sunicw); + rv = spellsharps(scw, 0, 0, 0, info, root); } if ((abbv) && !(rv)) { - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); + 
u8buffer.push_back('.'); + rv = spellsharps(u8buffer, 0, 0, 0, info, root); if (!rv) { - memcpy(wspace, cw, wl2); - *(wspace + wl2) = '.'; - *(wspace + wl2 + 1) = '\0'; - rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); + u8buffer = std::string(scw); + u8buffer.push_back('.'); + rv = spellsharps(u8buffer, 0, 0, 0, info, root); } } if (rv) @@ -572,13 +509,14 @@ int Hunspell::spell(const char* word, int* info, char** root) { } } case INITCAP: { + *info += SPELL_ORIGCAP; - wl = mkallsmall2(cw, unicw, nc); - memcpy(wspace, cw, (wl + 1)); - wl2 = mkinitcap2(cw, unicw, nc); + mkallsmall2(scw, sunicw); + std::string u8buffer(scw); + mkinitcap2(scw, sunicw); if (captype == INITCAP) *info += SPELL_INITCAP; - rv = checkword(cw, info, root); + rv = checkword(scw.c_str(), info, root); if (captype == INITCAP) *info -= SPELL_INITCAP; // forbid bad capitalization @@ -593,18 +531,16 @@ int Hunspell::spell(const char* word, int* info, char** root) { if (rv) break; - rv = checkword(wspace, info, root); + rv = checkword(u8buffer.c_str(), info, root); if (abbv && !rv) { - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - rv = checkword(wspace, info, root); + u8buffer.push_back('.'); + rv = checkword(u8buffer.c_str(), info, root); if (!rv) { - memcpy(wspace, cw, wl2); - *(wspace + wl2) = '.'; - *(wspace + wl2 + 1) = '\0'; + u8buffer = scw; + u8buffer.push_back('.'); if (captype == INITCAP) *info += SPELL_INITCAP; - rv = checkword(wspace, info, root); + rv = checkword(u8buffer.c_str(), info, root); if (captype == INITCAP) *info -= SPELL_INITCAP; if (rv && is_keepcase(rv) && (captype == ALLCAP)) @@ -617,8 +553,8 @@ int Hunspell::spell(const char* word, int* info, char** root) { // if CHECKSHARPS: KEEPCASE words with \xDF are allowed // in INITCAP form, too. 
!(pAMgr->get_checksharps() && - ((utf8 && strstr(wspace, "\xC3\x9F")) || - (!utf8 && strchr(wspace, '\xDF')))))) + ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) || + (!utf8 && u8buffer.find('\xDF') != std::string::npos))))) rv = NULL; break; } @@ -637,67 +573,66 @@ int Hunspell::spell(const char* word, int* info, char** root) { // recursive breaking at break points if (wordbreak) { - char* s; - char r; + int nbr = 0; - wl = strlen(cw); + wl = scw.size(); int numbreak = pAMgr ? pAMgr->get_numbreak() : 0; // calculate break points for recursion limit for (int j = 0; j < numbreak; j++) { - s = cw; - do { - s = (char*)strstr(s, wordbreak[j]); - if (s) { - nbr++; - s++; - } - } while (s); + size_t len = strlen(wordbreak[j]); + size_t pos = 0; + while ((pos = scw.find(wordbreak[j], pos, len)) != std::string::npos) { + ++nbr; + pos += len; + } } if (nbr >= 10) return 0; // check boundary patterns (^begin and end$) for (int j = 0; j < numbreak; j++) { - int plen = strlen(wordbreak[j]); + size_t plen = strlen(wordbreak[j]); if (plen == 1 || plen > wl) continue; + if (wordbreak[j][0] == '^' && - strncmp(cw, wordbreak[j] + 1, plen - 1) == 0 && spell(cw + plen - 1)) + scw.compare(0, plen - 1, wordbreak[j] + 1, plen -1) == 0 && spell(scw.c_str() + plen - 1)) return 1; + if (wordbreak[j][plen - 1] == '$' && - strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) { - r = cw[wl - plen + 1]; - cw[wl - plen + 1] = '\0'; - if (spell(cw)) + scw.compare(wl - plen + 1, plen - 1, wordbreak[j], plen - 1) == 0) { + char r = scw[wl - plen + 1]; + scw[wl - plen + 1] = '\0'; + if (spell(scw.c_str())) return 1; - cw[wl - plen + 1] = r; + scw[wl - plen + 1] = r; } } // other patterns for (int j = 0; j < numbreak; j++) { - int plen = strlen(wordbreak[j]); - s = (char*)strstr(cw, wordbreak[j]); - if (s && (s > cw) && (s < cw + wl - plen)) { - if (!spell(s + plen)) + size_t plen = strlen(wordbreak[j]); + size_t found = scw.find(wordbreak[j]); + if ((found > 0) && (found < wl - 
plen)) { + if (!spell(scw.c_str() + found + plen)) continue; - r = *s; - *s = '\0'; + char r = scw[found]; + scw[found] = '\0'; // examine 2 sides of the break point - if (spell(cw)) + if (spell(scw.c_str())) return 1; - *s = r; + scw[found] = r; // LANG_hu: spec. dash rule if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) { - r = s[1]; - s[1] = '\0'; - if (spell(cw)) + r = scw[found + 1]; + scw[found + 1] = '\0'; + if (spell(scw.c_str())) return 1; // check the first part with dash - s[1] = r; + scw[found + 1] = r; } - // end of LANG speficic region + // end of LANG specific region } } } @@ -716,10 +651,9 @@ struct hentry* Hunspell::checkword(const char* w, int* info, char** root) { if (ignoredchars != NULL) { w2.assign(w); if (utf8) { - int ignoredchars_utf16_len; - unsigned short* ignoredchars_utf16 = - pAMgr->get_ignore_utf16(&ignoredchars_utf16_len); - remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len); + const std::vector<w_char>& ignoredchars_utf16 = + pAMgr->get_ignore_utf16(); + remove_ignored_chars_utf(w2, ignoredchars_utf16); } else { remove_ignored_chars(w2, ignoredchars); } @@ -802,37 +736,40 @@ struct hentry* Hunspell::checkword(const char* w, int* info, char** root) { return NULL; } if (root) { - *root = mystrdup(he->word); - if (*root && complexprefixes) { + std::string word_root(he->word); + if (complexprefixes) { if (utf8) - reverseword_utf(*root); + reverseword_utf(word_root); else - reverseword(*root); + reverseword(word_root); } + *root = mystrdup(word_root.c_str()); } // try check compound word } else if (pAMgr->get_compound()) { - he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info); + struct hentry* rwords[100]; // buffer for COMPOUND pattern checking + he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info); // LANG_hu section: `moving rule' with last dash if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) { char* dup = mystrdup(word); if (!dup) return 
NULL; dup[len - 1] = '\0'; - he = pAMgr->compound_check(dup, len - 1, -5, 0, 100, 0, NULL, 1, 0, + he = pAMgr->compound_check(dup, len - 1, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info); free(dup); } - // end of LANG speficic region + // end of LANG specific region if (he) { if (root) { - *root = mystrdup(he->word); - if (*root && complexprefixes) { + std::string word_root(he->word); + if (complexprefixes) { if (utf8) - reverseword_utf(*root); + reverseword_utf(word_root); else - reverseword(*root); + reverseword(word_root); } + *root = mystrdup(word_root.c_str()); } if (info) *info += SPELL_COMPOUND; @@ -845,11 +782,8 @@ struct hentry* Hunspell::checkword(const char* w, int* info, char** root) { int Hunspell::suggest(char*** slst, const char* word) { int onlycmpdsug = 0; - char cw[MAXWORDUTF8LEN]; - char wspace[MAXWORDUTF8LEN]; if (!pSMgr || maxdic == 0) return 0; - w_char unicw[MAXWORDLEN]; *slst = NULL; // process XML input of the simplified API (see manual) if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) { @@ -863,130 +797,132 @@ int Hunspell::suggest(char*** slst, const char* word) { if (nc >= MAXWORDLEN) return 0; } - int captype = 0; - int abbv = 0; - int wl = 0; + int captype = NOCAP; + size_t abbv = 0; + size_t wl = 0; + + std::string scw; + std::vector<w_char> sunicw; // input conversion RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0; - if (convstatus < 0) - return 0; - else if (convstatus > 0) - wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); - else - wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); + { + std::string wspace; + + int convstatus = rl ? 
rl->conv(word, wspace) : 0; + if (convstatus < 0) + return 0; + else if (convstatus > 0) + wl = cleanword2(scw, sunicw, wspace.c_str(), &nc, &captype, &abbv); + else + wl = cleanword2(scw, sunicw, word, &nc, &captype, &abbv); + + if (wl == 0) + return 0; + } - if (wl == 0) - return 0; int ns = 0; int capwords = 0; // check capitalized form for FORCEUCASE if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) { int info = SPELL_ORIGCAP; - char** wlst; - if (checkword(cw, &info, NULL)) { - if (*slst) { - wlst = *slst; - } else { - wlst = (char**)malloc(MAXSUGGESTION * sizeof(char*)); - if (wlst == NULL) - return -1; - *slst = wlst; - for (int i = 0; i < MAXSUGGESTION; i++) { - wlst[i] = NULL; - } + if (checkword(scw.c_str(), &info, NULL)) { + std::string form(scw); + mkinitcap(form); + + char** wlst = (char**)malloc(MAXSUGGESTION * sizeof(char*)); + if (wlst == NULL) + return -1; + *slst = wlst; + wlst[0] = mystrdup(form.c_str()); + for (int i = 1; i < MAXSUGGESTION; ++i) { + wlst[i] = NULL; } - wlst[0] = mystrdup(cw); - mkinitcap(wlst[0]); + return 1; } } switch (captype) { case NOCAP: { - ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); + ns = pSMgr->suggest(slst, scw.c_str(), ns, &onlycmpdsug); break; } case INITCAP: { capwords = 1; - ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); + ns = pSMgr->suggest(slst, scw.c_str(), ns, &onlycmpdsug); if (ns == -1) break; - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug); break; } case HUHINITCAP: capwords = 1; case HUHCAP: { - ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); + ns = pSMgr->suggest(slst, scw.c_str(), ns, &onlycmpdsug); if (ns != -1) { - int prevns; // something.The -> something. 
The - char* dot = strchr(cw, '.'); - if (dot && (dot > cw)) { + size_t dot_pos = scw.find('.'); + if (dot_pos != std::string::npos) { + std::string postdot = scw.substr(dot_pos + 1); int captype_; if (utf8) { - w_char w_[MAXWORDLEN]; - int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1); - captype_ = get_captype_utf8(w_, wl_, langnum); - } else - captype_ = get_captype(dot + 1, strlen(dot + 1), csconv); + std::vector<w_char> postdotu; + u8_u16(postdotu, postdot); + captype_ = get_captype_utf8(postdotu, langnum); + } else { + captype_ = get_captype(postdot, csconv); + } if (captype_ == INITCAP) { - char* st = mystrdup(cw); - if (st) { - char* newst = (char*)realloc(st, wl + 2); - if (newst == NULL) - free(st); - st = newst; - } - if (st) { - st[(dot - cw) + 1] = ' '; - strcpy(st + (dot - cw) + 2, dot + 1); - ns = insert_sug(slst, st, ns); - free(st); - } + std::string str(scw); + str.insert(dot_pos + 1, 1, ' '); + ns = insert_sug(slst, str.c_str(), ns); } } + + std::string wspace; + if (captype == HUHINITCAP) { // TheOpenOffice.org -> The OpenOffice.org - memcpy(wspace, cw, (wl + 1)); - mkinitsmall2(wspace, unicw, nc); - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); + wspace = scw; + mkinitsmall2(wspace, sunicw); + ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug); } - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - if (spell(wspace)) - ns = insert_sug(slst, wspace, ns); - prevns = ns; - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); + wspace = scw; + mkallsmall2(wspace, sunicw); + if (spell(wspace.c_str())) + ns = insert_sug(slst, wspace.c_str(), ns); + int prevns = ns; + ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug); if (captype == HUHINITCAP) { - mkinitcap2(wspace, unicw, nc); - if (spell(wspace)) - ns = insert_sug(slst, wspace, ns); - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); + mkinitcap2(wspace, sunicw); + if (spell(wspace.c_str())) + ns = insert_sug(slst, wspace.c_str(), ns); + ns = pSMgr->suggest(slst, 
wspace.c_str(), ns, &onlycmpdsug); } // aNew -> "a New" (instead of "a new") for (int j = prevns; j < ns; j++) { char* space = strchr((*slst)[j], ' '); if (space) { - int slen = strlen(space + 1); + size_t slen = strlen(space + 1); // different case after space (need capitalisation) - if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) { - w_char w[MAXWORDLEN]; - int wc = 0; - char* r = (*slst)[j]; + if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) { + std::string first((*slst)[j], space + 1); + std::string second(space + 1); + std::vector<w_char> w; if (utf8) - wc = u8_u16(w, MAXWORDLEN, space + 1); - mkinitcap2(space + 1, w, wc); + u8_u16(w, second); + mkinitcap2(second, w); // set as first suggestion + char* r = (*slst)[j]; for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1]; - (*slst)[0] = r; + free(r); + (*slst)[0] = mystrdup((first + second).c_str()); } } } @@ -995,35 +931,30 @@ int Hunspell::suggest(char*** slst, const char* word) { } case ALLCAP: { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug); if (ns == -1) break; - if (pAMgr && pAMgr->get_keepcase() && spell(wspace)) - ns = insert_sug(slst, wspace, ns); - mkinitcap2(wspace, unicw, nc); - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); + if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str())) + ns = insert_sug(slst, wspace.c_str(), ns); + mkinitcap2(wspace, sunicw); + ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug); for (int j = 0; j < ns; j++) { - mkallcap((*slst)[j]); + std::string form((*slst)[j]); + mkallcap(form); + if (pAMgr && pAMgr->get_checksharps()) { - char* pos; if (utf8) { - pos = strstr((*slst)[j], "\xC3\x9F"); - while (pos) { - *pos = 'S'; - *(pos + 1) = 'S'; - pos = strstr(pos + 2, "\xC3\x9F"); - } + mystrrep(form, "\xC3\x9F", "SS"); } else { - pos = 
strchr((*slst)[j], '\xDF'); - while (pos) { - (*slst)[j] = (char*)realloc((*slst)[j], strlen((*slst)[j]) + 2); - mystrrep((*slst)[j], "\xDF", "SS"); - pos = strchr((*slst)[j], '\xDF'); - } + mystrrep(form, "\xDF", "SS"); } } + + free((*slst)[j]); + (*slst)[j] = mystrdup(form.c_str()); + } break; } @@ -1035,11 +966,10 @@ int Hunspell::suggest(char*** slst, const char* word) { char* pos = strchr((*slst)[j], '-'); if (pos) { int info; - char w[MAXWORDUTF8LEN]; *pos = '\0'; - strcpy(w, (*slst)[j]); - strcat(w, pos + 1); - (void)spell(w, &info, NULL); + std::string w((*slst)[j]); + w.append(pos + 1); + (void)spell(w.c_str(), &info, NULL); if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { *pos = ' '; } else @@ -1054,64 +984,67 @@ int Hunspell::suggest(char*** slst, const char* word) { (*slst)) { switch (captype) { case NOCAP: { - ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic); + ns = pSMgr->ngsuggest(*slst, scw.c_str(), ns, pHMgr, maxdic); break; } case HUHINITCAP: capwords = 1; case HUHCAP: { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + ns = pSMgr->ngsuggest(*slst, wspace.c_str(), ns, pHMgr, maxdic); break; } case INITCAP: { capwords = 1; - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + ns = pSMgr->ngsuggest(*slst, wspace.c_str(), ns, pHMgr, maxdic); break; } case ALLCAP: { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); + std::string wspace(scw); + mkallsmall2(wspace, sunicw); int oldns = ns; - ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); - for (int j = oldns; j < ns; j++) - mkallcap((*slst)[j]); + ns = pSMgr->ngsuggest(*slst, wspace.c_str(), ns, pHMgr, maxdic); + for (int j = oldns; j < ns; j++) { + std::string form((*slst)[j]); + mkallcap(form); + 
free((*slst)[j]); + (*slst)[j] = mystrdup(form.c_str()); + } break; } } } // try dash suggestion (Afo-American -> Afro-American) - if (char* pos = strchr(cw, '-')) { - char* ppos = cw; + size_t dash_pos = scw.find('-'); + if (dash_pos != std::string::npos) { int nodashsug = 1; - char** nlst = NULL; - int nn = 0; - int last = 0; - if (*slst) { - for (int j = 0; j < ns && nodashsug == 1; j++) { - if (strchr((*slst)[j], '-')) - nodashsug = 0; - } + for (int j = 0; j < ns && nodashsug == 1; j++) { + if (strchr((*slst)[j], '-')) + nodashsug = 0; } + + size_t prev_pos = 0; + bool last = false; + while (nodashsug && !last) { - if (*pos == '\0') + if (dash_pos == scw.size()) last = 1; - else - *pos = '\0'; - if (!spell(ppos)) { - nn = suggest(&nlst, ppos); + std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos); + if (!spell(chunk.c_str())) { + char** nlst = NULL; + int nn = suggest(&nlst, chunk.c_str()); for (int j = nn - 1; j >= 0; j--) { - strncpy(wspace, cw, ppos - cw); - strcpy(wspace + (ppos - cw), nlst[j]); + std::string wspace = scw.substr(0, prev_pos); + wspace.append(nlst[j]); if (!last) { - strcat(wspace, "-"); - strcat(wspace, pos + 1); + wspace.append("-"); + wspace.append(scw.substr(dash_pos + 1)); } - ns = insert_sug(slst, wspace, ns); + ns = insert_sug(slst, wspace.c_str(), ns); free(nlst[j]); } if (nlst != NULL) @@ -1119,29 +1052,34 @@ int Hunspell::suggest(char*** slst, const char* word) { nodashsug = 0; } if (!last) { - *pos = '-'; - ppos = pos + 1; - pos = strchr(ppos, '-'); + prev_pos = dash_pos + 1; + dash_pos = scw.find('-', prev_pos); } - if (!pos) - pos = cw + strlen(cw); + if (dash_pos == std::string::npos) + dash_pos = scw.size(); } } // word reversing wrapper for complex prefixes if (complexprefixes) { for (int j = 0; j < ns; j++) { + std::string root((*slst)[j]); + free((*slst)[j]); if (utf8) - reverseword_utf((*slst)[j]); + reverseword_utf(root); else - reverseword((*slst)[j]); + reverseword(root); + (*slst)[j] = 
mystrdup(root.c_str()); } } // capitalize if (capwords) for (int j = 0; j < ns; j++) { - mkinitcap((*slst)[j]); + std::string form((*slst)[j]); + free((*slst)[j]); + mkinitcap(form); + (*slst)[j] = mystrdup(form.c_str()); } // expand suggestions with dot(s) @@ -1160,25 +1098,23 @@ int Hunspell::suggest(char*** slst, const char* word) { int l = 0; for (int j = 0; j < ns; j++) { if (!strchr((*slst)[j], ' ') && !spell((*slst)[j])) { - char s[MAXSWUTF8L]; - w_char w[MAXSWL]; - int len; + std::string s; + std::vector<w_char> w; if (utf8) { - len = u8_u16(w, MAXSWL, (*slst)[j]); + u8_u16(w, (*slst)[j]); } else { - strcpy(s, (*slst)[j]); - len = strlen(s); + s = (*slst)[j]; } - mkallsmall2(s, w, len); + mkallsmall2(s, w); free((*slst)[j]); - if (spell(s)) { - (*slst)[l] = mystrdup(s); + if (spell(s.c_str())) { + (*slst)[l] = mystrdup(s.c_str()); if ((*slst)[l]) l++; } else { - mkinitcap2(s, w, len); - if (spell(s)) { - (*slst)[l] = mystrdup(s); + mkinitcap2(s, w); + if (spell(s.c_str())) { + (*slst)[l] = mystrdup(s.c_str()); if ((*slst)[l]) l++; } @@ -1211,9 +1147,10 @@ int Hunspell::suggest(char*** slst, const char* word) { // output conversion rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; for (int j = 0; rl && j < ns; j++) { - if (rl->conv((*slst)[j], wspace, MAXWORDUTF8LEN) > 0) { + std::string wspace; + if (rl->conv((*slst)[j], wspace) > 0) { free((*slst)[j]); - (*slst)[j] = mystrdup(wspace); + (*slst)[j] = mystrdup(wspace.c_str()); } } @@ -1233,151 +1170,25 @@ char* Hunspell::get_dic_encoding() { return encoding; } -#ifdef HUNSPELL_EXPERIMENTAL -// XXX UTF-8 support is OK? 
-int Hunspell::suggest_auto(char*** slst, const char* word) { - char cw[MAXWORDUTF8LEN]; - char wspace[MAXWORDUTF8LEN]; - if (!pSMgr || maxdic == 0) - return 0; - w_char unicw[MAXWORDLEN]; - int nc = strlen(word); - if (utf8) { - if (nc >= MAXWORDUTF8LEN) - return 0; - } else { - if (nc >= MAXWORDLEN) - return 0; - } - int captype = 0; - int abbv = 0; - int wl = 0; - - // input conversion - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - int convstatus = rl ? rl->conv(word, wspace) : 0; - if (convstatus < 0) - return 0; - else if (convstatus > 0) - wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); - else - wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - - if (wl == 0) - return 0; - int ns = 0; - *slst = NULL; // HU, nsug in pSMgr->suggest - - switch (captype) { - case NOCAP: { - ns = pSMgr->suggest_auto(slst, cw, ns); - if (ns > 0) - break; - break; - } - - case INITCAP: { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest_auto(slst, wspace, ns); - for (int j = 0; j < ns; j++) - mkinitcap((*slst)[j]); - ns = pSMgr->suggest_auto(slst, cw, ns); - break; - } - - case HUHINITCAP: - case HUHCAP: { - ns = pSMgr->suggest_auto(slst, cw, ns); - if (ns == 0) { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest_auto(slst, wspace, ns); - } - break; - } - - case ALLCAP: { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest_auto(slst, wspace, ns); - - mkinitcap(wspace); - ns = pSMgr->suggest_auto(slst, wspace, ns); - - for (int j = 0; j < ns; j++) - mkallcap((*slst)[j]); - break; - } - } - - // word reversing wrapper for complex prefixes - if (complexprefixes) { - for (int j = 0; j < ns; j++) { - if (utf8) - reverseword_utf((*slst)[j]); - else - reverseword((*slst)[j]); - } - } - - // expand suggestions with dot(s) - if (abbv && pAMgr && pAMgr->get_sugswithdots()) { - for (int j = 0; j < ns; j++) { - (*slst)[j] = (char*)realloc((*slst)[j], 
strlen((*slst)[j]) + 1 + abbv); - strcat((*slst)[j], word + strlen(word) - abbv); - } - } - - // LANG_hu section: replace '-' with ' ' in Hungarian - if (langnum == LANG_hu) { - for (int j = 0; j < ns; j++) { - char* pos = strchr((*slst)[j], '-'); - if (pos) { - int info; - char w[MAXWORDUTF8LEN]; - *pos = '\0'; - strcpy(w, (*slst)[j]); - strcat(w, pos + 1); - spell(w, &info, NULL); - if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { - *pos = ' '; - } else - *pos = '-'; - } - } - } - - // output conversion - rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; - for (int j = 0; rl && j < ns; j++) { - if (rl->conv((*slst)[j], wspace) > 0) { - free((*slst)[j]); - (*slst)[j] = mystrdup(wspace); - } - } - - // END OF LANG_hu section - return ns; -} -#endif - int Hunspell::stem(char*** slst, char** desc, int n) { - char result[MAXLNLEN]; - char result2[MAXLNLEN]; + + std::string result2; *slst = NULL; if (n == 0) return 0; - *result2 = '\0'; for (int i = 0; i < n; i++) { - *result = '\0'; + + std::string result; + // add compound word parts (except the last one) char* s = (char*)desc[i]; char* part = strstr(s, MORPH_PART); if (part) { char* nextpart = strstr(part + 1, MORPH_PART); while (nextpart) { - copy_field(result + strlen(result), part, MORPH_PART); + std::string field; + copy_field(field, part, MORPH_PART); + result.append(field); part = nextpart; nextpart = strstr(part + 1, MORPH_PART); } @@ -1404,22 +1215,28 @@ int Hunspell::stem(char*** slst, char** desc, int n) { int genl = line_tok(sg, &gen, MSEP_REC); free(sg); for (int j = 0; j < genl; j++) { - sprintf(result2 + strlen(result2), "%c%s%s", MSEP_REC, result, - gen[j]); + result2.push_back(MSEP_REC); + result2.append(result); + result2.append(gen[j]); } freelist(&gen, genl); } } else { - sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result); + result2.push_back(MSEP_REC); + result2.append(result); if (strstr(pl[k], MORPH_SURF_PFX)) { - copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX); + 
std::string field; + copy_field(field, pl[k], MORPH_SURF_PFX); + result2.append(field); } - copy_field(result2 + strlen(result2), pl[k], MORPH_STEM); + std::string field; + copy_field(field, pl[k], MORPH_STEM); + result2.append(field); } } freelist(&pl, pln); } - int sln = line_tok(result2, slst, MSEP_REC); + int sln = line_tok(result2.c_str(), slst, MSEP_REC); return uniqlist(*slst, sln); } @@ -1431,148 +1248,43 @@ int Hunspell::stem(char*** slst, const char* word) { return pln2; } -#ifdef HUNSPELL_EXPERIMENTAL -int Hunspell::suggest_pos_stems(char*** slst, const char* word) { - char cw[MAXWORDUTF8LEN]; - char wspace[MAXWORDUTF8LEN]; - if (!pSMgr || maxdic == 0) - return 0; - w_char unicw[MAXWORDLEN]; - int nc = strlen(word); - if (utf8) { - if (nc >= MAXWORDUTF8LEN) - return 0; - } else { - if (nc >= MAXWORDLEN) - return 0; - } - int captype = 0; - int abbv = 0; - int wl = 0; - - // input conversion - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - int convstatus = rl ? rl->conv(word, wspace) : 0; - if (convstatus < 0) - return 0; - else if (convstatus > 0) - wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); - else - wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - - if (wl == 0) - return 0; - - int ns = 0; // ns=0 = normalized input - - *slst = NULL; // HU, nsug in pSMgr->suggest - - switch (captype) { - case HUHCAP: - case NOCAP: { - ns = pSMgr->suggest_pos_stems(slst, cw, ns); - - if ((abbv) && (ns == 0)) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - ns = pSMgr->suggest_pos_stems(slst, wspace, ns); - } - - break; - } - - case INITCAP: { - ns = pSMgr->suggest_pos_stems(slst, cw, ns); - - if (ns == 0 || ((*slst)[0][0] == '#')) { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest_pos_stems(slst, wspace, ns); - } - - break; - } - - case ALLCAP: { - ns = pSMgr->suggest_pos_stems(slst, cw, ns); - if (ns != 0) - break; - - memcpy(wspace, cw, (wl + 1)); - 
mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest_pos_stems(slst, wspace, ns); - - if (ns == 0) { - mkinitcap(wspace); - ns = pSMgr->suggest_pos_stems(slst, wspace, ns); - } - break; - } - } - - // output conversion - rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; - for (int j = 0; rl && j < ns; j++) { - if (rl->conv((*slst)[j], wspace) > 0) { - free((*slst)[j]); - (*slst)[j] = mystrdup(wspace); - } - } - - return ns; -} -#endif // END OF HUNSPELL_EXPERIMENTAL CODE - const char* Hunspell::get_wordchars() { return pAMgr->get_wordchars(); } -unsigned short* Hunspell::get_wordchars_utf16(int* len) { - return pAMgr->get_wordchars_utf16(len); +const std::vector<w_char>& Hunspell::get_wordchars_utf16() { + return pAMgr->get_wordchars_utf16(); } -void Hunspell::mkinitcap(char* p) { - if (!utf8) { - if (*p != '\0') - *p = csconv[((unsigned char)*p)].cupper; +void Hunspell::mkinitcap(std::string& u8) { + if (utf8) { + std::vector<w_char> u16; + u8_u16(u16, u8); + ::mkinitcap_utf(u16, langnum); + u16_u8(u8, u16); } else { - int len; - w_char u[MAXWORDLEN]; - len = u8_u16(u, MAXWORDLEN, p); - unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum); - u[0].h = (unsigned char)(i >> 8); - u[0].l = (unsigned char)(i & 0x00FF); - u16_u8(p, MAXWORDUTF8LEN, u, len); + ::mkinitcap(u8, csconv); } } -int Hunspell::mkinitcap2(char* p, w_char* u, int nc) { - if (!utf8) { - if (*p != '\0') - *p = csconv[((unsigned char)*p)].cupper; - } else if (nc > 0) { - unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum); - u[0].h = (unsigned char)(i >> 8); - u[0].l = (unsigned char)(i & 0x00FF); - u16_u8(p, MAXWORDUTF8LEN, u, nc); - return strlen(p); +int Hunspell::mkinitcap2(std::string& u8, std::vector<w_char>& u16) { + if (utf8) { + ::mkinitcap_utf(u16, langnum); + u16_u8(u8, u16); + } else { + ::mkinitcap(u8, csconv); } - return nc; + return u8.size(); } -int Hunspell::mkinitsmall2(char* p, w_char* u, int nc) { - if (!utf8) { - if (*p != '\0') - *p = csconv[((unsigned 
char)*p)].clower; - } else if (nc > 0) { - unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum); - u[0].h = (unsigned char)(i >> 8); - u[0].l = (unsigned char)(i & 0x00FF); - u16_u8(p, MAXWORDUTF8LEN, u, nc); - return strlen(p); +int Hunspell::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) { + if (utf8) { + ::mkinitsmall_utf(u16, langnum); + u16_u8(u8, u16); + } else { + ::mkinitsmall(u8, csconv); } - return nc; + return u8.size(); } int Hunspell::add(const char* word) { @@ -1601,20 +1313,16 @@ struct cs_info* Hunspell::get_csconv() { return csconv; } -void Hunspell::cat_result(char* result, char* st) { +void Hunspell::cat_result(std::string& result, char* st) { if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); + if (!result.empty()) + result.append("\n"); + result.append(st); free(st); } } int Hunspell::analyze(char*** slst, const char* word) { - char cw[MAXWORDUTF8LEN]; - char wspace[MAXWORDUTF8LEN]; - w_char unicw[MAXWORDLEN]; - int wl2 = 0; *slst = NULL; if (!pSMgr || maxdic == 0) return 0; @@ -1626,48 +1334,52 @@ int Hunspell::analyze(char*** slst, const char* word) { if (nc >= MAXWORDLEN) return 0; } - int captype = 0; - int abbv = 0; - int wl = 0; + int captype = NOCAP; + size_t abbv = 0; + size_t wl = 0; + + std::string scw; + std::vector<w_char> sunicw; // input conversion RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0; - if (convstatus < 0) - return 0; - else if (convstatus > 0) - wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); - else - wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); + { + std::string wspace; + + int convstatus = rl ? 
rl->conv(word, wspace) : 0; + if (convstatus < 0) + return 0; + else if (convstatus > 0) + wl = cleanword2(scw, sunicw, wspace.c_str(), &nc, &captype, &abbv); + else + wl = cleanword2(scw, sunicw, word, &nc, &captype, &abbv); + } if (wl == 0) { if (abbv) { + scw.clear(); for (wl = 0; wl < abbv; wl++) - cw[wl] = '.'; - cw[wl] = '\0'; + scw.push_back('.'); abbv = 0; } else return 0; } - char result[MAXLNLEN]; - char* st = NULL; - - *result = '\0'; + std::string result; - int n = 0; - int n2 = 0; - int n3 = 0; + size_t n = 0; + size_t n2 = 0; + size_t n3 = 0; // test numbers // LANG_hu section: set dash information for suggestions if (langnum == LANG_hu) { - while ((n < wl) && (((cw[n] <= '9') && (cw[n] >= '0')) || - (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) { + while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) || + (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) { n++; - if ((cw[n] == '.') || (cw[n] == ',')) { + if ((scw[n] == '.') || (scw[n] == ',')) { if (((n2 == 0) && (n > 3)) || - ((n2 > 0) && ((cw[n - 1] == '.') || (cw[n - 1] == ',')))) + ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ',')))) break; n2++; n3 = n; @@ -1676,21 +1388,21 @@ int Hunspell::analyze(char*** slst, const char* word) { if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0; - if ((n == wl) || ((n > 0) && ((cw[n] == '%') || (cw[n] == '\xB0')) && - checkword(cw + n, NULL, NULL))) { - mystrcat(result, cw, MAXLNLEN); - result[n - 1] = '\0'; + if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) && + checkword(scw.c_str() + n, NULL, NULL))) { + result.append(scw); + result.resize(n - 1); if (n == wl) - cat_result(result, pSMgr->suggest_morph(cw + n - 1)); + cat_result(result, pSMgr->suggest_morph(scw.c_str() + n - 1)); else { - char sign = cw[n]; - cw[n] = '\0'; - cat_result(result, pSMgr->suggest_morph(cw + n - 1)); - mystrcat(result, "+", MAXLNLEN); // XXX SPEC. 
MORPHCODE - cw[n] = sign; - cat_result(result, pSMgr->suggest_morph(cw + n)); + char sign = scw[n]; + scw[n] = '\0'; + cat_result(result, pSMgr->suggest_morph(scw.c_str() + n - 1)); + result.push_back('+'); // XXX SPEC. MORPHCODE + scw[n] = sign; + cat_result(result, pSMgr->suggest_morph(scw.c_str() + n)); } - return line_tok(result, slst, MSEP_REC); + return line_tok(result.c_str(), slst, MSEP_REC); } } // END OF LANG_hu section @@ -1699,64 +1411,58 @@ int Hunspell::analyze(char*** slst, const char* word) { case HUHCAP: case HUHINITCAP: case NOCAP: { - cat_result(result, pSMgr->suggest_morph(cw)); + cat_result(result, pSMgr->suggest_morph(scw.c_str())); if (abbv) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - cat_result(result, pSMgr->suggest_morph(wspace)); + std::string u8buffer(scw); + u8buffer.push_back('.'); + cat_result(result, pSMgr->suggest_morph(u8buffer.c_str())); } break; } case INITCAP: { - wl = mkallsmall2(cw, unicw, nc); - memcpy(wspace, cw, (wl + 1)); - wl2 = mkinitcap2(cw, unicw, nc); - cat_result(result, pSMgr->suggest_morph(wspace)); - cat_result(result, pSMgr->suggest_morph(cw)); + wl = mkallsmall2(scw, sunicw); + std::string u8buffer(scw); + mkinitcap2(scw, sunicw); + cat_result(result, pSMgr->suggest_morph(u8buffer.c_str())); + cat_result(result, pSMgr->suggest_morph(scw.c_str())); if (abbv) { - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - cat_result(result, pSMgr->suggest_morph(wspace)); + u8buffer.push_back('.'); + cat_result(result, pSMgr->suggest_morph(u8buffer.c_str())); - memcpy(wspace, cw, wl2); - *(wspace + wl2) = '.'; - *(wspace + wl2 + 1) = '\0'; + u8buffer = scw; + u8buffer.push_back('.'); - cat_result(result, pSMgr->suggest_morph(wspace)); + cat_result(result, pSMgr->suggest_morph(u8buffer.c_str())); } break; } case ALLCAP: { - cat_result(result, pSMgr->suggest_morph(cw)); + cat_result(result, pSMgr->suggest_morph(scw.c_str())); if (abbv) { - memcpy(wspace, cw, wl); - *(wspace + wl) = 
'.'; - *(wspace + wl + 1) = '\0'; - cat_result(result, pSMgr->suggest_morph(cw)); + std::string u8buffer(scw); + u8buffer.push_back('.'); + cat_result(result, pSMgr->suggest_morph(u8buffer.c_str())); } - wl = mkallsmall2(cw, unicw, nc); - memcpy(wspace, cw, (wl + 1)); - wl2 = mkinitcap2(cw, unicw, nc); + mkallsmall2(scw, sunicw); + std::string u8buffer(scw); + mkinitcap2(scw, sunicw); - cat_result(result, pSMgr->suggest_morph(wspace)); - cat_result(result, pSMgr->suggest_morph(cw)); + cat_result(result, pSMgr->suggest_morph(u8buffer.c_str())); + cat_result(result, pSMgr->suggest_morph(scw.c_str())); if (abbv) { - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - cat_result(result, pSMgr->suggest_morph(wspace)); + u8buffer.push_back('.'); + cat_result(result, pSMgr->suggest_morph(u8buffer.c_str())); - memcpy(wspace, cw, wl2); - *(wspace + wl2) = '.'; - *(wspace + wl2 + 1) = '\0'; + u8buffer = scw; + u8buffer.push_back('.'); - cat_result(result, pSMgr->suggest_morph(wspace)); + cat_result(result, pSMgr->suggest_morph(u8buffer.c_str())); } break; } } - if (*result) { + if (!result.empty()) { // word reversing wrapper for complex prefixes if (complexprefixes) { if (utf8) @@ -1764,95 +1470,94 @@ int Hunspell::analyze(char*** slst, const char* word) { else reverseword(result); } - return line_tok(result, slst, MSEP_REC); + return line_tok(result.c_str(), slst, MSEP_REC); } // compound word with dash (HU) I18n - char* dash = NULL; - int nresult = 0; // LANG_hu section: set dash information for suggestions - if (langnum == LANG_hu) - dash = (char*)strchr(cw, '-'); - if ((langnum == LANG_hu) && dash) { - *dash = '\0'; + + size_t dash_pos = langnum == LANG_hu ? 
scw.find('-') : std::string::npos; + int nresult = 0; + if (dash_pos != std::string::npos) { + std::string part1 = scw.substr(0, dash_pos); + std::string part2 = scw.substr(dash_pos+1); + // examine 2 sides of the dash - if (dash[1] == '\0') { // base word ending with dash - if (spell(cw)) { - char* p = pSMgr->suggest_morph(cw); + if (part2.empty()) { // base word ending with dash + if (spell(part1.c_str())) { + char* p = pSMgr->suggest_morph(part1.c_str()); if (p) { int ret = line_tok(p, slst, MSEP_REC); free(p); return ret; } } - } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat. - if (spell(cw) && (spell("-e"))) { - st = pSMgr->suggest_morph(cw); + } else if (part2.size() == 1 && part2[0] == 'e') { // XXX (HU) -e hat. + if (spell(part1.c_str()) && (spell("-e"))) { + char* st = pSMgr->suggest_morph(part1.c_str()); if (st) { - mystrcat(result, st, MAXLNLEN); + result.append(st); free(st); } - mystrcat(result, "+", MAXLNLEN); // XXX spec. separator in MORPHCODE + result.push_back('+'); // XXX spec. separator in MORPHCODE st = pSMgr->suggest_morph("-e"); if (st) { - mystrcat(result, st, MAXLNLEN); + result.append(st); free(st); } - return line_tok(result, slst, MSEP_REC); + return line_tok(result.c_str(), slst, MSEP_REC); } } else { // first word ending with dash: word- XXX ??? - char r2 = *(dash + 1); - dash[0] = '-'; - dash[1] = '\0'; - nresult = spell(cw); - dash[1] = r2; - dash[0] = '\0'; - if (nresult && spell(dash + 1) && - ((strlen(dash + 1) > 1) || ((dash[1] > '0') && (dash[1] < '9')))) { - st = pSMgr->suggest_morph(cw); + part1.push_back(' '); + nresult = spell(part1.c_str()); + part1.erase(part1.size() - 1); + if (nresult && spell(part2.c_str()) && + ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) { + char* st = pSMgr->suggest_morph(part1.c_str()); if (st) { - mystrcat(result, st, MAXLNLEN); + result.append(st); free(st); - mystrcat(result, "+", MAXLNLEN); // XXX spec. 
separator in MORPHCODE + result.push_back('+'); // XXX spec. separator in MORPHCODE } - st = pSMgr->suggest_morph(dash + 1); + st = pSMgr->suggest_morph(part2.c_str()); if (st) { - mystrcat(result, st, MAXLNLEN); + result.append(st); free(st); } - return line_tok(result, slst, MSEP_REC); + return line_tok(result.c_str(), slst, MSEP_REC); } } // affixed number in correct word - if (nresult && (dash > cw) && - (((*(dash - 1) <= '9') && (*(dash - 1) >= '0')) || - (*(dash - 1) == '.'))) { - *dash = '-'; + if (nresult && (dash_pos > 0) && + (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) || + (scw[dash_pos - 1] == '.'))) { n = 1; - if (*(dash - n) == '.') + if (scw[dash_pos - n] == '.') n++; // search first not a number character to left from dash - while (((dash - n) >= cw) && ((*(dash - n) == '0') || (n < 3)) && + while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) && (n < 6)) { n++; } - if ((dash - n) < cw) + if (dash_pos < n) n--; // numbers: valami1000000-hoz // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz, // 56-hoz, 6-hoz for (; n >= 1; n--) { - if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && - checkword(dash - n, NULL, NULL)) { - mystrcat(result, cw, MAXLNLEN); - result[dash - cw - n] = '\0'; - st = pSMgr->suggest_morph(dash - n); + if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') { + continue; + } + std::string chunk = scw.substr(dash_pos - n); + if (checkword(chunk.c_str(), NULL, NULL)) { + result.append(chunk); + char* st = pSMgr->suggest_morph(chunk.c_str()); if (st) { - mystrcat(result, st, MAXLNLEN); + result.append(st); free(st); } - return line_tok(result, slst, MSEP_REC); + return line_tok(result.c_str(), slst, MSEP_REC); } } } @@ -1866,30 +1571,33 @@ int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) { return 0; char** pl2; int pl2n = analyze(&pl2, word); - int captype = 0; + int captype = NOCAP; int abbv = 0; - char cw[MAXWORDUTF8LEN]; + std::string cw; cleanword(cw, word, &captype, 
&abbv); - char result[MAXLNLEN]; - *result = '\0'; + std::string result; for (int i = 0; i < pln; i++) { cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i])); } freelist(&pl2, pl2n); - if (*result) { + if (!result.empty()) { // allcap if (captype == ALLCAP) mkallcap(result); // line split - int linenum = line_tok(result, slst, MSEP_REC); + int linenum = line_tok(result.c_str(), slst, MSEP_REC); // capitalize if (captype == INITCAP || captype == HUHINITCAP) { - for (int j = 0; j < linenum; j++) - mkinitcap((*slst)[j]); + for (int j = 0; j < linenum; j++) { + std::string form((*slst)[j]); + free((*slst)[j]); + mkinitcap(form); + (*slst)[j] = mystrdup(form.c_str()); + } } // temporary filtering of prefix related errors (eg. @@ -1923,22 +1631,21 @@ int Hunspell::generate(char*** slst, const char* word, const char* pattern) { } // minimal XML parser functions -int Hunspell::get_xml_par(char* dest, const char* par, int max) { - char* d = dest; +std::string Hunspell::get_xml_par(const char* par) { + std::string dest; if (!par) - return 0; + return dest; char end = *par; - char* dmax = dest + max; if (end == '>') end = '<'; else if (end != '\'' && end != '"') return 0; // bad XML - for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) - *d = *par; - *d = '\0'; + for (par++; *par != '\0' && *par != end; ++par) { + dest.push_back(*par); + } mystrrep(dest, "&lt;", "<"); mystrrep(dest, "&amp;", "&"); - return (int)(d - dest); + return dest; } int Hunspell::get_langnum() const { @@ -1967,18 +1674,17 @@ const char* Hunspell::get_xml_pos(const char* s, const char* attr) { int Hunspell::check_xml_par(const char* q, const char* attr, const char* value) { - char cw[MAXWORDUTF8LEN]; - if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) && - strcmp(cw, value) == 0) + std::string cw = get_xml_par(get_xml_pos(q, attr)); + if (cw == value) return 1; return 0; } -int Hunspell::get_xml_list(char*** slst, char* list, const char* tag) { - int n = 0; - char* p; +int
Hunspell::get_xml_list(char*** slst, const char* list, const char* tag) { if (!list) return 0; + int n = 0; + const char* p; for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++; if (n == 0) @@ -1987,25 +1693,20 @@ int Hunspell::get_xml_list(char*** slst, char* list, const char* tag) { if (!*slst) return 0; for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) { - int l = strlen(p); - (*slst)[n] = (char*)malloc(l + 1); - if (!(*slst)[n]) - return n; - if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) { - free((*slst)[n]); + std::string cw = get_xml_par(p + strlen(tag) - 1); + if (cw.empty()) { break; } + (*slst)[n] = mystrdup(cw.c_str()); } return n; } int Hunspell::spellml(char*** slst, const char* word) { - char *q, *q2; - char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN]; - q = (char*)strstr(word, "<query"); + const char* q = strstr(word, "<query"); if (!q) return 0; // bad XML input - q2 = strchr(q, '>'); + const char* q2 = strchr(q, '>'); if (!q2) return 0; // bad XML input q2 = strstr(q2, "<word"); @@ -2013,8 +1714,9 @@ int Hunspell::spellml(char*** slst, const char* word) { return 0; // bad XML input if (check_xml_par(q, "type=", "analyze")) { int n = 0; - if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) - n = analyze(slst, cw); + std::string cw = get_xml_par(strchr(q2, '>')); + if (!cw.empty()) + n = analyze(slst, cw.c_str()); if (n == 0) return 0; // convert the result to <code><a>ana1</a><a>ana2</a></code> format @@ -2036,22 +1738,25 @@ int Hunspell::spellml(char*** slst, const char* word) { (*slst)[0] = mystrdup(r.c_str()); return 1; } else if (check_xml_par(q, "type=", "stem")) { - if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) - return stem(slst, cw); + std::string cw = get_xml_par(strchr(q2, '>')); + if (!cw.empty()) + return stem(slst, cw.c_str()); } else if (check_xml_par(q, "type=", "generate")) { - int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1); - if (n == 0) + std::string cw = get_xml_par(strchr(q2, 
'>')); + if (cw.empty()) return 0; - char* q3 = strstr(q2 + 1, "<word"); + const char* q3 = strstr(q2 + 1, "<word"); if (q3) { - if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) { - return generate(slst, cw, cw2); + std::string cw2 = get_xml_par(strchr(q3, '>')); + if (!cw2.empty()) { + return generate(slst, cw.c_str(), cw2.c_str()); } } else { if ((q2 = strstr(q2 + 1, "<code")) != NULL) { char** slst2; - if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>")) != 0) { - int n2 = generate(slst, cw, slst2, n); + int n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"); + if (n != 0) { + int n2 = generate(slst, cw.c_str(), slst2, n); freelist(&slst2, n); return uniqlist(*slst, n2); } @@ -2062,182 +1767,6 @@ int Hunspell::spellml(char*** slst, const char* word) { return 0; } -#ifdef HUNSPELL_EXPERIMENTAL -// XXX is UTF-8 support OK? -char* Hunspell::morph_with_correction(const char* word) { - char cw[MAXWORDUTF8LEN]; - char wspace[MAXWORDUTF8LEN]; - if (!pSMgr || maxdic == 0) - return NULL; - w_char unicw[MAXWORDLEN]; - int nc = strlen(word); - if (utf8) { - if (nc >= MAXWORDUTF8LEN) - return NULL; - } else { - if (nc >= MAXWORDLEN) - return NULL; - } - int captype = 0; - int abbv = 0; - int wl = 0; - - // input conversion - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - int convstatus = rl ? 
rl->conv(word, wspace) : 0; - if (convstatus < 0) - return 0; - else if (convstatus > 0) - wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); - else - wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - - if (wl == 0) - return NULL; - - char result[MAXLNLEN]; - char* st = NULL; - - *result = '\0'; - - switch (captype) { - case NOCAP: { - st = pSMgr->suggest_morph_for_spelling_error(cw); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - if (abbv) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - } - break; - } - case INITCAP: { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - st = pSMgr->suggest_morph_for_spelling_error(cw); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - if (abbv) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - mkallsmall2(wspace, unicw, nc); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - mkinitcap(wspace); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - } - break; - } - case HUHCAP: { - st = pSMgr->suggest_morph_for_spelling_error(cw); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - break; - } - case 
ALLCAP: { - memcpy(wspace, cw, (wl + 1)); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - mkallsmall2(wspace, unicw, nc); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - mkinitcap(wspace); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - if (abbv) { - memcpy(wspace, cw, (wl + 1)); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - if (*result) - mystrcat(result, "\n", MAXLNLEN); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - mkallsmall2(wspace, unicw, nc); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - mkinitcap(wspace); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - } - break; - } - } - - if (*result) - return mystrdup(result); - return NULL; -} - -#endif // END OF HUNSPELL_EXPERIMENTAL CODE - Hunhandle* Hunspell_create(const char* affpath, const char* dpath) { return (Hunhandle*)(new Hunspell(affpath, dpath)); } @@ -2333,10 +1862,9 @@ int Hunspell::suffix_suggest(char*** slst, const char* root_word) { if (ignoredchars != NULL) { w2.assign(root_word); if (utf8) { - int ignoredchars_utf16_len; - unsigned short* ignoredchars_utf16 = - pAMgr->get_ignore_utf16(&ignoredchars_utf16_len); - remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len); + const std::vector<w_char>& ignoredchars_utf16 = + pAMgr->get_ignore_utf16(); + remove_ignored_chars_utf(w2, ignoredchars_utf16); } else { remove_ignored_chars(w2, ignoredchars); } 
|