diff options
author | George Hazan <ghazan@miranda.im> | 2022-08-30 17:13:21 +0300 |
---|---|---|
committer | George Hazan <ghazan@miranda.im> | 2022-08-30 17:13:21 +0300 |
commit | 3ad2f2b7c2bfb3166363239d67a6645692ffb2b6 (patch) | |
tree | 0201fd31d0c0e5c193752f7b80cdc69096b563cf /libs/hunspell/src/hunspell.c++ | |
parent | d82b809f6af58a1d10fa503138b912d336dca75e (diff) |
fixes #3183 (Update hunspell to 1.7.1)
Diffstat (limited to 'libs/hunspell/src/hunspell.c++')
-rw-r--r-- | libs/hunspell/src/hunspell.c++ | 1040 |
1 files changed, 636 insertions, 404 deletions
diff --git a/libs/hunspell/src/hunspell.c++ b/libs/hunspell/src/hunspell.c++ index b1535013fe..4afafdadc1 100644 --- a/libs/hunspell/src/hunspell.c++ +++ b/libs/hunspell/src/hunspell.c++ @@ -1,7 +1,7 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * - * Copyright (C) 2002-2017 Németh László + * Copyright (C) 2002-2022 Németh László * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with @@ -71,6 +71,7 @@ #include <stdlib.h> #include <string.h> #include <stdio.h> +#include <time.h> #include "affixmgr.hxx" #include "hunspell.hxx" @@ -86,30 +87,41 @@ class HunspellImpl { public: - HunspellImpl(const char* affpath, const char* dpath, const char* key); + HunspellImpl(const char* affpath, const char* dpath, const char* key = NULL); ~HunspellImpl(); - int add_dic(const char* dpath, const char* key); + int add_dic(const char* dpath, const char* key = NULL); std::vector<std::string> suffix_suggest(const std::string& root_word); std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl); std::vector<std::string> generate(const std::string& word, const std::string& pattern); std::vector<std::string> stem(const std::string& word); std::vector<std::string> stem(const std::vector<std::string>& morph); std::vector<std::string> analyze(const std::string& word); + int get_langnum() const; bool input_conv(const std::string& word, std::string& dest); bool spell(const std::string& word, int* info = NULL, std::string* root = NULL); std::vector<std::string> suggest(const std::string& word); - const std::string& get_wordchars() const; + const std::string& get_wordchars_cpp() const; const std::vector<w_char>& get_wordchars_utf16() const; const std::string& get_dict_encoding() const; int add(const std::string& word); int add_with_affix(const std::string& word, const std::string& example); int remove(const std::string& word); + const std::string& get_version_cpp() const; struct cs_info* get_csconv(); - std::vector<char> dic_encoding_vec; - int get_langnum() const { return langnum; } - const char* get_try_string() const { return pAMgr->get_try_string(); } - const std::string& get_version() const { return pAMgr->get_version(); } + int spell(const char* word, int* info = NULL, char** root = NULL); + int suggest(char*** slst, const char* word); + int suffix_suggest(char*** slst, const char* root_word); + void free_list(char*** slst, int n); + char* get_dic_encoding(); + int analyze(char*** slst, const char* word); + int stem(char*** slst, const char* word); + int stem(char*** slst, char** morph, int n); + int generate(char*** slst, const char* word, const char* word2); + int generate(char*** slst, const char* word, char** desc, int n); + const char* get_wordchars() const; + const char* get_version() const; + int input_conv(const char* word, char* dest, size_t destsize); private: AffixMgr* pAMgr; @@ -124,12 +136,17 @@ private: std::vector<std::string> wordbreak; private: + std::vector<std::string> analyze_internal(const std::string& word); + bool spell_internal(const std::string& word, int* info = NULL, std::string* root = NULL); + std::vector<std::string> suggest_internal(const std::string& word, + bool& capitalized, size_t& abbreviated, int& captype); void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev); size_t cleanword2(std::string& dest, std::vector<w_char>& dest_u, const std::string& src, int* pcaptype, size_t* pabbrev); + void clean_ignore(std::string& dest, const std::string& src); void mkinitcap(std::string& u8); int mkinitcap2(std::string& u8, std::vector<w_char>& u16); int mkinitsmall2(std::string& u8, std::vector<w_char>& u16); @@ -143,19 +160,15 @@ private: void insert_sug(std::vector<std::string>& slst, const std::string& word); void cat_result(std::string& result, const std::string& st); std::vector<std::string> spellml(const std::string& word); - std::string get_xml_par(const char* par); - const char* get_xml_pos(const char* s, const char* attr); - std::vector<std::string> get_xml_list(const char* list, const char* tag); - int check_xml_par(const char* q, const char* attr, const char* value); + std::string get_xml_par(const std::string& par, std::string::size_type pos); + std::string::size_type get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr); + std::vector<std::string> get_xml_list(const std::string& list, std::string::size_type pos, const char* tag); + int check_xml_par(const std::string& q, std::string::size_type pos, const char* attr, const char* value); private: HunspellImpl(const HunspellImpl&); HunspellImpl& operator=(const HunspellImpl&); }; -Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key) - : m_Impl(new HunspellImpl(affpath, dpath, key)) { -} - HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) { csconv = NULL; utf8 = 0; @@ -180,19 +193,12 @@ HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* k complexprefixes = pAMgr->get_complexprefixes(); wordbreak = pAMgr->get_breaktable(); - dic_encoding_vec.resize(encoding.size()+1); - strcpy(&dic_encoding_vec[0], encoding.c_str()); - /* and finally set up the suggestion manager */ pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr); if (try_string) free(try_string); } -Hunspell::~Hunspell() { - delete m_Impl; -} - HunspellImpl::~HunspellImpl() { delete pSMgr; delete pAMgr; @@ -210,11 +216,6 @@ HunspellImpl::~HunspellImpl() { } // load extra dictionaries -int Hunspell::add_dic(const char* dpath, const char* key) { - return m_Impl->add_dic(dpath, key); -} - -// load extra dictionaries int HunspellImpl::add_dic(const char* dpath, const char* key) { if (!affixpath) return 1; @@ -222,6 +223,26 @@ int HunspellImpl::add_dic(const char* dpath, const char* key) { return 0; } + +// make a copy of src at dest while removing all characters +// specified in IGNORE rule +void HunspellImpl::clean_ignore(std::string& dest, + const std::string& src) { + dest.clear(); + dest.assign(src); + const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL; + if (ignoredchars != NULL) { + if (utf8) { + const std::vector<w_char>& ignoredchars_utf16 = + pAMgr->get_ignore_utf16(); + remove_ignored_chars_utf(dest, ignoredchars_utf16); + } else { + remove_ignored_chars(dest, ignoredchars); + } + } +} + + // make a copy of src at destination while removing all leading // blanks and removing any trailing periods after recording // their presence with the abbreviation flag @@ -237,7 +258,11 @@ size_t HunspellImpl::cleanword2(std::string& dest, dest.clear(); dest_utf.clear(); - const char* q = src.c_str(); + // remove IGNORE characters from the string + std::string w2; + clean_ignore(w2, src); + + const char* q = w2.c_str(); // first skip over any leading blanks while (*q == ' ') @@ -409,11 +434,22 @@ void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& slst.insert(slst.begin(), word); } -bool Hunspell::spell(const std::string& word, int* info, std::string* root) { - return m_Impl->spell(word, info, root); +bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) { + bool r = spell_internal(word, info, root); + if (r && root) { + // output conversion + RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + if (rl) { + std::string wspace; + if (rl->conv(*root, wspace)) { + *root = wspace; + } + } + } + return r; } -bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) { +bool HunspellImpl::spell_internal(const std::string& word, int* info, std::string* root) { struct hentry* rv = NULL; int info2 = 0; @@ -485,7 +521,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) case HUHCAP: /* FALLTHROUGH */ case HUHINITCAP: - *info += SPELL_ORIGCAP; + *info |= SPELL_ORIGCAP; /* FALLTHROUGH */ case NOCAP: rv = checkword(scw, info, root); @@ -496,7 +532,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) } break; case ALLCAP: { - *info += SPELL_ORIGCAP; + *info |= SPELL_ORIGCAP; rv = checkword(scw, info, root); if (rv) break; @@ -563,17 +599,22 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) break; } } + /* FALLTHROUGH */ case INITCAP: { - - *info += SPELL_ORIGCAP; - mkallsmall2(scw, sunicw); - std::string u8buffer(scw); - mkinitcap2(scw, sunicw); + // handle special capitalization of dotted I + bool Idot = (utf8 && (unsigned char) scw[0] == 0xc4 && (unsigned char) scw[1] == 0xb0); + *info |= SPELL_ORIGCAP; + if (captype == ALLCAP) { + mkallsmall2(scw, sunicw); + mkinitcap2(scw, sunicw); + if (Idot) + scw.replace(0, 1, "\xc4\xb0"); + } if (captype == INITCAP) - *info += SPELL_INITCAP; + *info |= SPELL_INITCAP; rv = checkword(scw, info, root); if (captype == INITCAP) - *info -= SPELL_INITCAP; + *info &= ~SPELL_INITCAP; // forbid bad capitalization // (for example, ijs -> Ijs instead of IJs in Dutch) // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag) @@ -583,9 +624,13 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) } if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL; - if (rv) + if (rv || (Idot && langnum != LANG_az && langnum != LANG_tr && langnum != LANG_crh)) break; + mkallsmall2(scw, sunicw); + std::string u8buffer(scw); + mkinitcap2(scw, sunicw); + rv = checkword(u8buffer, info, root); if (abbv && !rv) { u8buffer.push_back('.'); @@ -594,10 +639,10 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) u8buffer = scw; u8buffer.push_back('.'); if (captype == INITCAP) - *info += SPELL_INITCAP; + *info |= SPELL_INITCAP; rv = checkword(u8buffer, info, root); if (captype == INITCAP) - *info -= SPELL_INITCAP; + *info &= ~SPELL_INITCAP; if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL; break; @@ -618,7 +663,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) if (rv) { if (pAMgr && pAMgr->get_warn() && rv->astr && TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) { - *info += SPELL_WARN; + *info |= SPELL_WARN; if (pAMgr->get_forbidwarn()) return false; return true; @@ -627,7 +672,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) } // recursive breaking at break points - if (!wordbreak.empty()) { + if (!wordbreak.empty() && !(*info & SPELL_FORBIDDEN)) { int nbr = 0; wl = scw.size(); @@ -668,6 +713,37 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) size_t plen = wordbreak[j].size(); size_t found = scw.find(wordbreak[j]); if ((found > 0) && (found < wl - plen)) { + size_t found2 = scw.find(wordbreak[j], found + 1); + // try to break at the second occurance + // to recognize dictionary words with wordbreak + if (found2 > 0 && (found2 < wl - plen)) + found = found2; + if (!spell(scw.substr(found + plen))) + continue; + std::string suffix(scw.substr(found)); + scw.resize(found); + // examine 2 sides of the break point + if (spell(scw)) + return true; + scw.append(suffix); + + // LANG_hu: spec. dash rule + if (langnum == LANG_hu && wordbreak[j] == "-") { + suffix = scw.substr(found + 1); + scw.resize(found + 1); + if (spell(scw)) + return true; // check the first part with dash + scw.append(suffix); + } + // end of LANG specific region + } + } + + // other patterns (break at first break point) + for (size_t j = 0; j < wordbreak.size(); ++j) { + size_t plen = wordbreak[j].size(); + size_t found = scw.find(wordbreak[j]); + if ((found > 0) && (found < wl - plen)) { if (!spell(scw.substr(found + plen))) continue; std::string suffix(scw.substr(found)); @@ -694,47 +770,28 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) } struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) { - bool usebuffer = false; std::string w2; const char* word; int len; - const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL; - if (ignoredchars != NULL) { - w2.assign(w); - if (utf8) { - const std::vector<w_char>& ignoredchars_utf16 = - pAMgr->get_ignore_utf16(); - remove_ignored_chars_utf(w2, ignoredchars_utf16); - } else { - remove_ignored_chars(w2, ignoredchars); - } - word = w2.c_str(); - len = w2.size(); - usebuffer = true; - } else { - word = w.c_str(); - len = w.size(); - } + // remove IGNORE characters from the string + clean_ignore(w2, w); + + word = w2.c_str(); + len = w2.size(); if (!len) return NULL; // word reversing wrapper for complex prefixes if (complexprefixes) { - if (!usebuffer) { - w2.assign(word); - usebuffer = true; - } if (utf8) reverseword_utf(w2); else reverseword(w2); } - if (usebuffer) { - word = w2.c_str(); - } + word = w2.c_str(); // look word in hash table struct hentry* he = NULL; @@ -745,13 +802,13 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { if (info) - *info += SPELL_FORBIDDEN; + *info |= SPELL_FORBIDDEN; // LANG_hu section: set dash information for suggestions if (langnum == LANG_hu) { if (pAMgr->get_compoundflag() && TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) { if (info) - *info += SPELL_COMPOUND; + *info |= SPELL_COMPOUND; } } return NULL; @@ -786,7 +843,7 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { if (info) - *info += SPELL_FORBIDDEN; + *info |= SPELL_FORBIDDEN; return NULL; } if (root) { @@ -819,7 +876,7 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str } } if (info) - *info += SPELL_COMPOUND; + *info |= SPELL_COMPOUND; } } } @@ -827,11 +884,103 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str return he; } -std::vector<std::string> Hunspell::suggest(const std::string& word) { - return m_Impl->suggest(word); +std::vector<std::string> HunspellImpl::suggest(const std::string& word) { + bool capwords; + size_t abbv; + int captype; + std::vector<std::string> slst = suggest_internal(word, capwords, abbv, captype); + // word reversing wrapper for complex prefixes + if (complexprefixes) { + for (size_t j = 0; j < slst.size(); ++j) { + if (utf8) + reverseword_utf(slst[j]); + else + reverseword(slst[j]); + } + } + + // capitalize + if (capwords) + for (size_t j = 0; j < slst.size(); ++j) { + mkinitcap(slst[j]); + } + + // expand suggestions with dot(s) + if (abbv && pAMgr && pAMgr->get_sugswithdots()) { + for (size_t j = 0; j < slst.size(); ++j) { + slst[j].append(word.substr(word.size() - abbv)); + } + } + + // remove bad capitalized and forbidden forms + if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) { + switch (captype) { + case INITCAP: + case ALLCAP: { + size_t l = 0; + for (size_t j = 0; j < slst.size(); ++j) { + if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) { + std::string s; + std::vector<w_char> w; + if (utf8) { + u8_u16(w, slst[j]); + } else { + s = slst[j]; + } + mkallsmall2(s, w); + if (spell(s)) { + slst[l] = s; + ++l; + } else { + mkinitcap2(s, w); + if (spell(s)) { + slst[l] = s; + ++l; + } + } + } else { + slst[l] = slst[j]; + ++l; + } + } + slst.resize(l); + } + } + } + + // remove duplications + size_t l = 0; + for (size_t j = 0; j < slst.size(); ++j) { + slst[l] = slst[j]; + for (size_t k = 0; k < l; ++k) { + if (slst[k] == slst[j]) { + --l; + break; + } + } + ++l; + } + slst.resize(l); + + // output conversion + RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + if (rl) { + for (size_t i = 0; rl && i < slst.size(); ++i) { + std::string wspace; + if (rl->conv(slst[i], wspace)) { + slst[i] = wspace; + } + } + } + return slst; } -std::vector<std::string> HunspellImpl::suggest(const std::string& word) { +std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word, + bool& capwords, size_t& abbv, int& captype) { + captype = NOCAP; + abbv = 0; + capwords = false; + std::vector<std::string> slst; int onlycmpdsug = 0; @@ -849,8 +998,6 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { if (word.size() >= MAXWORDLEN) return slst; } - int captype = NOCAP; - size_t abbv = 0; size_t wl = 0; std::string scw; @@ -871,7 +1018,11 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { return slst; } - int capwords = 0; + bool good = false; + + clock_t timelimit; + // initialize in every suggestion call + timelimit = clock(); // check capitalized form for FORCEUCASE if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) { @@ -886,22 +1037,38 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { switch (captype) { case NOCAP: { - pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); + good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + if (abbv) { + std::string wspace(scw); + wspace.push_back('.'); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + } break; } case INITCAP: { - capwords = 1; - pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); + capwords = true; + good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; std::string wspace(scw); mkallsmall2(wspace, sunicw); - pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; break; } case HUHINITCAP: - capwords = 1; + capwords = true; + /* FALLTHROUGH */ case HUHCAP: { - pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); + good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; // something.The -> something. The size_t dot_pos = scw.find('.'); if (dot_pos != std::string::npos) { @@ -927,19 +1094,25 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { // TheOpenOffice.org -> The OpenOffice.org wspace = scw; mkinitsmall2(wspace, sunicw); - pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; } wspace = scw; mkallsmall2(wspace, sunicw); if (spell(wspace.c_str())) insert_sug(slst, wspace); size_t prevns = slst.size(); - pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; if (captype == HUHINITCAP) { mkinitcap2(wspace, sunicw); if (spell(wspace.c_str())) insert_sug(slst, wspace); - pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; } // aNew -> "a New" (instead of "a new") for (size_t j = prevns; j < slst.size(); ++j) { @@ -966,11 +1139,15 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { case ALLCAP: { std::string wspace(scw); mkallsmall2(wspace, sunicw); - pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str())) insert_sug(slst, wspace); mkinitcap2(wspace, sunicw); - pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; for (size_t j = 0; j < slst.size(); ++j) { mkallcap(slst[j]); if (pAMgr && pAMgr->get_checksharps()) { @@ -1002,34 +1179,43 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { } } // END OF LANG_hu section - - // try ngram approach since found nothing or only compound words - if (pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) { + // try ngram approach since found nothing good suggestion + if (!good && pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) { switch (captype) { case NOCAP: { - pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs); + pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; break; } + /* FALLTHROUGH */ case HUHINITCAP: - capwords = 1; + capwords = true; + /* FALLTHROUGH */ case HUHCAP: { std::string wspace(scw); mkallsmall2(wspace, sunicw); - pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs); + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; break; } case INITCAP: { - capwords = 1; + capwords = true; std::string wspace(scw); mkallsmall2(wspace, sunicw); - pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs); + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; break; } case ALLCAP: { std::string wspace(scw); mkallsmall2(wspace, sunicw); size_t oldns = slst.size(); - pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs); + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; for (size_t j = oldns; j < slst.size(); ++j) { mkallcap(slst[j]); } @@ -1039,6 +1225,11 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { } // try dash suggestion (Afo-American -> Afro-American) + // Note: LibreOffice was modified to treat dashes as word + // characters to check "scot-free" etc. word forms, but + // we need to handle suggestions for "Afo-American", etc., + // while "Afro-American" is missing from the dictionary. + // TODO avoid possible overgeneration size_t dash_pos = scw.find('-'); if (dash_pos != std::string::npos) { int nodashsug = 1; @@ -1050,12 +1241,14 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { size_t prev_pos = 0; bool last = false; - while (nodashsug && !last) { + while (!good && nodashsug && !last) { if (dash_pos == scw.size()) last = 1; std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos); if (!spell(chunk.c_str())) { std::vector<std::string> nlst = suggest(chunk.c_str()); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) { std::string wspace = scw.substr(0, prev_pos); wspace.append(*j); @@ -1063,7 +1256,11 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { wspace.append("-"); wspace.append(scw.substr(dash_pos + 1)); } - insert_sug(slst, wspace); + int info = 0; + if (pAMgr && pAMgr->get_forbiddenword()) + checkword(wspace, &info, NULL); + if (!(info & SPELL_FORBIDDEN)) + insert_sug(slst, wspace); } nodashsug = 0; } @@ -1075,104 +1272,13 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { dash_pos = scw.size(); } } - - // word reversing wrapper for complex prefixes - if (complexprefixes) { - for (size_t j = 0; j < slst.size(); ++j) { - if (utf8) - reverseword_utf(slst[j]); - else - reverseword(slst[j]); - } - } - - // capitalize - if (capwords) - for (size_t j = 0; j < slst.size(); ++j) { - mkinitcap(slst[j]); - } - - // expand suggestions with dot(s) - if (abbv && pAMgr && pAMgr->get_sugswithdots()) { - for (size_t j = 0; j < slst.size(); ++j) { - slst[j].append(word.substr(word.size() - abbv)); - } - } - - // remove bad capitalized and forbidden forms - if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) { - switch (captype) { - case INITCAP: - case ALLCAP: { - size_t l = 0; - for (size_t j = 0; j < slst.size(); ++j) { - if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) { - std::string s; - std::vector<w_char> w; - if (utf8) { - u8_u16(w, slst[j]); - } else { - s = slst[j]; - } - mkallsmall2(s, w); - if (spell(s)) { - slst[l] = s; - ++l; - } else { - mkinitcap2(s, w); - if (spell(s)) { - slst[l] = s; - ++l; - } - } - } else { - slst[l] = slst[j]; - ++l; - } - } - slst.resize(l); - } - } - } - - // remove duplications - size_t l = 0; - for (size_t j = 0; j < slst.size(); ++j) { - slst[l] = slst[j]; - for (size_t k = 0; k < l; ++k) { - if (slst[k] == slst[j]) { - --l; - break; - } - } - ++l; - } - slst.resize(l); - - // output conversion - rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; - for (size_t j = 0; rl && j < slst.size(); ++j) { - std::string wspace; - if (rl->conv(slst[j], wspace)) { - slst[j] = wspace; - } - } - return slst; } -const std::string& Hunspell::get_dict_encoding() const { - return m_Impl->get_dict_encoding(); -} - const std::string& HunspellImpl::get_dict_encoding() const { return encoding; } -std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) { - return m_Impl->stem(desc); -} - std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) { std::vector<std::string> slst; @@ -1241,30 +1347,14 @@ std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc return slst; } -std::vector<std::string> Hunspell::stem(const std::string& word) { - return m_Impl->stem(word); -} - std::vector<std::string> HunspellImpl::stem(const std::string& word) { return stem(analyze(word)); } -const char* Hunspell::get_wordchars() const { - return m_Impl->get_wordchars().c_str(); -} - -const std::string& Hunspell::get_wordchars_cpp() const { - return m_Impl->get_wordchars(); -} - -const std::string& HunspellImpl::get_wordchars() const { +const std::string& HunspellImpl::get_wordchars_cpp() const { return pAMgr->get_wordchars(); } -const std::vector<w_char>& Hunspell::get_wordchars_utf16() const { - return m_Impl->get_wordchars_utf16(); -} - const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const { return pAMgr->get_wordchars_utf16(); } @@ -1300,56 +1390,32 @@ int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) { return u8.size(); } -int Hunspell::add(const std::string& word) { - return m_Impl->add(word); -} - int HunspellImpl::add(const std::string& word) { if (!m_HMgrs.empty()) return m_HMgrs[0]->add(word); return 0; } -int Hunspell::add_with_affix(const std::string& word, const std::string& example) { - return m_Impl->add_with_affix(word, example); -} - int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) { if (!m_HMgrs.empty()) return m_HMgrs[0]->add_with_affix(word, example); return 0; } -int Hunspell::remove(const std::string& word) { - return m_Impl->remove(word); -} - int HunspellImpl::remove(const std::string& word) { if (!m_HMgrs.empty()) return m_HMgrs[0]->remove(word); return 0; } -const char* Hunspell::get_version() const { - return m_Impl->get_version().c_str(); -} - -const std::string& Hunspell::get_version_cpp() const { - return m_Impl->get_version(); -} - -const char* Hunspell::get_try_string() const { - return m_Impl->get_try_string(); +const std::string& HunspellImpl::get_version_cpp() const { + return pAMgr->get_version(); } struct cs_info* HunspellImpl::get_csconv() { return csconv; } -struct cs_info* Hunspell::get_csconv() { - return m_Impl->get_csconv(); -} - void HunspellImpl::cat_result(std::string& result, const std::string& st) { if (!st.empty()) { if (!result.empty()) @@ -1358,11 +1424,22 @@ void HunspellImpl::cat_result(std::string& result, const std::string& st) { } } -std::vector<std::string> Hunspell::analyze(const std::string& word) { - return m_Impl->analyze(word); +std::vector<std::string> HunspellImpl::analyze(const std::string& word) { + std::vector<std::string> slst = analyze_internal(word); + // output conversion + RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + if (rl) { + for (size_t i = 0; rl && i < slst.size(); ++i) { + std::string wspace; + if (rl->conv(slst[i], wspace)) { + slst[i] = wspace; + } + } + } + return slst; } -std::vector<std::string> HunspellImpl::analyze(const std::string& word) { +std::vector<std::string> HunspellImpl::analyze_internal(const std::string& word) { std::vector<std::string> slst; if (!pSMgr || m_HMgrs.empty()) return slst; @@ -1595,10 +1672,6 @@ std::vector<std::string> HunspellImpl::analyze(const std::string& word) { return slst; } -std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) { - return m_Impl->generate(word, pl); -} - std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) { std::vector<std::string> slst; if (!pSMgr || pl.empty()) @@ -1643,10 +1716,6 @@ std::vector<std::string> HunspellImpl::generate(const std::string& word, const s return slst; } -std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) { - return m_Impl->generate(word, pattern); -} - std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) { std::vector<std::string> pl = analyze(pattern); std::vector<std::string> slst = generate(word, pl); @@ -1655,10 +1724,11 @@ std::vector<std::string> HunspellImpl::generate(const std::string& word, const s } // minimal XML parser functions -std::string HunspellImpl::get_xml_par(const char* par) { +std::string HunspellImpl::get_xml_par(const std::string& in_par, std::string::size_type pos) { std::string dest; - if (!par) + if (pos == std::string::npos) return dest; + const char* par = in_par.c_str() + pos; char end = *par; if (end == '>') end = '<'; @@ -1672,22 +1742,8 @@ std::string HunspellImpl::get_xml_par(const char* par) { return dest; } -int Hunspell::get_langnum() const { - return m_Impl->get_langnum(); -} - -bool Hunspell::input_conv(const std::string& word, std::string& dest) { - return m_Impl->input_conv(word, dest); -} - -int Hunspell::input_conv(const char* word, char* dest, size_t destsize) { - std::string d; - bool ret = input_conv(word, d); - if (ret && d.size() < destsize) { - strncpy(dest, d.c_str(), destsize); - return 1; - } - return 0; +int HunspellImpl::get_langnum() const { + return langnum; } bool HunspellImpl::input_conv(const std::string& word, std::string& dest) { @@ -1700,42 +1756,47 @@ bool HunspellImpl::input_conv(const std::string& word, std::string& dest) { } // return the beginning of the element (attr == NULL) or the attribute -const char* HunspellImpl::get_xml_pos(const char* s, const char* attr) { - const char* end = strchr(s, '>'); +std::string::size_type HunspellImpl::get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr) { + if (pos == std::string::npos) + return std::string::npos; + + std::string::size_type endpos = s.find('>', pos); if (attr == NULL) - return end; - const char* p = s; - while (1) { - p = strstr(p, attr); - if (!p || p >= end) - return 0; - if (*(p - 1) == ' ' || *(p - 1) == '\n') + return endpos; + while (true) { + pos = s.find(attr, pos); + if (pos == std::string::npos || pos >= endpos) + return std::string::npos; + if (s[pos - 1] == ' ' || s[pos - 1] == '\n') break; - p += strlen(attr); + pos += strlen(attr); } - return p + strlen(attr); + return pos + strlen(attr); } -int HunspellImpl::check_xml_par(const char* q, - const char* attr, - const char* value) { - std::string cw = get_xml_par(get_xml_pos(q, attr)); +int HunspellImpl::check_xml_par(const std::string& q, std::string::size_type pos, + const char* attr, + const char* value) { + std::string cw = get_xml_par(q, get_xml_pos(q, pos, attr)); if (cw == value) return 1; return 0; } -std::vector<std::string> HunspellImpl::get_xml_list(const char* list, const char* tag) { +std::vector<std::string> HunspellImpl::get_xml_list(const std::string& list, std::string::size_type pos, const char* tag) { std::vector<std::string> slst; - if (!list) + if (pos == std::string::npos) return slst; - const char* p = list; - for (size_t n = 0; ((p = strstr(p, tag)) != NULL); ++p, ++n) { - std::string cw = get_xml_par(p + strlen(tag) - 1); + while (true) { + pos = list.find(tag, pos); + if (pos == std::string::npos) + break; + std::string cw = get_xml_par(list, pos + strlen(tag) - 1); if (cw.empty()) { break; } slst.push_back(cw); + ++pos; } return slst; } @@ -1743,19 +1804,20 @@ std::vector<std::string> HunspellImpl::get_xml_list(const char* list, const char std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) { std::vector<std::string> slst; - const char* word = in_word.c_str(); - - const char* q = strstr(word, "<query"); - if (!q) + std::string::size_type qpos = in_word.find("<query"); + if (qpos == std::string::npos) return slst; // bad XML input - const char* q2 = strchr(q, '>'); - if (!q2) + + std::string::size_type q2pos = in_word.find('>', qpos); + if (q2pos == std::string::npos) return slst; // bad XML input - q2 = strstr(q2, "<word"); - if (!q2) + + q2pos = in_word.find("<word", q2pos); + if (q2pos == std::string::npos) return slst; // bad XML input - if (check_xml_par(q, "type=", "analyze")) { - std::string cw = get_xml_par(strchr(q2, '>')); + + if (check_xml_par(in_word, qpos, "type=", "analyze")) { + std::string cw = get_xml_par(in_word, in_word.find('>', q2pos)); if (!cw.empty()) slst = analyze(cw); if (slst.empty()) @@ -1778,23 +1840,24 @@ std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) { slst.clear(); slst.push_back(r); return slst; - } else if (check_xml_par(q, "type=", "stem")) { - std::string cw = get_xml_par(strchr(q2, '>')); + } else if (check_xml_par(in_word, qpos, "type=", "stem")) { + std::string cw = get_xml_par(in_word, in_word.find('>', q2pos)); if (!cw.empty()) return stem(cw); - } else if (check_xml_par(q, "type=", "generate")) { - std::string cw = get_xml_par(strchr(q2, '>')); + } else if (check_xml_par(in_word, qpos, "type=", "generate")) { + std::string cw = get_xml_par(in_word, in_word.find('>', q2pos)); if (cw.empty()) return slst; - const char* q3 = strstr(q2 + 1, "<word"); - if (q3) { - std::string cw2 = get_xml_par(strchr(q3, '>')); + std::string::size_type q3pos = in_word.find("<word", q2pos + 1); + if (q3pos != std::string::npos) { + std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos)); if (!cw2.empty()) { return generate(cw, cw2); } } else { - if ((q2 = strstr(q2 + 1, "<code")) != NULL) { - std::vector<std::string> slst2 = get_xml_list(strchr(q2, '>'), "<a>"); + q2pos = in_word.find("<code", q2pos + 1); + if (q2pos != std::string::npos) { + std::vector<std::string> slst2 = get_xml_list(in_word, in_word.find('>', q2pos), "<a>"); if (!slst2.empty()) { slst = generate(cw, slst2); uniqlist(slst); @@ -1802,21 +1865,57 @@ std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) { } } } + } else if (check_xml_par(in_word, qpos, "type=", "add")) { + std::string cw = get_xml_par(in_word, in_word.find('>', q2pos)); + if (cw.empty()) + return slst; + std::string::size_type q3pos = in_word.find("<word", q2pos + 1); + if (q3pos != std::string::npos) { + std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos)); + if (!cw2.empty()) { + add_with_affix(cw, cw2); + } else { + add(cw); + } + } else { + add(cw); + } } return slst; } -int Hunspell::spell(const char* word, int* info, char** root) { - std::string sroot; - bool ret = m_Impl->spell(word, info, root ? &sroot : NULL); - if (root) { - if (sroot.empty()) { - *root = NULL; +std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) { + std::vector<std::string> slst; + struct hentry* he = NULL; + int len; + std::string w2; + const char* word; + const char* ignoredchars = pAMgr->get_ignore(); + if (ignoredchars != NULL) { + w2.assign(root_word); + if (utf8) { + const std::vector<w_char>& ignoredchars_utf16 = + pAMgr->get_ignore_utf16(); + remove_ignored_chars_utf(w2, ignoredchars_utf16); } else { - *root = mystrdup(sroot.c_str()); + remove_ignored_chars(w2, ignoredchars); } + word = w2.c_str(); + } else + word = root_word.c_str(); + + len = strlen(word); + + if (!len) + return slst; + + for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) { + he = m_HMgrs[i]->lookup(word); } - return ret; + if (he) { + slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str()); + } + return slst; } namespace { @@ -1835,113 +1934,289 @@ namespace { } } -void Hunspell::free_list(char*** slst, int n) { - Hunspell_free_list((Hunhandle*)(this), slst, n); +int HunspellImpl::spell(const char* word, int* info, char** root) { + std::string sroot; + bool ret = spell(word, info, root ? &sroot : NULL); + if (root) { + if (sroot.empty()) { + *root = NULL; + } else { + *root = mystrdup(sroot.c_str()); + } + } + return ret; +} + +int HunspellImpl::suggest(char*** slst, const char* word) { + std::vector<std::string> suggests = suggest(word); + return munge_vector(slst, suggests); +} + +int HunspellImpl::suffix_suggest(char*** slst, const char* root_word) { + std::vector<std::string> stems = suffix_suggest(root_word); + return munge_vector(slst, stems); +} + +void HunspellImpl::free_list(char*** slst, int n) { + if (slst && *slst) { + for (int i = 0; i < n; i++) + free((*slst)[i]); + free(*slst); + *slst = NULL; + } +} + +char* HunspellImpl::get_dic_encoding() { + return &encoding[0]; +} + +int HunspellImpl::analyze(char*** slst, const char* word) { + std::vector<std::string> stems = analyze(word); + return munge_vector(slst, stems); +} + +int HunspellImpl::stem(char*** slst, const char* word) { + std::vector<std::string> stems = stem(word); + return munge_vector(slst, stems); +} + +int HunspellImpl::stem(char*** slst, char** desc, int n) { + std::vector<std::string> morph; + morph.reserve(n); + for (int i = 0; i < n; ++i) + morph.push_back(desc[i]); + + std::vector<std::string> stems = stem(morph); + return munge_vector(slst, stems); +} + +int HunspellImpl::generate(char*** slst, const char* word, const char* pattern) { + std::vector<std::string> stems = generate(word, pattern); + return munge_vector(slst, stems); +} + +int HunspellImpl::generate(char*** slst, const char* word, char** pl, int pln) { + std::vector<std::string> morph; + morph.reserve(pln); + for (int i = 0; i < pln; ++i) + morph.push_back(pl[i]); + + std::vector<std::string> stems = generate(word, morph); + return munge_vector(slst, stems); +} + +const char* HunspellImpl::get_wordchars() const { + return get_wordchars_cpp().c_str(); +} + +const char* HunspellImpl::get_version() const { + return get_version_cpp().c_str(); +} + +int HunspellImpl::input_conv(const char* word, char* dest, size_t destsize) { + std::string d; + bool ret = input_conv(word, d); + if (ret && d.size() < destsize) { + strncpy(dest, d.c_str(), destsize); + return 1; + } + return 0; +} + +Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key) + : m_Impl(new HunspellImpl(affpath, dpath, key)) { +} + +Hunspell::~Hunspell() { + delete m_Impl; +} + +// load extra dictionaries +int Hunspell::add_dic(const char* dpath, const char* key) { + return m_Impl->add_dic(dpath, key); +} + +bool Hunspell::spell(const std::string& word, int* info, std::string* root) { + return m_Impl->spell(word, info, root); +} + +std::vector<std::string> Hunspell::suggest(const std::string& word) { + return m_Impl->suggest(word); +} + +std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) { + return m_Impl->suffix_suggest(root_word); +} + +const std::string& Hunspell::get_dict_encoding() const { + return m_Impl->get_dict_encoding(); +} + +std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) { + return m_Impl->stem(desc); +} + +std::vector<std::string> Hunspell::stem(const std::string& word) { + return m_Impl->stem(word); +} + +const std::string& Hunspell::get_wordchars_cpp() const { + return m_Impl->get_wordchars_cpp(); +} + +const std::vector<w_char>& Hunspell::get_wordchars_utf16() const { + return m_Impl->get_wordchars_utf16(); +} + +int Hunspell::add(const std::string& word) { + return m_Impl->add(word); +} + +int Hunspell::add_with_affix(const std::string& word, const std::string& example) { + return m_Impl->add_with_affix(word, example); +} + +int Hunspell::remove(const std::string& word) { + return m_Impl->remove(word); +} + +const std::string& Hunspell::get_version_cpp() const { + return m_Impl->get_version_cpp(); +} + +struct cs_info* Hunspell::get_csconv() { + return m_Impl->get_csconv(); +} + +std::vector<std::string> Hunspell::analyze(const std::string& word) { + return m_Impl->analyze(word); +} + +std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) { + return m_Impl->generate(word, pl); +} + +std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) { + return m_Impl->generate(word, pattern); +} + +int Hunspell::get_langnum() const { + return m_Impl->get_langnum(); +} + +bool Hunspell::input_conv(const std::string& word, std::string& dest) { + return m_Impl->input_conv(word, dest); +} + +int Hunspell::spell(const char* word, int* info, char** root) { + return m_Impl->spell(word, info, root); } int Hunspell::suggest(char*** slst, const char* word) { - return Hunspell_suggest((Hunhandle*)(this), slst, word); + return m_Impl->suggest(slst, word); } int Hunspell::suffix_suggest(char*** slst, const char* root_word) { - std::vector<std::string> stems = m_Impl->suffix_suggest(root_word); - return munge_vector(slst, stems); + return m_Impl->suffix_suggest(slst, root_word); +} + +void Hunspell::free_list(char*** slst, int n) { + m_Impl->free_list(slst, n); } char* Hunspell::get_dic_encoding() { - return &(m_Impl->dic_encoding_vec[0]); + return m_Impl->get_dic_encoding(); } -int Hunspell::stem(char*** slst, char** desc, int n) { - return Hunspell_stem2((Hunhandle*)(this), slst, desc, n); +int Hunspell::analyze(char*** slst, const char* word) { + return m_Impl->analyze(slst, word); } int Hunspell::stem(char*** slst, const char* word) { - return Hunspell_stem((Hunhandle*)(this), slst, word); + return m_Impl->stem(slst, word); } -int Hunspell::analyze(char*** slst, const char* word) { - return Hunspell_analyze((Hunhandle*)(this), slst, word); +int Hunspell::stem(char*** slst, char** desc, int n) { + return m_Impl->stem(slst, desc, n); +} + +int Hunspell::generate(char*** slst, const char* word, const char* pattern) { + return m_Impl->generate(slst, word, pattern); } int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) { - return Hunspell_generate2((Hunhandle*)(this), slst, word, pl, pln); + return m_Impl->generate(slst, word, pl, pln); } -int Hunspell::generate(char*** slst, const char* word, const char* pattern) { - return Hunspell_generate((Hunhandle*)(this), slst, word, pattern); +const char* Hunspell::get_wordchars() const { + return m_Impl->get_wordchars(); +} + +const char* Hunspell::get_version() const { + return m_Impl->get_version(); +} + +int Hunspell::input_conv(const char* word, char* dest, size_t destsize) { + return m_Impl->input_conv(word, dest, destsize); } Hunhandle* Hunspell_create(const char* affpath, const char* dpath) { - return (Hunhandle*)(new Hunspell(affpath, dpath)); + return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath)); } Hunhandle* Hunspell_create_key(const char* affpath, const char* dpath, const char* key) { - return reinterpret_cast<Hunhandle*>(new Hunspell(affpath, dpath, key)); + return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath, key)); } void Hunspell_destroy(Hunhandle* pHunspell) { - delete reinterpret_cast<Hunspell*>(pHunspell); + delete reinterpret_cast<HunspellImpl*>(pHunspell); } int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) { - return reinterpret_cast<Hunspell*>(pHunspell)->add_dic(dpath); + return reinterpret_cast<HunspellImpl*>(pHunspell)->add_dic(dpath); } int Hunspell_spell(Hunhandle* pHunspell, const char* word) { - return reinterpret_cast<Hunspell*>(pHunspell)->spell(std::string(word)); + return reinterpret_cast<HunspellImpl*>(pHunspell)->spell(word); } char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) { - return reinterpret_cast<Hunspell*>(pHunspell)->get_dic_encoding(); + return reinterpret_cast<HunspellImpl*>(pHunspell)->get_dic_encoding(); } int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) { - std::vector<std::string> suggests = reinterpret_cast<Hunspell*>(pHunspell)->suggest(word); - return munge_vector(slst, suggests); + return reinterpret_cast<HunspellImpl*>(pHunspell)->suggest(slst, word); } int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) { - std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->analyze(word); - return munge_vector(slst, stems); + return reinterpret_cast<HunspellImpl*>(pHunspell)->analyze(slst, word); } int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) { - - std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(word); - return munge_vector(slst, stems); + return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, word); } int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) { - std::vector<std::string> morph; - for (int i = 0; i < n; ++i) - morph.push_back(desc[i]); - - std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(morph); - return munge_vector(slst, stems); + return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, desc, n); } int Hunspell_generate(Hunhandle* pHunspell, char*** slst, const char* word, - const char* pattern) { - std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, pattern); - return munge_vector(slst, stems); + const char* pattern) +{ + return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, pattern); } int Hunspell_generate2(Hunhandle* pHunspell, char*** slst, const char* word, char** desc, - int n) { - std::vector<std::string> morph; - for (int i = 0; i < n; ++i) - morph.push_back(desc[i]); - - std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, morph); - return munge_vector(slst, stems); + int n) +{ + return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, desc, n); } /* functions for run-time modification of the dictionary */ @@ -1949,7 +2224,7 @@ int Hunspell_generate2(Hunhandle* pHunspell, /* add word to the run-time dictionary */ int Hunspell_add(Hunhandle* pHunspell, const char* word) { - return reinterpret_cast<Hunspell*>(pHunspell)->add(word); + return reinterpret_cast<HunspellImpl*>(pHunspell)->add(word); } /* add word to the run-time dictionary with affix flags of @@ -1960,58 +2235,15 @@ int Hunspell_add(Hunhandle* pHunspell, const char* word) { int Hunspell_add_with_affix(Hunhandle* pHunspell, const char* word, const char* example) { - return reinterpret_cast<Hunspell*>(pHunspell)->add_with_affix(word, example); + return reinterpret_cast<HunspellImpl*>(pHunspell)->add_with_affix(word, example); } /* remove word from the run-time dictionary */ int Hunspell_remove(Hunhandle* pHunspell, const char* word) { - return reinterpret_cast<Hunspell*>(pHunspell)->remove(word); + return reinterpret_cast<HunspellImpl*>(pHunspell)->remove(word); } -void Hunspell_free_list(Hunhandle*, char*** list, int n) { - if (list && *list) { - for (int i = 0; i < n; i++) - free((*list)[i]); - free(*list); - *list = NULL; - } -} - -std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) { - return m_Impl->suffix_suggest(root_word); -} - -std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) { - std::vector<std::string> slst; - struct hentry* he = NULL; - int len; - std::string w2; - const char* word; - const char* ignoredchars = pAMgr->get_ignore(); - if (ignoredchars != NULL) { - w2.assign(root_word); - if (utf8) { - const std::vector<w_char>& ignoredchars_utf16 = - pAMgr->get_ignore_utf16(); - remove_ignored_chars_utf(w2, ignoredchars_utf16); - } else { - remove_ignored_chars(w2, ignoredchars); - } - word = w2.c_str(); - } else - word = root_word.c_str(); - - len = strlen(word); - - if (!len) - return slst; - - for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) { - he = m_HMgrs[i]->lookup(word); - } - if (he) { - slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str()); - } - return slst; +void Hunspell_free_list(Hunhandle* pHunspell, char*** list, int n) { + reinterpret_cast<HunspellImpl*>(pHunspell)->free_list(list, n); } |