summary | refs | log | tree | commit | diff
path: root/libs/hunspell/src/hunspell.c++
diff options
context:
space:
mode:
author George Hazan <ghazan@miranda.im> 2022-08-30 17:13:21 +0300
committer George Hazan <ghazan@miranda.im> 2022-08-30 17:13:21 +0300
commit 3ad2f2b7c2bfb3166363239d67a6645692ffb2b6 (patch)
tree 0201fd31d0c0e5c193752f7b80cdc69096b563cf /libs/hunspell/src/hunspell.c++
parent d82b809f6af58a1d10fa503138b912d336dca75e (diff)
fixes #3183 (Update hunspell to 1.7.1)
Diffstat (limited to 'libs/hunspell/src/hunspell.c++')
-rw-r--r-- libs/hunspell/src/hunspell.c++ 1040
1 files changed, 636 insertions, 404 deletions
diff --git a/libs/hunspell/src/hunspell.c++ b/libs/hunspell/src/hunspell.c++
index b1535013fe..4afafdadc1 100644
--- a/libs/hunspell/src/hunspell.c++
+++ b/libs/hunspell/src/hunspell.c++
@@ -1,7 +1,7 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
- * Copyright (C) 2002-2017 Németh László
+ * Copyright (C) 2002-2022 Németh László
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
@@ -71,6 +71,7 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
+#include <time.h>
#include "affixmgr.hxx"
#include "hunspell.hxx"
@@ -86,30 +87,41 @@
class HunspellImpl
{
public:
- HunspellImpl(const char* affpath, const char* dpath, const char* key);
+ HunspellImpl(const char* affpath, const char* dpath, const char* key = NULL);
~HunspellImpl();
- int add_dic(const char* dpath, const char* key);
+ int add_dic(const char* dpath, const char* key = NULL);
std::vector<std::string> suffix_suggest(const std::string& root_word);
std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
std::vector<std::string> generate(const std::string& word, const std::string& pattern);
std::vector<std::string> stem(const std::string& word);
std::vector<std::string> stem(const std::vector<std::string>& morph);
std::vector<std::string> analyze(const std::string& word);
+ int get_langnum() const;
bool input_conv(const std::string& word, std::string& dest);
bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
std::vector<std::string> suggest(const std::string& word);
- const std::string& get_wordchars() const;
+ const std::string& get_wordchars_cpp() const;
const std::vector<w_char>& get_wordchars_utf16() const;
const std::string& get_dict_encoding() const;
int add(const std::string& word);
int add_with_affix(const std::string& word, const std::string& example);
int remove(const std::string& word);
+ const std::string& get_version_cpp() const;
struct cs_info* get_csconv();
- std::vector<char> dic_encoding_vec;
- int get_langnum() const { return langnum; }
- const char* get_try_string() const { return pAMgr->get_try_string(); }
- const std::string& get_version() const { return pAMgr->get_version(); }
+ int spell(const char* word, int* info = NULL, char** root = NULL);
+ int suggest(char*** slst, const char* word);
+ int suffix_suggest(char*** slst, const char* root_word);
+ void free_list(char*** slst, int n);
+ char* get_dic_encoding();
+ int analyze(char*** slst, const char* word);
+ int stem(char*** slst, const char* word);
+ int stem(char*** slst, char** morph, int n);
+ int generate(char*** slst, const char* word, const char* word2);
+ int generate(char*** slst, const char* word, char** desc, int n);
+ const char* get_wordchars() const;
+ const char* get_version() const;
+ int input_conv(const char* word, char* dest, size_t destsize);
private:
AffixMgr* pAMgr;
@@ -124,12 +136,17 @@ private:
std::vector<std::string> wordbreak;
private:
+ std::vector<std::string> analyze_internal(const std::string& word);
+ bool spell_internal(const std::string& word, int* info = NULL, std::string* root = NULL);
+ std::vector<std::string> suggest_internal(const std::string& word,
+ bool& capitalized, size_t& abbreviated, int& captype);
void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
size_t cleanword2(std::string& dest,
std::vector<w_char>& dest_u,
const std::string& src,
int* pcaptype,
size_t* pabbrev);
+ void clean_ignore(std::string& dest, const std::string& src);
void mkinitcap(std::string& u8);
int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
@@ -143,19 +160,15 @@ private:
void insert_sug(std::vector<std::string>& slst, const std::string& word);
void cat_result(std::string& result, const std::string& st);
std::vector<std::string> spellml(const std::string& word);
- std::string get_xml_par(const char* par);
- const char* get_xml_pos(const char* s, const char* attr);
- std::vector<std::string> get_xml_list(const char* list, const char* tag);
- int check_xml_par(const char* q, const char* attr, const char* value);
+ std::string get_xml_par(const std::string& par, std::string::size_type pos);
+ std::string::size_type get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr);
+ std::vector<std::string> get_xml_list(const std::string& list, std::string::size_type pos, const char* tag);
+ int check_xml_par(const std::string& q, std::string::size_type pos, const char* attr, const char* value);
private:
HunspellImpl(const HunspellImpl&);
HunspellImpl& operator=(const HunspellImpl&);
};
-Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
- : m_Impl(new HunspellImpl(affpath, dpath, key)) {
-}
-
HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) {
csconv = NULL;
utf8 = 0;
@@ -180,19 +193,12 @@ HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* k
complexprefixes = pAMgr->get_complexprefixes();
wordbreak = pAMgr->get_breaktable();
- dic_encoding_vec.resize(encoding.size()+1);
- strcpy(&dic_encoding_vec[0], encoding.c_str());
-
/* and finally set up the suggestion manager */
pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
if (try_string)
free(try_string);
}
-Hunspell::~Hunspell() {
- delete m_Impl;
-}
-
HunspellImpl::~HunspellImpl() {
delete pSMgr;
delete pAMgr;
@@ -210,11 +216,6 @@ HunspellImpl::~HunspellImpl() {
}
// load extra dictionaries
-int Hunspell::add_dic(const char* dpath, const char* key) {
- return m_Impl->add_dic(dpath, key);
-}
-
-// load extra dictionaries
int HunspellImpl::add_dic(const char* dpath, const char* key) {
if (!affixpath)
return 1;
@@ -222,6 +223,26 @@ int HunspellImpl::add_dic(const char* dpath, const char* key) {
return 0;
}
+
+// make a copy of src at dest while removing all characters
+// specified in IGNORE rule
+void HunspellImpl::clean_ignore(std::string& dest,
+ const std::string& src) {
+ dest.clear();
+ dest.assign(src);
+ const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
+ if (ignoredchars != NULL) {
+ if (utf8) {
+ const std::vector<w_char>& ignoredchars_utf16 =
+ pAMgr->get_ignore_utf16();
+ remove_ignored_chars_utf(dest, ignoredchars_utf16);
+ } else {
+ remove_ignored_chars(dest, ignoredchars);
+ }
+ }
+}
+
+
// make a copy of src at destination while removing all leading
// blanks and removing any trailing periods after recording
// their presence with the abbreviation flag
@@ -237,7 +258,11 @@ size_t HunspellImpl::cleanword2(std::string& dest,
dest.clear();
dest_utf.clear();
- const char* q = src.c_str();
+ // remove IGNORE characters from the string
+ std::string w2;
+ clean_ignore(w2, src);
+
+ const char* q = w2.c_str();
// first skip over any leading blanks
while (*q == ' ')
@@ -409,11 +434,22 @@ void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string&
slst.insert(slst.begin(), word);
}
-bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
- return m_Impl->spell(word, info, root);
+bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {
+ bool r = spell_internal(word, info, root);
+ if (r && root) {
+ // output conversion
+ RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
+ if (rl) {
+ std::string wspace;
+ if (rl->conv(*root, wspace)) {
+ *root = wspace;
+ }
+ }
+ }
+ return r;
}
-bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {
+bool HunspellImpl::spell_internal(const std::string& word, int* info, std::string* root) {
struct hentry* rv = NULL;
int info2 = 0;
@@ -485,7 +521,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
case HUHCAP:
/* FALLTHROUGH */
case HUHINITCAP:
- *info += SPELL_ORIGCAP;
+ *info |= SPELL_ORIGCAP;
/* FALLTHROUGH */
case NOCAP:
rv = checkword(scw, info, root);
@@ -496,7 +532,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
}
break;
case ALLCAP: {
- *info += SPELL_ORIGCAP;
+ *info |= SPELL_ORIGCAP;
rv = checkword(scw, info, root);
if (rv)
break;
@@ -563,17 +599,22 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
break;
}
}
+ /* FALLTHROUGH */
case INITCAP: {
-
- *info += SPELL_ORIGCAP;
- mkallsmall2(scw, sunicw);
- std::string u8buffer(scw);
- mkinitcap2(scw, sunicw);
+ // handle special capitalization of dotted I
+ bool Idot = (utf8 && (unsigned char) scw[0] == 0xc4 && (unsigned char) scw[1] == 0xb0);
+ *info |= SPELL_ORIGCAP;
+ if (captype == ALLCAP) {
+ mkallsmall2(scw, sunicw);
+ mkinitcap2(scw, sunicw);
+ if (Idot)
+ scw.replace(0, 1, "\xc4\xb0");
+ }
if (captype == INITCAP)
- *info += SPELL_INITCAP;
+ *info |= SPELL_INITCAP;
rv = checkword(scw, info, root);
if (captype == INITCAP)
- *info -= SPELL_INITCAP;
+ *info &= ~SPELL_INITCAP;
// forbid bad capitalization
// (for example, ijs -> Ijs instead of IJs in Dutch)
// use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
@@ -583,9 +624,13 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
}
if (rv && is_keepcase(rv) && (captype == ALLCAP))
rv = NULL;
- if (rv)
+ if (rv || (Idot && langnum != LANG_az && langnum != LANG_tr && langnum != LANG_crh))
break;
+ mkallsmall2(scw, sunicw);
+ std::string u8buffer(scw);
+ mkinitcap2(scw, sunicw);
+
rv = checkword(u8buffer, info, root);
if (abbv && !rv) {
u8buffer.push_back('.');
@@ -594,10 +639,10 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
u8buffer = scw;
u8buffer.push_back('.');
if (captype == INITCAP)
- *info += SPELL_INITCAP;
+ *info |= SPELL_INITCAP;
rv = checkword(u8buffer, info, root);
if (captype == INITCAP)
- *info -= SPELL_INITCAP;
+ *info &= ~SPELL_INITCAP;
if (rv && is_keepcase(rv) && (captype == ALLCAP))
rv = NULL;
break;
@@ -618,7 +663,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
if (rv) {
if (pAMgr && pAMgr->get_warn() && rv->astr &&
TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
- *info += SPELL_WARN;
+ *info |= SPELL_WARN;
if (pAMgr->get_forbidwarn())
return false;
return true;
@@ -627,7 +672,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
}
// recursive breaking at break points
- if (!wordbreak.empty()) {
+ if (!wordbreak.empty() && !(*info & SPELL_FORBIDDEN)) {
int nbr = 0;
wl = scw.size();
@@ -668,6 +713,37 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
size_t plen = wordbreak[j].size();
size_t found = scw.find(wordbreak[j]);
if ((found > 0) && (found < wl - plen)) {
+ size_t found2 = scw.find(wordbreak[j], found + 1);
+ // try to break at the second occurrence
+ // to recognize dictionary words with wordbreak
+ if (found2 > 0 && (found2 < wl - plen))
+ found = found2;
+ if (!spell(scw.substr(found + plen)))
+ continue;
+ std::string suffix(scw.substr(found));
+ scw.resize(found);
+ // examine 2 sides of the break point
+ if (spell(scw))
+ return true;
+ scw.append(suffix);
+
+ // LANG_hu: spec. dash rule
+ if (langnum == LANG_hu && wordbreak[j] == "-") {
+ suffix = scw.substr(found + 1);
+ scw.resize(found + 1);
+ if (spell(scw))
+ return true; // check the first part with dash
+ scw.append(suffix);
+ }
+ // end of LANG specific region
+ }
+ }
+
+ // other patterns (break at first break point)
+ for (size_t j = 0; j < wordbreak.size(); ++j) {
+ size_t plen = wordbreak[j].size();
+ size_t found = scw.find(wordbreak[j]);
+ if ((found > 0) && (found < wl - plen)) {
if (!spell(scw.substr(found + plen)))
continue;
std::string suffix(scw.substr(found));
@@ -694,47 +770,28 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
}
struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
- bool usebuffer = false;
std::string w2;
const char* word;
int len;
- const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
- if (ignoredchars != NULL) {
- w2.assign(w);
- if (utf8) {
- const std::vector<w_char>& ignoredchars_utf16 =
- pAMgr->get_ignore_utf16();
- remove_ignored_chars_utf(w2, ignoredchars_utf16);
- } else {
- remove_ignored_chars(w2, ignoredchars);
- }
- word = w2.c_str();
- len = w2.size();
- usebuffer = true;
- } else {
- word = w.c_str();
- len = w.size();
- }
+ // remove IGNORE characters from the string
+ clean_ignore(w2, w);
+
+ word = w2.c_str();
+ len = w2.size();
if (!len)
return NULL;
// word reversing wrapper for complex prefixes
if (complexprefixes) {
- if (!usebuffer) {
- w2.assign(word);
- usebuffer = true;
- }
if (utf8)
reverseword_utf(w2);
else
reverseword(w2);
}
- if (usebuffer) {
- word = w2.c_str();
- }
+ word = w2.c_str();
// look word in hash table
struct hentry* he = NULL;
@@ -745,13 +802,13 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str
if ((he) && (he->astr) && (pAMgr) &&
TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
if (info)
- *info += SPELL_FORBIDDEN;
+ *info |= SPELL_FORBIDDEN;
// LANG_hu section: set dash information for suggestions
if (langnum == LANG_hu) {
if (pAMgr->get_compoundflag() &&
TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
if (info)
- *info += SPELL_COMPOUND;
+ *info |= SPELL_COMPOUND;
}
}
return NULL;
@@ -786,7 +843,7 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str
if ((he->astr) && (pAMgr) &&
TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
if (info)
- *info += SPELL_FORBIDDEN;
+ *info |= SPELL_FORBIDDEN;
return NULL;
}
if (root) {
@@ -819,7 +876,7 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str
}
}
if (info)
- *info += SPELL_COMPOUND;
+ *info |= SPELL_COMPOUND;
}
}
}
@@ -827,11 +884,103 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str
return he;
}
-std::vector<std::string> Hunspell::suggest(const std::string& word) {
- return m_Impl->suggest(word);
+std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
+ bool capwords;
+ size_t abbv;
+ int captype;
+ std::vector<std::string> slst = suggest_internal(word, capwords, abbv, captype);
+ // word reversing wrapper for complex prefixes
+ if (complexprefixes) {
+ for (size_t j = 0; j < slst.size(); ++j) {
+ if (utf8)
+ reverseword_utf(slst[j]);
+ else
+ reverseword(slst[j]);
+ }
+ }
+
+ // capitalize
+ if (capwords)
+ for (size_t j = 0; j < slst.size(); ++j) {
+ mkinitcap(slst[j]);
+ }
+
+ // expand suggestions with dot(s)
+ if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
+ for (size_t j = 0; j < slst.size(); ++j) {
+ slst[j].append(word.substr(word.size() - abbv));
+ }
+ }
+
+ // remove bad capitalized and forbidden forms
+ if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
+ switch (captype) {
+ case INITCAP:
+ case ALLCAP: {
+ size_t l = 0;
+ for (size_t j = 0; j < slst.size(); ++j) {
+ if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
+ std::string s;
+ std::vector<w_char> w;
+ if (utf8) {
+ u8_u16(w, slst[j]);
+ } else {
+ s = slst[j];
+ }
+ mkallsmall2(s, w);
+ if (spell(s)) {
+ slst[l] = s;
+ ++l;
+ } else {
+ mkinitcap2(s, w);
+ if (spell(s)) {
+ slst[l] = s;
+ ++l;
+ }
+ }
+ } else {
+ slst[l] = slst[j];
+ ++l;
+ }
+ }
+ slst.resize(l);
+ }
+ }
+ }
+
+ // remove duplications
+ size_t l = 0;
+ for (size_t j = 0; j < slst.size(); ++j) {
+ slst[l] = slst[j];
+ for (size_t k = 0; k < l; ++k) {
+ if (slst[k] == slst[j]) {
+ --l;
+ break;
+ }
+ }
+ ++l;
+ }
+ slst.resize(l);
+
+ // output conversion
+ RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
+ if (rl) {
+ for (size_t i = 0; rl && i < slst.size(); ++i) {
+ std::string wspace;
+ if (rl->conv(slst[i], wspace)) {
+ slst[i] = wspace;
+ }
+ }
+ }
+ return slst;
}
-std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
+std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word,
+ bool& capwords, size_t& abbv, int& captype) {
+ captype = NOCAP;
+ abbv = 0;
+ capwords = false;
+
std::vector<std::string> slst;
int onlycmpdsug = 0;
@@ -849,8 +998,6 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
if (word.size() >= MAXWORDLEN)
return slst;
}
- int captype = NOCAP;
- size_t abbv = 0;
size_t wl = 0;
std::string scw;
@@ -871,7 +1018,11 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
return slst;
}
- int capwords = 0;
+ bool good = false;
+
+ clock_t timelimit;
+ // initialize in every suggestion call
+ timelimit = clock();
// check capitalized form for FORCEUCASE
if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
@@ -886,22 +1037,38 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
switch (captype) {
case NOCAP: {
- pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
+ good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ if (abbv) {
+ std::string wspace(scw);
+ wspace.push_back('.');
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ }
break;
}
case INITCAP: {
- capwords = 1;
- pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
+ capwords = true;
+ good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
std::string wspace(scw);
mkallsmall2(wspace, sunicw);
- pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
break;
}
case HUHINITCAP:
- capwords = 1;
+ capwords = true;
+ /* FALLTHROUGH */
case HUHCAP: {
- pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
+ good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
// something.The -> something. The
size_t dot_pos = scw.find('.');
if (dot_pos != std::string::npos) {
@@ -927,19 +1094,25 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
// TheOpenOffice.org -> The OpenOffice.org
wspace = scw;
mkinitsmall2(wspace, sunicw);
- pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
}
wspace = scw;
mkallsmall2(wspace, sunicw);
if (spell(wspace.c_str()))
insert_sug(slst, wspace);
size_t prevns = slst.size();
- pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
if (captype == HUHINITCAP) {
mkinitcap2(wspace, sunicw);
if (spell(wspace.c_str()))
insert_sug(slst, wspace);
- pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
}
// aNew -> "a New" (instead of "a new")
for (size_t j = prevns; j < slst.size(); ++j) {
@@ -966,11 +1139,15 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
case ALLCAP: {
std::string wspace(scw);
mkallsmall2(wspace, sunicw);
- pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
insert_sug(slst, wspace);
mkinitcap2(wspace, sunicw);
- pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
for (size_t j = 0; j < slst.size(); ++j) {
mkallcap(slst[j]);
if (pAMgr && pAMgr->get_checksharps()) {
@@ -1002,34 +1179,43 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
}
}
// END OF LANG_hu section
-
- // try ngram approach since found nothing or only compound words
- if (pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
+ // try ngram approach since no good suggestion was found
+ if (!good && pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
switch (captype) {
case NOCAP: {
- pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs);
+ pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
break;
}
+ /* FALLTHROUGH */
case HUHINITCAP:
- capwords = 1;
+ capwords = true;
+ /* FALLTHROUGH */
case HUHCAP: {
std::string wspace(scw);
mkallsmall2(wspace, sunicw);
- pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
+ pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
break;
}
case INITCAP: {
- capwords = 1;
+ capwords = true;
std::string wspace(scw);
mkallsmall2(wspace, sunicw);
- pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
+ pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
break;
}
case ALLCAP: {
std::string wspace(scw);
mkallsmall2(wspace, sunicw);
size_t oldns = slst.size();
- pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
+ pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
for (size_t j = oldns; j < slst.size(); ++j) {
mkallcap(slst[j]);
}
@@ -1039,6 +1225,11 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
}
// try dash suggestion (Afo-American -> Afro-American)
+ // Note: LibreOffice was modified to treat dashes as word
+ // characters to check "scot-free" etc. word forms, but
+ // we need to handle suggestions for "Afo-American", etc.,
+ // while "Afro-American" is missing from the dictionary.
+ // TODO avoid possible overgeneration
size_t dash_pos = scw.find('-');
if (dash_pos != std::string::npos) {
int nodashsug = 1;
@@ -1050,12 +1241,14 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
size_t prev_pos = 0;
bool last = false;
- while (nodashsug && !last) {
+ while (!good && nodashsug && !last) {
if (dash_pos == scw.size())
last = 1;
std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
if (!spell(chunk.c_str())) {
std::vector<std::string> nlst = suggest(chunk.c_str());
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) {
std::string wspace = scw.substr(0, prev_pos);
wspace.append(*j);
@@ -1063,7 +1256,11 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
wspace.append("-");
wspace.append(scw.substr(dash_pos + 1));
}
- insert_sug(slst, wspace);
+ int info = 0;
+ if (pAMgr && pAMgr->get_forbiddenword())
+ checkword(wspace, &info, NULL);
+ if (!(info & SPELL_FORBIDDEN))
+ insert_sug(slst, wspace);
}
nodashsug = 0;
}
@@ -1075,104 +1272,13 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
dash_pos = scw.size();
}
}
-
- // word reversing wrapper for complex prefixes
- if (complexprefixes) {
- for (size_t j = 0; j < slst.size(); ++j) {
- if (utf8)
- reverseword_utf(slst[j]);
- else
- reverseword(slst[j]);
- }
- }
-
- // capitalize
- if (capwords)
- for (size_t j = 0; j < slst.size(); ++j) {
- mkinitcap(slst[j]);
- }
-
- // expand suggestions with dot(s)
- if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
- for (size_t j = 0; j < slst.size(); ++j) {
- slst[j].append(word.substr(word.size() - abbv));
- }
- }
-
- // remove bad capitalized and forbidden forms
- if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
- switch (captype) {
- case INITCAP:
- case ALLCAP: {
- size_t l = 0;
- for (size_t j = 0; j < slst.size(); ++j) {
- if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
- std::string s;
- std::vector<w_char> w;
- if (utf8) {
- u8_u16(w, slst[j]);
- } else {
- s = slst[j];
- }
- mkallsmall2(s, w);
- if (spell(s)) {
- slst[l] = s;
- ++l;
- } else {
- mkinitcap2(s, w);
- if (spell(s)) {
- slst[l] = s;
- ++l;
- }
- }
- } else {
- slst[l] = slst[j];
- ++l;
- }
- }
- slst.resize(l);
- }
- }
- }
-
- // remove duplications
- size_t l = 0;
- for (size_t j = 0; j < slst.size(); ++j) {
- slst[l] = slst[j];
- for (size_t k = 0; k < l; ++k) {
- if (slst[k] == slst[j]) {
- --l;
- break;
- }
- }
- ++l;
- }
- slst.resize(l);
-
- // output conversion
- rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
- for (size_t j = 0; rl && j < slst.size(); ++j) {
- std::string wspace;
- if (rl->conv(slst[j], wspace)) {
- slst[j] = wspace;
- }
- }
-
return slst;
}
-const std::string& Hunspell::get_dict_encoding() const {
- return m_Impl->get_dict_encoding();
-}
-
const std::string& HunspellImpl::get_dict_encoding() const {
return encoding;
}
-std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
- return m_Impl->stem(desc);
-}
-
std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
std::vector<std::string> slst;
@@ -1241,30 +1347,14 @@ std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc
return slst;
}
-std::vector<std::string> Hunspell::stem(const std::string& word) {
- return m_Impl->stem(word);
-}
-
std::vector<std::string> HunspellImpl::stem(const std::string& word) {
return stem(analyze(word));
}
-const char* Hunspell::get_wordchars() const {
- return m_Impl->get_wordchars().c_str();
-}
-
-const std::string& Hunspell::get_wordchars_cpp() const {
- return m_Impl->get_wordchars();
-}
-
-const std::string& HunspellImpl::get_wordchars() const {
+const std::string& HunspellImpl::get_wordchars_cpp() const {
return pAMgr->get_wordchars();
}
-const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
- return m_Impl->get_wordchars_utf16();
-}
-
const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
return pAMgr->get_wordchars_utf16();
}
@@ -1300,56 +1390,32 @@ int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
return u8.size();
}
-int Hunspell::add(const std::string& word) {
- return m_Impl->add(word);
-}
-
int HunspellImpl::add(const std::string& word) {
if (!m_HMgrs.empty())
return m_HMgrs[0]->add(word);
return 0;
}
-int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
- return m_Impl->add_with_affix(word, example);
-}
-
int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
if (!m_HMgrs.empty())
return m_HMgrs[0]->add_with_affix(word, example);
return 0;
}
-int Hunspell::remove(const std::string& word) {
- return m_Impl->remove(word);
-}
-
int HunspellImpl::remove(const std::string& word) {
if (!m_HMgrs.empty())
return m_HMgrs[0]->remove(word);
return 0;
}
-const char* Hunspell::get_version() const {
- return m_Impl->get_version().c_str();
-}
-
-const std::string& Hunspell::get_version_cpp() const {
- return m_Impl->get_version();
-}
-
-const char* Hunspell::get_try_string() const {
- return m_Impl->get_try_string();
+const std::string& HunspellImpl::get_version_cpp() const {
+ return pAMgr->get_version();
}
struct cs_info* HunspellImpl::get_csconv() {
return csconv;
}
-struct cs_info* Hunspell::get_csconv() {
- return m_Impl->get_csconv();
-}
-
void HunspellImpl::cat_result(std::string& result, const std::string& st) {
if (!st.empty()) {
if (!result.empty())
@@ -1358,11 +1424,22 @@ void HunspellImpl::cat_result(std::string& result, const std::string& st) {
}
}
-std::vector<std::string> Hunspell::analyze(const std::string& word) {
- return m_Impl->analyze(word);
+std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
+ std::vector<std::string> slst = analyze_internal(word);
+ // output conversion
+ RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
+ if (rl) {
+ for (size_t i = 0; rl && i < slst.size(); ++i) {
+ std::string wspace;
+ if (rl->conv(slst[i], wspace)) {
+ slst[i] = wspace;
+ }
+ }
+ }
+ return slst;
}
-std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
+std::vector<std::string> HunspellImpl::analyze_internal(const std::string& word) {
std::vector<std::string> slst;
if (!pSMgr || m_HMgrs.empty())
return slst;
@@ -1595,10 +1672,6 @@ std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
return slst;
}
-std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
- return m_Impl->generate(word, pl);
-}
-
std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
std::vector<std::string> slst;
if (!pSMgr || pl.empty())
@@ -1643,10 +1716,6 @@ std::vector<std::string> HunspellImpl::generate(const std::string& word, const s
return slst;
}
-std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
- return m_Impl->generate(word, pattern);
-}
-
std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
std::vector<std::string> pl = analyze(pattern);
std::vector<std::string> slst = generate(word, pl);
@@ -1655,10 +1724,11 @@ std::vector<std::string> HunspellImpl::generate(const std::string& word, const s
}
// minimal XML parser functions
-std::string HunspellImpl::get_xml_par(const char* par) {
+std::string HunspellImpl::get_xml_par(const std::string& in_par, std::string::size_type pos) {
std::string dest;
- if (!par)
+ if (pos == std::string::npos)
return dest;
+ const char* par = in_par.c_str() + pos;
char end = *par;
if (end == '>')
end = '<';
@@ -1672,22 +1742,8 @@ std::string HunspellImpl::get_xml_par(const char* par) {
return dest;
}
-int Hunspell::get_langnum() const {
- return m_Impl->get_langnum();
-}
-
-bool Hunspell::input_conv(const std::string& word, std::string& dest) {
- return m_Impl->input_conv(word, dest);
-}
-
-int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
- std::string d;
- bool ret = input_conv(word, d);
- if (ret && d.size() < destsize) {
- strncpy(dest, d.c_str(), destsize);
- return 1;
- }
- return 0;
+int HunspellImpl::get_langnum() const {
+ return langnum;
}
bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
@@ -1700,42 +1756,47 @@ bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
}
// return the beginning of the element (attr == NULL) or the attribute
-const char* HunspellImpl::get_xml_pos(const char* s, const char* attr) {
- const char* end = strchr(s, '>');
+std::string::size_type HunspellImpl::get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr) {
+ if (pos == std::string::npos)
+ return std::string::npos;
+
+ std::string::size_type endpos = s.find('>', pos);
if (attr == NULL)
- return end;
- const char* p = s;
- while (1) {
- p = strstr(p, attr);
- if (!p || p >= end)
- return 0;
- if (*(p - 1) == ' ' || *(p - 1) == '\n')
+ return endpos;
+ while (true) {
+ pos = s.find(attr, pos);
+ if (pos == std::string::npos || pos >= endpos)
+ return std::string::npos;
+ if (s[pos - 1] == ' ' || s[pos - 1] == '\n')
break;
- p += strlen(attr);
+ pos += strlen(attr);
}
- return p + strlen(attr);
+ return pos + strlen(attr);
}
-int HunspellImpl::check_xml_par(const char* q,
- const char* attr,
- const char* value) {
- std::string cw = get_xml_par(get_xml_pos(q, attr));
+int HunspellImpl::check_xml_par(const std::string& q, std::string::size_type pos,
+ const char* attr,
+ const char* value) {
+ std::string cw = get_xml_par(q, get_xml_pos(q, pos, attr));
if (cw == value)
return 1;
return 0;
}
-std::vector<std::string> HunspellImpl::get_xml_list(const char* list, const char* tag) {
+std::vector<std::string> HunspellImpl::get_xml_list(const std::string& list, std::string::size_type pos, const char* tag) {
std::vector<std::string> slst;
- if (!list)
+ if (pos == std::string::npos)
return slst;
- const char* p = list;
- for (size_t n = 0; ((p = strstr(p, tag)) != NULL); ++p, ++n) {
- std::string cw = get_xml_par(p + strlen(tag) - 1);
+ while (true) {
+ pos = list.find(tag, pos);
+ if (pos == std::string::npos)
+ break;
+ std::string cw = get_xml_par(list, pos + strlen(tag) - 1);
if (cw.empty()) {
break;
}
slst.push_back(cw);
+ ++pos;
}
return slst;
}
@@ -1743,19 +1804,20 @@ std::vector<std::string> HunspellImpl::get_xml_list(const char* list, const char
std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
std::vector<std::string> slst;
- const char* word = in_word.c_str();
-
- const char* q = strstr(word, "<query");
- if (!q)
+ std::string::size_type qpos = in_word.find("<query");
+ if (qpos == std::string::npos)
return slst; // bad XML input
- const char* q2 = strchr(q, '>');
- if (!q2)
+
+ std::string::size_type q2pos = in_word.find('>', qpos);
+ if (q2pos == std::string::npos)
return slst; // bad XML input
- q2 = strstr(q2, "<word");
- if (!q2)
+
+ q2pos = in_word.find("<word", q2pos);
+ if (q2pos == std::string::npos)
return slst; // bad XML input
- if (check_xml_par(q, "type=", "analyze")) {
- std::string cw = get_xml_par(strchr(q2, '>'));
+
+ if (check_xml_par(in_word, qpos, "type=", "analyze")) {
+ std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
if (!cw.empty())
slst = analyze(cw);
if (slst.empty())
@@ -1778,23 +1840,24 @@ std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
slst.clear();
slst.push_back(r);
return slst;
- } else if (check_xml_par(q, "type=", "stem")) {
- std::string cw = get_xml_par(strchr(q2, '>'));
+ } else if (check_xml_par(in_word, qpos, "type=", "stem")) {
+ std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
if (!cw.empty())
return stem(cw);
- } else if (check_xml_par(q, "type=", "generate")) {
- std::string cw = get_xml_par(strchr(q2, '>'));
+ } else if (check_xml_par(in_word, qpos, "type=", "generate")) {
+ std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
if (cw.empty())
return slst;
- const char* q3 = strstr(q2 + 1, "<word");
- if (q3) {
- std::string cw2 = get_xml_par(strchr(q3, '>'));
+ std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
+ if (q3pos != std::string::npos) {
+ std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
if (!cw2.empty()) {
return generate(cw, cw2);
}
} else {
- if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
- std::vector<std::string> slst2 = get_xml_list(strchr(q2, '>'), "<a>");
+ q2pos = in_word.find("<code", q2pos + 1);
+ if (q2pos != std::string::npos) {
+ std::vector<std::string> slst2 = get_xml_list(in_word, in_word.find('>', q2pos), "<a>");
if (!slst2.empty()) {
slst = generate(cw, slst2);
uniqlist(slst);
@@ -1802,21 +1865,57 @@ std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
}
}
}
+ } else if (check_xml_par(in_word, qpos, "type=", "add")) {
+ std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
+ if (cw.empty())
+ return slst;
+ std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
+ if (q3pos != std::string::npos) {
+ std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
+ if (!cw2.empty()) {
+ add_with_affix(cw, cw2);
+ } else {
+ add(cw);
+ }
+ } else {
+ add(cw);
+ }
}
return slst;
}
-int Hunspell::spell(const char* word, int* info, char** root) {
- std::string sroot;
- bool ret = m_Impl->spell(word, info, root ? &sroot : NULL);
- if (root) {
- if (sroot.empty()) {
- *root = NULL;
+std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
+ std::vector<std::string> slst;
+ struct hentry* he = NULL;
+ int len;
+ std::string w2;
+ const char* word;
+ const char* ignoredchars = pAMgr->get_ignore();
+ if (ignoredchars != NULL) {
+ w2.assign(root_word);
+ if (utf8) {
+ const std::vector<w_char>& ignoredchars_utf16 =
+ pAMgr->get_ignore_utf16();
+ remove_ignored_chars_utf(w2, ignoredchars_utf16);
} else {
- *root = mystrdup(sroot.c_str());
+ remove_ignored_chars(w2, ignoredchars);
}
+ word = w2.c_str();
+ } else
+ word = root_word.c_str();
+
+ len = strlen(word);
+
+ if (!len)
+ return slst;
+
+ for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
+ he = m_HMgrs[i]->lookup(word);
}
- return ret;
+ if (he) {
+ slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str());
+ }
+ return slst;
}
namespace {
@@ -1835,113 +1934,289 @@ namespace {
}
}
-void Hunspell::free_list(char*** slst, int n) {
- Hunspell_free_list((Hunhandle*)(this), slst, n);
+int HunspellImpl::spell(const char* word, int* info, char** root) {
+ std::string sroot;
+ bool ret = spell(word, info, root ? &sroot : NULL);
+ if (root) {
+ if (sroot.empty()) {
+ *root = NULL;
+ } else {
+ *root = mystrdup(sroot.c_str());
+ }
+ }
+ return ret;
+}
+
+int HunspellImpl::suggest(char*** slst, const char* word) {
+ std::vector<std::string> suggests = suggest(word);
+ return munge_vector(slst, suggests);
+}
+
+int HunspellImpl::suffix_suggest(char*** slst, const char* root_word) {
+ std::vector<std::string> stems = suffix_suggest(root_word);
+ return munge_vector(slst, stems);
+}
+
+void HunspellImpl::free_list(char*** slst, int n) {
+ if (slst && *slst) {
+ for (int i = 0; i < n; i++)
+ free((*slst)[i]);
+ free(*slst);
+ *slst = NULL;
+ }
+}
+
+char* HunspellImpl::get_dic_encoding() {
+ return &encoding[0];
+}
+
+int HunspellImpl::analyze(char*** slst, const char* word) {
+ std::vector<std::string> stems = analyze(word);
+ return munge_vector(slst, stems);
+}
+
+int HunspellImpl::stem(char*** slst, const char* word) {
+ std::vector<std::string> stems = stem(word);
+ return munge_vector(slst, stems);
+}
+
+int HunspellImpl::stem(char*** slst, char** desc, int n) {
+ std::vector<std::string> morph;
+ morph.reserve(n);
+ for (int i = 0; i < n; ++i)
+ morph.push_back(desc[i]);
+
+ std::vector<std::string> stems = stem(morph);
+ return munge_vector(slst, stems);
+}
+
+int HunspellImpl::generate(char*** slst, const char* word, const char* pattern) {
+ std::vector<std::string> stems = generate(word, pattern);
+ return munge_vector(slst, stems);
+}
+
+int HunspellImpl::generate(char*** slst, const char* word, char** pl, int pln) {
+ std::vector<std::string> morph;
+ morph.reserve(pln);
+ for (int i = 0; i < pln; ++i)
+ morph.push_back(pl[i]);
+
+ std::vector<std::string> stems = generate(word, morph);
+ return munge_vector(slst, stems);
+}
+
+const char* HunspellImpl::get_wordchars() const {
+ return get_wordchars_cpp().c_str();
+}
+
+const char* HunspellImpl::get_version() const {
+ return get_version_cpp().c_str();
+}
+
+int HunspellImpl::input_conv(const char* word, char* dest, size_t destsize) {
+ std::string d;
+ bool ret = input_conv(word, d);
+ if (ret && d.size() < destsize) {
+ strncpy(dest, d.c_str(), destsize);
+ return 1;
+ }
+ return 0;
+}
+
+Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
+ : m_Impl(new HunspellImpl(affpath, dpath, key)) {
+}
+
+Hunspell::~Hunspell() {
+ delete m_Impl;
+}
+
+// load extra dictionaries
+int Hunspell::add_dic(const char* dpath, const char* key) {
+ return m_Impl->add_dic(dpath, key);
+}
+
+bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
+ return m_Impl->spell(word, info, root);
+}
+
+std::vector<std::string> Hunspell::suggest(const std::string& word) {
+ return m_Impl->suggest(word);
+}
+
+std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
+ return m_Impl->suffix_suggest(root_word);
+}
+
+const std::string& Hunspell::get_dict_encoding() const {
+ return m_Impl->get_dict_encoding();
+}
+
+std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
+ return m_Impl->stem(desc);
+}
+
+std::vector<std::string> Hunspell::stem(const std::string& word) {
+ return m_Impl->stem(word);
+}
+
+const std::string& Hunspell::get_wordchars_cpp() const {
+ return m_Impl->get_wordchars_cpp();
+}
+
+const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
+ return m_Impl->get_wordchars_utf16();
+}
+
+int Hunspell::add(const std::string& word) {
+ return m_Impl->add(word);
+}
+
+int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
+ return m_Impl->add_with_affix(word, example);
+}
+
+int Hunspell::remove(const std::string& word) {
+ return m_Impl->remove(word);
+}
+
+const std::string& Hunspell::get_version_cpp() const {
+ return m_Impl->get_version_cpp();
+}
+
+struct cs_info* Hunspell::get_csconv() {
+ return m_Impl->get_csconv();
+}
+
+std::vector<std::string> Hunspell::analyze(const std::string& word) {
+ return m_Impl->analyze(word);
+}
+
+std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
+ return m_Impl->generate(word, pl);
+}
+
+std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
+ return m_Impl->generate(word, pattern);
+}
+
+int Hunspell::get_langnum() const {
+ return m_Impl->get_langnum();
+}
+
+bool Hunspell::input_conv(const std::string& word, std::string& dest) {
+ return m_Impl->input_conv(word, dest);
+}
+
+int Hunspell::spell(const char* word, int* info, char** root) {
+ return m_Impl->spell(word, info, root);
}
int Hunspell::suggest(char*** slst, const char* word) {
- return Hunspell_suggest((Hunhandle*)(this), slst, word);
+ return m_Impl->suggest(slst, word);
}
int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
- std::vector<std::string> stems = m_Impl->suffix_suggest(root_word);
- return munge_vector(slst, stems);
+ return m_Impl->suffix_suggest(slst, root_word);
+}
+
+void Hunspell::free_list(char*** slst, int n) {
+ m_Impl->free_list(slst, n);
}
char* Hunspell::get_dic_encoding() {
- return &(m_Impl->dic_encoding_vec[0]);
+ return m_Impl->get_dic_encoding();
}
-int Hunspell::stem(char*** slst, char** desc, int n) {
- return Hunspell_stem2((Hunhandle*)(this), slst, desc, n);
+int Hunspell::analyze(char*** slst, const char* word) {
+ return m_Impl->analyze(slst, word);
}
int Hunspell::stem(char*** slst, const char* word) {
- return Hunspell_stem((Hunhandle*)(this), slst, word);
+ return m_Impl->stem(slst, word);
}
-int Hunspell::analyze(char*** slst, const char* word) {
- return Hunspell_analyze((Hunhandle*)(this), slst, word);
+int Hunspell::stem(char*** slst, char** desc, int n) {
+ return m_Impl->stem(slst, desc, n);
+}
+
+int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
+ return m_Impl->generate(slst, word, pattern);
}
int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
- return Hunspell_generate2((Hunhandle*)(this), slst, word, pl, pln);
+ return m_Impl->generate(slst, word, pl, pln);
}
-int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
- return Hunspell_generate((Hunhandle*)(this), slst, word, pattern);
+const char* Hunspell::get_wordchars() const {
+ return m_Impl->get_wordchars();
+}
+
+const char* Hunspell::get_version() const {
+ return m_Impl->get_version();
+}
+
+int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
+ return m_Impl->input_conv(word, dest, destsize);
}
Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
- return (Hunhandle*)(new Hunspell(affpath, dpath));
+ return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath));
}
Hunhandle* Hunspell_create_key(const char* affpath,
const char* dpath,
const char* key) {
- return reinterpret_cast<Hunhandle*>(new Hunspell(affpath, dpath, key));
+ return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath, key));
}
void Hunspell_destroy(Hunhandle* pHunspell) {
- delete reinterpret_cast<Hunspell*>(pHunspell);
+ delete reinterpret_cast<HunspellImpl*>(pHunspell);
}
int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
- return reinterpret_cast<Hunspell*>(pHunspell)->add_dic(dpath);
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->add_dic(dpath);
}
int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
- return reinterpret_cast<Hunspell*>(pHunspell)->spell(std::string(word));
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->spell(word);
}
char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
- return reinterpret_cast<Hunspell*>(pHunspell)->get_dic_encoding();
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->get_dic_encoding();
}
int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
- std::vector<std::string> suggests = reinterpret_cast<Hunspell*>(pHunspell)->suggest(word);
- return munge_vector(slst, suggests);
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->suggest(slst, word);
}
int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
- std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->analyze(word);
- return munge_vector(slst, stems);
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->analyze(slst, word);
}
int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
-
- std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(word);
- return munge_vector(slst, stems);
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, word);
}
int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
- std::vector<std::string> morph;
- for (int i = 0; i < n; ++i)
- morph.push_back(desc[i]);
-
- std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(morph);
- return munge_vector(slst, stems);
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, desc, n);
}
int Hunspell_generate(Hunhandle* pHunspell,
char*** slst,
const char* word,
- const char* pattern) {
- std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, pattern);
- return munge_vector(slst, stems);
+ const char* pattern)
+{
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, pattern);
}
int Hunspell_generate2(Hunhandle* pHunspell,
char*** slst,
const char* word,
char** desc,
- int n) {
- std::vector<std::string> morph;
- for (int i = 0; i < n; ++i)
- morph.push_back(desc[i]);
-
- std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, morph);
- return munge_vector(slst, stems);
+ int n)
+{
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, desc, n);
}
/* functions for run-time modification of the dictionary */
@@ -1949,7 +2224,7 @@ int Hunspell_generate2(Hunhandle* pHunspell,
/* add word to the run-time dictionary */
int Hunspell_add(Hunhandle* pHunspell, const char* word) {
- return reinterpret_cast<Hunspell*>(pHunspell)->add(word);
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->add(word);
}
/* add word to the run-time dictionary with affix flags of
@@ -1960,58 +2235,15 @@ int Hunspell_add(Hunhandle* pHunspell, const char* word) {
int Hunspell_add_with_affix(Hunhandle* pHunspell,
const char* word,
const char* example) {
- return reinterpret_cast<Hunspell*>(pHunspell)->add_with_affix(word, example);
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->add_with_affix(word, example);
}
/* remove word from the run-time dictionary */
int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
- return reinterpret_cast<Hunspell*>(pHunspell)->remove(word);
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->remove(word);
}
-void Hunspell_free_list(Hunhandle*, char*** list, int n) {
- if (list && *list) {
- for (int i = 0; i < n; i++)
- free((*list)[i]);
- free(*list);
- *list = NULL;
- }
-}
-
-std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
- return m_Impl->suffix_suggest(root_word);
-}
-
-std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
- std::vector<std::string> slst;
- struct hentry* he = NULL;
- int len;
- std::string w2;
- const char* word;
- const char* ignoredchars = pAMgr->get_ignore();
- if (ignoredchars != NULL) {
- w2.assign(root_word);
- if (utf8) {
- const std::vector<w_char>& ignoredchars_utf16 =
- pAMgr->get_ignore_utf16();
- remove_ignored_chars_utf(w2, ignoredchars_utf16);
- } else {
- remove_ignored_chars(w2, ignoredchars);
- }
- word = w2.c_str();
- } else
- word = root_word.c_str();
-
- len = strlen(word);
-
- if (!len)
- return slst;
-
- for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
- he = m_HMgrs[i]->lookup(word);
- }
- if (he) {
- slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str());
- }
- return slst;
+void Hunspell_free_list(Hunhandle* pHunspell, char*** list, int n) {
+ reinterpret_cast<HunspellImpl*>(pHunspell)->free_list(list, n);
}