summary refs log tree commit diff
path: root/libs/hunspell/src/affentry.c++
diff options
context:
space:
mode:
Diffstat (limited to 'libs/hunspell/src/affentry.c++')
-rw-r--r-- libs/hunspell/src/affentry.c++ 201
1 files changed, 81 insertions, 120 deletions
diff --git a/libs/hunspell/src/affentry.c++ b/libs/hunspell/src/affentry.c++
index 983fe2c1ec..bd28274368 100644
--- a/libs/hunspell/src/affentry.c++
+++ b/libs/hunspell/src/affentry.c++
@@ -79,8 +79,6 @@
#include "affentry.hxx"
#include "csutil.hxx"
-#define MAXTEMPWORDLEN (MAXWORDUTF8LEN + 4)
-
PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
// register affix manager
: pmyMgr(pmgr),
@@ -117,11 +115,10 @@ PfxEntry::~PfxEntry() {
}
// add prefix to this word assuming conditions hold
-char* PfxEntry::add(const char* word, int len) {
+char* PfxEntry::add(const char* word, size_t len) {
if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
(len >= numconds) && test_condition(word) &&
- (!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0)) &&
- ((MAXTEMPWORDLEN) > (len + appnd.size() - strip.size()))) {
+ (!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0))) {
/* we have a match so add prefix */
std::string tword(appnd);
tword.append(word + strip.size());
@@ -233,26 +230,21 @@ struct hentry* PfxEntry::checkword(const char* word,
int len,
char in_compound,
const FLAG needflag) {
- int tmpl; // length of tmpword
struct hentry* he; // hash entry of root word or NULL
- char tmpword[MAXTEMPWORDLEN];
// on entry prefix is 0 length or already matches the beginning of the word.
// So if the remaining root word has positive length
// and if there are enough chars in root word and added back strip chars
// to meet the number of characters conditions, then test it
- tmpl = len - appnd.size();
+ int tmpl = len - appnd.size(); // length of tmpword
if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
- if (strip.size()) {
- strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1);
- tmpword[MAXTEMPWORDLEN - 1] = '\0';
- }
- strcpy((tmpword + strip.size()), (word + appnd.size()));
+ std::string tmpword(strip);
+ tmpword.append(word + appnd.size());
// now make sure all of the conditions on characters
// are met. Please see the appendix at the end of
@@ -262,9 +254,9 @@ struct hentry* PfxEntry::checkword(const char* word,
// if all conditions are met then check if resulting
// root word in the dictionary
- if (test_condition(tmpword)) {
+ if (test_condition(tmpword.c_str())) {
tmpl += strip.size();
- if ((he = pmyMgr->lookup(tmpword)) != NULL) {
+ if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
do {
if (TESTAFF(he->astr, aflag, he->alen) &&
// forbid single prefixes with needaffix flag
@@ -283,8 +275,9 @@ struct hentry* PfxEntry::checkword(const char* word,
// if ((opts & aeXPRODUCT) && in_compound) {
if ((opts & aeXPRODUCT)) {
- he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, this, NULL, 0,
- NULL, FLAG_NULL, needflag, in_compound);
+ he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, aeXPRODUCT, this,
+ NULL, 0, NULL, FLAG_NULL, needflag,
+ in_compound);
if (he)
return he;
}
@@ -298,27 +291,22 @@ struct hentry* PfxEntry::check_twosfx(const char* word,
int len,
char in_compound,
const FLAG needflag) {
- int tmpl; // length of tmpword
struct hentry* he; // hash entry of root word or NULL
- char tmpword[MAXTEMPWORDLEN];
// on entry prefix is 0 length or already matches the beginning of the word.
// So if the remaining root word has positive length
// and if there are enough chars in root word and added back strip chars
// to meet the number of characters conditions, then test it
- tmpl = len - appnd.size();
+ int tmpl = len - appnd.size(); // length of tmpword
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
(tmpl + strip.size() >= numconds)) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
- if (strip.size()) {
- strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1);
- tmpword[MAXTEMPWORDLEN - 1] = '\0';
- }
- strcpy((tmpword + strip.size()), (word + appnd.size()));
+ std::string tmpword(strip);
+ tmpword.append(word + appnd.size());
// now make sure all of the conditions on characters
// are met. Please see the appendix at the end of
@@ -328,7 +316,7 @@ struct hentry* PfxEntry::check_twosfx(const char* word,
// if all conditions are met then check if resulting
// root word in the dictionary
- if (test_condition(tmpword)) {
+ if (test_condition(tmpword.c_str())) {
tmpl += strip.size();
// prefix matched but no root word was found
@@ -336,7 +324,7 @@ struct hentry* PfxEntry::check_twosfx(const char* word,
// cross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
- he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, this,
+ he = pmyMgr->suffix_check_twosfx(tmpword.c_str(), tmpl, aeXPRODUCT, this,
needflag);
if (he)
return he;
@@ -351,26 +339,20 @@ char* PfxEntry::check_twosfx_morph(const char* word,
int len,
char in_compound,
const FLAG needflag) {
- int tmpl; // length of tmpword
- char tmpword[MAXTEMPWORDLEN];
-
// on entry prefix is 0 length or already matches the beginning of the word.
// So if the remaining root word has positive length
// and if there are enough chars in root word and added back strip chars
// to meet the number of characters conditions, then test it
- tmpl = len - appnd.size();
+ int tmpl = len - appnd.size(); // length of tmpword
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
(tmpl + strip.size() >= numconds)) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
- if (strip.size()) {
- strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1);
- tmpword[MAXTEMPWORDLEN - 1] = '\0';
- }
- strcpy((tmpword + strip.size()), (word + appnd.size()));
+ std::string tmpword(strip);
+ tmpword.append(word + appnd.size());
// now make sure all of the conditions on characters
// are met. Please see the appendix at the end of
@@ -380,7 +362,7 @@ char* PfxEntry::check_twosfx_morph(const char* word,
// if all conditions are met then check if resulting
// root word in the dictionary
- if (test_condition(tmpword)) {
+ if (test_condition(tmpword.c_str())) {
tmpl += strip.size();
// prefix matched but no root word was found
@@ -388,7 +370,8 @@ char* PfxEntry::check_twosfx_morph(const char* word,
// cross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
- return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl, aeXPRODUCT,
+ return pmyMgr->suffix_check_twosfx_morph(tmpword.c_str(), tmpl,
+ aeXPRODUCT,
this, needflag);
}
}
@@ -401,31 +384,23 @@ char* PfxEntry::check_morph(const char* word,
int len,
char in_compound,
const FLAG needflag) {
- int tmpl; // length of tmpword
struct hentry* he; // hash entry of root word or NULL
- char tmpword[MAXTEMPWORDLEN];
- char result[MAXLNLEN];
char* st;
- *result = '\0';
-
// on entry prefix is 0 length or already matches the beginning of the word.
// So if the remaining root word has positive length
// and if there are enough chars in root word and added back strip chars
// to meet the number of characters conditions, then test it
- tmpl = len - appnd.size();
+ int tmpl = len - appnd.size(); // length of tmpword
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
(tmpl + strip.size() >= numconds)) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
- if (strip.size()) {
- strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1);
- tmpword[MAXTEMPWORDLEN - 1] = '\0';
- }
- strcpy(tmpword + strip.size(), word + appnd.size());
+ std::string tmpword(strip);
+ tmpword.append(word + appnd.size());
// now make sure all of the conditions on characters
// are met. Please see the appendix at the end of
@@ -435,9 +410,11 @@ char* PfxEntry::check_morph(const char* word,
// if all conditions are met then check if resulting
// root word in the dictionary
- if (test_condition(tmpword)) {
+ if (test_condition(tmpword.c_str())) {
+ std::string result;
+
tmpl += strip.size();
- if ((he = pmyMgr->lookup(tmpword)) != NULL) {
+ if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
do {
if (TESTAFF(he->astr, aflag, he->alen) &&
// forbid single prefixes with needaffix flag
@@ -446,28 +423,28 @@ char* PfxEntry::check_morph(const char* word,
((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
(contclass && TESTAFF(contclass, needflag, contclasslen)))) {
if (morphcode) {
- mystrcat(result, " ", MAXLNLEN);
- mystrcat(result, morphcode, MAXLNLEN);
+ result.append(" ");
+ result.append(morphcode);
} else
- mystrcat(result, getKey(), MAXLNLEN);
+ result.append(getKey());
if (!HENTRY_FIND(he, MORPH_STEM)) {
- mystrcat(result, " ", MAXLNLEN);
- mystrcat(result, MORPH_STEM, MAXLNLEN);
- mystrcat(result, HENTRY_WORD(he), MAXLNLEN);
+ result.append(" ");
+ result.append(MORPH_STEM);
+ result.append(HENTRY_WORD(he));
}
// store the pointer of the hash entry
if (HENTRY_DATA(he)) {
- mystrcat(result, " ", MAXLNLEN);
- mystrcat(result, HENTRY_DATA2(he), MAXLNLEN);
+ result.append(" ");
+ result.append(HENTRY_DATA2(he));
} else {
// return with debug information
char* flag = pmyMgr->encode_flag(getFlag());
- mystrcat(result, " ", MAXLNLEN);
- mystrcat(result, MORPH_FLAG, MAXLNLEN);
- mystrcat(result, flag, MAXLNLEN);
+ result.append(" ");
+ result.append(MORPH_FLAG);
+ result.append(flag);
free(flag);
}
- mystrcat(result, "\n", MAXLNLEN);
+ result.append("\n");
}
he = he->next_homonym;
} while (he);
@@ -478,18 +455,19 @@ char* PfxEntry::check_morph(const char* word,
// cross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
- st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, this,
+ st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, aeXPRODUCT, this,
FLAG_NULL, needflag);
if (st) {
- mystrcat(result, st, MAXLNLEN);
+ result.append(st);
free(st);
}
}
+
+ if (!result.empty())
+ return mystrdup(result.c_str());
}
}
- if (*result)
- return mystrdup(result);
return NULL;
}
@@ -516,7 +494,8 @@ SfxEntry::SfxEntry(AffixMgr* pmgr, affentry* dp)
c.l.conds2 = dp->c.l.conds2;
} else
memcpy(c.conds, dp->c.conds, MAXCONDLEN);
- rappnd = myrevstrdup(appnd.c_str());
+ rappnd = appnd;
+ reverseword(rappnd);
morphcode = dp->morphcode;
contclass = dp->contclass;
contclasslen = dp->contclasslen;
@@ -524,8 +503,6 @@ SfxEntry::SfxEntry(AffixMgr* pmgr, affentry* dp)
SfxEntry::~SfxEntry() {
aflag = 0;
- if (rappnd)
- free(rappnd);
pmyMgr = NULL;
if (opts & aeLONGCOND)
free(c.l.conds2);
@@ -536,13 +513,12 @@ SfxEntry::~SfxEntry() {
}
// add suffix to this word assuming conditions hold
-char* SfxEntry::add(const char* word, int len) {
+char* SfxEntry::add(const char* word, size_t len) {
/* make sure all conditions match */
if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
(len >= numconds) && test_condition(word + len, word) &&
(!strip.size() ||
- (strcmp(word + len - strip.size(), strip.c_str()) == 0)) &&
- ((MAXTEMPWORDLEN) > (len + appnd.size() - strip.size()))) {
+ (strcmp(word + len - strip.size(), strip.c_str()) == 0))) {
std::string tword(word);
/* we have a match so add suffix */
tword.replace(len - strip.size(), std::string::npos, appnd);
@@ -699,10 +675,7 @@ struct hentry* SfxEntry::checkword(const char* word,
const FLAG cclass,
const FLAG needflag,
const FLAG badflag) {
- int tmpl; // length of tmpword
struct hentry* he; // hash entry pointer
- unsigned char* cp;
- char tmpword[MAXTEMPWORDLEN];
PfxEntry* ep = ppfx;
// if this suffix is being cross checked with a prefix
@@ -716,7 +689,7 @@ struct hentry* SfxEntry::checkword(const char* word,
// and if there are enough chars in root word and added back strip chars
// to meet the number of characters conditions, then test it
- tmpl = len - appnd.size();
+ int tmpl = len - appnd.size(); // length of tmpword
// the second condition is not enough for UTF-8 strings
// it checked in test_condition()
@@ -726,15 +699,13 @@ struct hentry* SfxEntry::checkword(const char* word,
// back any characters that would have been stripped or
// null terminating the shorter string
- strncpy(tmpword, word, MAXTEMPWORDLEN - 1);
- tmpword[MAXTEMPWORDLEN - 1] = '\0';
- cp = (unsigned char*)(tmpword + tmpl);
+ std::string tmpstring(word, tmpl);
if (strip.size()) {
- strcpy((char*)cp, strip.c_str());
- tmpl += strip.size();
- cp = (unsigned char*)(tmpword + tmpl);
- } else
- *cp = '\0';
+ tmpstring.append(strip);
+ }
+
+ const char* tmpword = tmpstring.c_str();
+ const char* endword = tmpword + tmpstring.size();
// now make sure all of the conditions on characters
// are met. Please see the appendix at the end of
@@ -744,7 +715,7 @@ struct hentry* SfxEntry::checkword(const char* word,
// if all conditions are met then check if resulting
// root word in the dictionary
- if (test_condition((char*)cp, (char*)tmpword)) {
+ if (test_condition(endword, tmpword)) {
#ifdef SZOSZABLYA_POSSIBLE_ROOTS
fprintf(stdout, "%s %s %c\n", word, tmpword, aflag);
#endif
@@ -804,10 +775,7 @@ struct hentry* SfxEntry::check_twosfx(const char* word,
int optflags,
PfxEntry* ppfx,
const FLAG needflag) {
- int tmpl; // length of tmpword
struct hentry* he; // hash entry pointer
- unsigned char* cp;
- char tmpword[MAXTEMPWORDLEN];
PfxEntry* ep = ppfx;
// if this suffix is being cross checked with a prefix
@@ -821,7 +789,7 @@ struct hentry* SfxEntry::check_twosfx(const char* word,
// and if there are enough chars in root word and added back strip chars
// to meet the number of characters conditions, then test it
- tmpl = len - appnd.size();
+ int tmpl = len - appnd.size(); // length of tmpword
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
(tmpl + strip.size() >= numconds)) {
@@ -829,15 +797,13 @@ struct hentry* SfxEntry::check_twosfx(const char* word,
// back any characters that would have been stripped or
// null terminating the shorter string
- strncpy(tmpword, word, MAXTEMPWORDLEN - 1);
- tmpword[MAXTEMPWORDLEN - 1] = '\0';
- cp = (unsigned char*)(tmpword + tmpl);
- if (strip.size()) {
- strcpy((char*)cp, strip.c_str());
- tmpl += strip.size();
- cp = (unsigned char*)(tmpword + tmpl);
- } else
- *cp = '\0';
+ std::string tmpword(word);
+ tmpword.resize(tmpl);
+ tmpword.append(strip);
+ tmpl += strip.size();
+
+ const char* beg = tmpword.c_str();
+ const char* end = beg + tmpl;
// now make sure all of the conditions on characters
// are met. Please see the appendix at the end of
@@ -846,17 +812,17 @@ struct hentry* SfxEntry::check_twosfx(const char* word,
// if all conditions are met then recall suffix_check
- if (test_condition((char*)cp, (char*)tmpword)) {
+ if (test_condition(end, beg)) {
if (ppfx) {
// handle conditional suffix
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
- he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL,
+ he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL, NULL, 0, NULL,
(FLAG)aflag, needflag);
else
- he = pmyMgr->suffix_check(tmpword, tmpl, optflags, ppfx, NULL, 0,
+ he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, optflags, ppfx, NULL, 0,
NULL, (FLAG)aflag, needflag);
} else {
- he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL,
+ he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL, NULL, 0, NULL,
(FLAG)aflag, needflag);
}
if (he)
@@ -872,9 +838,6 @@ char* SfxEntry::check_twosfx_morph(const char* word,
int optflags,
PfxEntry* ppfx,
const FLAG needflag) {
- int tmpl; // length of tmpword
- unsigned char* cp;
- char tmpword[MAXTEMPWORDLEN];
PfxEntry* ep = ppfx;
char* st;
@@ -893,7 +856,7 @@ char* SfxEntry::check_twosfx_morph(const char* word,
// and if there are enough chars in root word and added back strip chars
// to meet the number of characters conditions, then test it
- tmpl = len - appnd.size();
+ int tmpl = len - appnd.size(); // length of tmpword
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
(tmpl + strip.size() >= numconds)) {
@@ -901,15 +864,13 @@ char* SfxEntry::check_twosfx_morph(const char* word,
// back any characters that would have been stripped or
// null terminating the shorter string
- strncpy(tmpword, word, MAXTEMPWORDLEN - 1);
- tmpword[MAXTEMPWORDLEN - 1] = '\0';
- cp = (unsigned char*)(tmpword + tmpl);
- if (strip.size()) {
- strcpy((char*)cp, strip.c_str());
- tmpl += strip.size();
- cp = (unsigned char*)(tmpword + tmpl);
- } else
- *cp = '\0';
+ std::string tmpword(word);
+ tmpword.resize(tmpl);
+ tmpword.append(strip);
+ tmpl += strip.size();
+
+ const char* beg = tmpword.c_str();
+ const char* end = beg + tmpl;
// now make sure all of the conditions on characters
// are met. Please see the appendix at the end of
@@ -918,11 +879,11 @@ char* SfxEntry::check_twosfx_morph(const char* word,
// if all conditions are met then recall suffix_check
- if (test_condition((char*)cp, (char*)tmpword)) {
+ if (test_condition(end, beg)) {
if (ppfx) {
// handle conditional suffix
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
- st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag,
+ st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag,
needflag);
if (st) {
if (ppfx->getMorph()) {
@@ -934,7 +895,7 @@ char* SfxEntry::check_twosfx_morph(const char* word,
mychomp(result);
}
} else {
- st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag,
+ st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, optflags, ppfx, aflag,
needflag);
if (st) {
mystrcat(result, st, MAXLNLEN);
@@ -944,7 +905,7 @@ char* SfxEntry::check_twosfx_morph(const char* word,
}
} else {
st =
- pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
+ pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, needflag);
if (st) {
mystrcat(result, st, MAXLNLEN);
free(st);