summaryrefslogtreecommitdiff
path: root/libs/hunspell/src/affixmgr.c++
diff options
context:
space:
mode:
author George Hazan <ghazan@miranda.im> 2022-08-30 17:13:21 +0300
committer George Hazan <ghazan@miranda.im> 2022-08-30 17:13:21 +0300
commit 3ad2f2b7c2bfb3166363239d67a6645692ffb2b6 (patch)
tree 0201fd31d0c0e5c193752f7b80cdc69096b563cf /libs/hunspell/src/affixmgr.c++
parent d82b809f6af58a1d10fa503138b912d336dca75e (diff)
fixes #3183 (Update hunspell to 1.7.1)
Diffstat (limited to 'libs/hunspell/src/affixmgr.c++')
-rw-r--r-- libs/hunspell/src/affixmgr.c++ 331
1 files changed, 156 insertions, 175 deletions
diff --git a/libs/hunspell/src/affixmgr.c++ b/libs/hunspell/src/affixmgr.c++
index 90c7eaff33..adb750dba1 100644
--- a/libs/hunspell/src/affixmgr.c++
+++ b/libs/hunspell/src/affixmgr.c++
@@ -1,7 +1,7 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
- * Copyright (C) 2002-2017 Németh László
+ * Copyright (C) 2002-2022 Németh László
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
@@ -72,6 +72,7 @@
#include <string.h>
#include <stdio.h>
#include <ctype.h>
+#include <time.h>
#include <algorithm>
#include <limits>
@@ -96,7 +97,6 @@ AffixMgr::AffixMgr(const char* affpath,
complexprefixes = 0;
parsedmaptable = false;
parsedbreaktable = false;
- parsedrep = false;
iconvtable = NULL;
oconvtable = NULL;
// allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN)
@@ -113,7 +113,7 @@ AffixMgr::AffixMgr(const char* affpath,
compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
compoundmoresuffixes = 0; // allow more suffixes within compound words
checkcompounddup = 0; // forbid double words in compounds
- checkcompoundrep = 0; // forbid bad compounds (may be non compound word with
+ checkcompoundrep = 0; // forbid bad compounds (may be non-compound word with
// a REP substitution)
checkcompoundcase =
0; // forbid upper and lowercase combinations at word bounds
@@ -439,7 +439,7 @@ int AffixMgr::parse_file(const char* affpath, const char* key) {
}
}
- /* parse in the flag used by forbidden words */
+ /* parse in the LEMMA_PRESENT flag (deprecated) */
if (line.compare(0, 13, "LEMMA_PRESENT", 13) == 0) {
if (!parse_flag(line, &lemma_present, afflst)) {
finishFileMgr(afflst);
@@ -463,7 +463,7 @@ int AffixMgr::parse_file(const char* affpath, const char* key) {
}
}
- /* parse in the flag used by `needaffixs' */
+ /* parse in the flag used by `needaffixs' (deprecated) */
if (line.compare(0, 10, "PSEUDOROOT", 10) == 0) {
if (!parse_flag(line, &needaffix, afflst)) {
finishFileMgr(afflst);
@@ -529,14 +529,6 @@ int AffixMgr::parse_file(const char* affpath, const char* key) {
}
}
- /* parse in the typical fault correcting table */
- if (line.compare(0, 3, "REP", 3) == 0) {
- if (!parse_reptable(line, afflst)) {
- finishFileMgr(afflst);
- return 1;
- }
- }
-
/* parse in the input conversion table */
if (line.compare(0, 5, "ICONV", 5) == 0) {
if (!parse_convtable(line, afflst, &iconvtable, "ICONV")) {
@@ -545,7 +537,7 @@ int AffixMgr::parse_file(const char* affpath, const char* key) {
}
}
- /* parse in the input conversion table */
+ /* parse in the output conversion table */
if (line.compare(0, 5, "OCONV", 5) == 0) {
if (!parse_convtable(line, afflst, &oconvtable, "OCONV")) {
finishFileMgr(afflst);
@@ -1023,7 +1015,7 @@ int AffixMgr::process_sfx_order() {
// add flags to the result for dictionary debugging
std::string& AffixMgr::debugflag(std::string& result, unsigned short flag) {
char* st = encode_flag(flag);
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(MORPH_FLAG);
if (st) {
result.append(st);
@@ -1060,7 +1052,7 @@ int AffixMgr::encodeit(AffEntry& entry, const char* cs) {
} else if (cs[MAXCONDLEN]) {
//there is more conditions than fit in fixed space, so its
//a long condition
- entry.opts += aeLONGCOND;
+ entry.opts |= aeLONGCOND;
entry.c.l.conds2 = mystrdup(cs + MAXCONDLEN_1);
if (!entry.c.l.conds2)
return 1;
@@ -1146,7 +1138,7 @@ struct hentry* AffixMgr::prefix_check(const char* word,
return NULL;
}
-// check word for prefixes
+// check word for prefixes and two-level suffixes
struct hentry* AffixMgr::prefix_check_twosfx(const char* word,
int len,
char in_compound,
@@ -1187,7 +1179,7 @@ struct hentry* AffixMgr::prefix_check_twosfx(const char* word,
return NULL;
}
-// check word for prefixes
+// check word for prefixes and morph
std::string AffixMgr::prefix_check_morph(const char* word,
int len,
char in_compound,
@@ -1234,7 +1226,7 @@ std::string AffixMgr::prefix_check_morph(const char* word,
return result;
}
-// check word for prefixes
+// check word for prefixes and morph and two-level suffixes
std::string AffixMgr::prefix_check_twosfx_morph(const char* word,
int len,
char in_compound,
@@ -1275,25 +1267,44 @@ std::string AffixMgr::prefix_check_twosfx_morph(const char* word,
return result;
}
-// Is word a non compound with a REP substitution (see checkcompoundrep)?
+// Is word a non-compound with a REP substitution (see checkcompoundrep)?
int AffixMgr::cpdrep_check(const char* word, int wl) {
- if ((wl < 2) || reptable.empty())
+ if ((wl < 2) || get_reptable().empty())
return 0;
- for (size_t i = 0; i < reptable.size(); ++i) {
- const char* r = word;
- const size_t lenp = reptable[i].pattern.size();
- // search every occurence of the pattern in the word
- while ((r = strstr(r, reptable[i].pattern.c_str())) != NULL) {
- std::string candidate(word);
- size_t type = r == word && langnum != LANG_hu ? 1 : 0;
- if (r - word + reptable[i].pattern.size() == lenp && langnum != LANG_hu)
- type += 2;
- candidate.replace(r - word, lenp, reptable[i].outstrings[type]);
+ for (size_t i = 0; i < get_reptable().size(); ++i) {
+ // use only available mid patterns
+ if (!get_reptable()[i].outstrings[0].empty()) {
+ const char* r = word;
+ const size_t lenp = get_reptable()[i].pattern.size();
+ // search every occurrence of the pattern in the word
+ while ((r = strstr(r, get_reptable()[i].pattern.c_str())) != NULL) {
+ std::string candidate(word);
+ candidate.replace(r - word, lenp, get_reptable()[i].outstrings[0]);
+ if (candidate_check(candidate.c_str(), candidate.size()))
+ return 1;
+ ++r; // search for the next letter
+ }
+ }
+ }
+
+ return 0;
+}
+
+// forbid compound words, if they are in the dictionary as a
+// word pair separated by space
+int AffixMgr::cpdwordpair_check(const char * word, int wl) {
+ if (wl > 2) {
+ std::string candidate(word);
+ for (size_t i = 1; i < candidate.size(); i++) {
+ // go to end of the UTF-8 character
+ if (utf8 && ((word[i] & 0xc0) == 0x80))
+ continue;
+ candidate.insert(i, 1, ' ');
if (candidate_check(candidate.c_str(), candidate.size()))
return 1;
- ++r; // search for the next letter
+ candidate.erase(i, 1);
}
}
@@ -1584,6 +1595,21 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
int checked_prefix;
+ // add a time limit to handle a possible
+ // combinatorial explosion of the overlapping words
+
+ HUNSPELL_THREAD_LOCAL clock_t timelimit;
+
+ if (wordnum == 0) {
+ // record the start time; since the value 0 is reused as the
+ // timeout flag, store clock() + 1 so that a starting clock()
+ // of 0 is not mistaken for a timeout
+ timelimit = clock() + 1;
+ }
+ else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) {
+ timelimit = 0;
+ }
+
setcminmax(&cmin, &cmax, word.c_str(), len);
st.assign(word);
@@ -1608,6 +1634,9 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
do { // simplified checkcompoundpattern loop
+ if (timelimit == 0)
+ return 0;
+
if (scpd > 0) {
for (; scpd <= checkcpdtable.size() &&
(checkcpdtable[scpd - 1].pattern3.empty() ||
@@ -1647,6 +1676,12 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
affixed = 1;
rv = lookup(st.c_str()); // perhaps without prefix
+ // forbid dictionary stems with COMPOUNDFORBIDFLAG in
+ // compound words, overriding the effect of COMPOUNDPERMITFLAG
+ if ((rv) && compoundforbidflag &&
+ TESTAFF(rv->astr, compoundforbidflag, rv->alen) && !hu_mov_rule)
+ continue;
+
// search homonym with compound flag
while ((rv) && !hu_mov_rule &&
((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
@@ -1854,7 +1889,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
}
// check FORCEUCASE
- if (rv && forceucase &&
+ if (rv && forceucase && (rv) &&
(TESTAFF(rv->astr, forceucase, rv->alen)) &&
!(info && *info & SPELL_ORIGCAP))
rv = NULL;
@@ -1909,9 +1944,10 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
&&
(scpd == 0 || checkcpdtable[scpd - 1].cond2 == FLAG_NULL ||
TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen))) {
- // forbid compound word, if it is a non compound word with typical
+ // forbid compound word, if it is a non-compound word with typical
// fault
- if (checkcompoundrep && cpdrep_check(word.c_str(), len))
+ if ((checkcompoundrep && cpdrep_check(word.c_str(), len)) ||
+ cpdwordpair_check(word.c_str(), len))
return NULL;
return rv_first;
}
@@ -1962,7 +1998,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
}
// check FORCEUCASE
- if (rv && forceucase &&
+ if (rv && forceucase && (rv) &&
(TESTAFF(rv->astr, forceucase, rv->alen)) &&
!(info && *info & SPELL_ORIGCAP))
rv = NULL;
@@ -1989,7 +2025,9 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
if (sfxappnd) {
std::string tmp(sfxappnd);
reverseword(tmp);
- numsyllable -= get_syllable(tmp) + sfxextra;
+ numsyllable -= short(get_syllable(tmp) + sfxextra);
+ } else {
+ numsyllable -= short(sfxextra);
}
// + 1 word, if syllable number of the prefix > 1 (hungarian
@@ -2024,7 +2062,6 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
(TESTAFF(rv->astr, compoundroot, rv->alen))) {
wordnum++;
}
-
// second word is acceptable, as a word with prefix or/and suffix?
// hungarian conventions: compounding is acceptable,
// when compound forms consist 2 word, otherwise
@@ -2033,9 +2070,10 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
(((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) &&
((!checkcompounddup || (rv != rv_first)))) {
- // forbid compound word, if it is a non compound word with typical
+ // forbid compound word, if it is a non-compound word with typical
// fault
- if (checkcompoundrep && cpdrep_check(word.c_str(), len))
+ if ((checkcompoundrep && cpdrep_check(word.c_str(), len)) ||
+ cpdwordpair_check(word.c_str(), len))
return NULL;
return rv_first;
}
@@ -2059,8 +2097,12 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
rv = NULL;
}
if (rv) {
- // forbid compound word, if it is a non compound word with typical
- // fault
+ // forbid compound word, if it is a non-compound word with typical
+ // fault, or a dictionary word pair
+
+ if (cpdwordpair_check(word.c_str(), len))
+ return NULL;
+
if (checkcompoundrep || forbiddenword) {
if (checkcompoundrep && cpdrep_check(word.c_str(), len))
@@ -2071,7 +2113,8 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
char r = st[i + rv->blen];
st[i + rv->blen] = '\0';
- if (checkcompoundrep && cpdrep_check(st.c_str(), i + rv->blen)) {
+ if ((checkcompoundrep && cpdrep_check(st.c_str(), i + rv->blen)) ||
+ cpdwordpair_check(st.c_str(), i + rv->blen)) {
st[ + i + rv->blen] = r;
continue;
}
@@ -2162,6 +2205,21 @@ int AffixMgr::compound_check_morph(const char* word,
char affixed = 0;
hentry** oldwords = words;
+ // add a time limit to handle a possible
+ // combinatorial explosion of the overlapping words
+
+ HUNSPELL_THREAD_LOCAL clock_t timelimit;
+
+ if (wordnum == 0) {
+ // record the start time; since the value 0 is reused as the
+ // timeout flag, store clock() + 1 so that a starting clock()
+ // of 0 is not mistaken for a timeout
+ timelimit = clock() + 1;
+ }
+ else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) {
+ timelimit = 0;
+ }
+
setcminmax(&cmin, &cmax, word, len);
st.assign(word);
@@ -2180,6 +2238,9 @@ int AffixMgr::compound_check_morph(const char* word,
do { // onlycpdrule loop
+ if (timelimit == 0)
+ return 0;
+
oldnumsyllable = numsyllable;
oldwordnum = wordnum;
checked_prefix = 0;
@@ -2198,6 +2259,12 @@ int AffixMgr::compound_check_morph(const char* word,
rv = lookup(st.c_str()); // perhaps without prefix
+ // forbid dictionary stems with COMPOUNDFORBIDFLAG in
+ // compound words, overriding the effect of COMPOUNDPERMITFLAG
+ if ((rv) && compoundforbidflag &&
+ TESTAFF(rv->astr, compoundforbidflag, rv->alen) && !hu_mov_rule)
+ continue;
+
// search homonym with compound flag
while ((rv) && !hu_mov_rule &&
((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
@@ -2215,6 +2282,9 @@ int AffixMgr::compound_check_morph(const char* word,
rv = rv->next_homonym;
}
+ if (timelimit == 0)
+ return 0;
+
if (rv)
affixed = 0;
@@ -2405,22 +2475,22 @@ int AffixMgr::compound_check_morph(const char* word,
if (rv && words && words[wnum + 1]) {
result.append(presult);
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(MORPH_PART);
result.append(word + i);
if (complexprefixes && HENTRY_DATA(rv))
result.append(HENTRY_DATA2(rv));
if (!HENTRY_FIND(rv, MORPH_STEM)) {
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(MORPH_STEM);
result.append(HENTRY_WORD(rv));
}
// store the pointer of the hash entry
if (!complexprefixes && HENTRY_DATA(rv)) {
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(HENTRY_DATA2(rv));
}
- result.append("\n");
+ result.push_back(MSEP_REC);
return 0;
}
@@ -2462,7 +2532,7 @@ int AffixMgr::compound_check_morph(const char* word,
((!checkcompounddup || (rv != rv_first)))) {
// bad compound word
result.append(presult);
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(MORPH_PART);
result.append(word + i);
@@ -2470,17 +2540,17 @@ int AffixMgr::compound_check_morph(const char* word,
if (complexprefixes)
result.append(HENTRY_DATA2(rv));
if (!HENTRY_FIND(rv, MORPH_STEM)) {
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(MORPH_STEM);
result.append(HENTRY_WORD(rv));
}
// store the pointer of the hash entry
if (!complexprefixes) {
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(HENTRY_DATA2(rv));
}
}
- result.append("\n");
+ result.push_back(MSEP_REC);
ok = 1;
}
@@ -2519,7 +2589,7 @@ int AffixMgr::compound_check_morph(const char* word,
line_uniq_app(m, MSEP_REC);
result.append(m);
}
- result.append("\n");
+ result.push_back(MSEP_REC);
ok = 1;
}
}
@@ -2552,7 +2622,9 @@ int AffixMgr::compound_check_morph(const char* word,
if (sfxappnd) {
std::string tmp(sfxappnd);
reverseword(tmp);
- numsyllable -= get_syllable(tmp) + sfxextra;
+ numsyllable -= short(get_syllable(tmp) + sfxextra);
+ } else {
+ numsyllable -= short(sfxextra);
}
// + 1 word, if syllable number of the prefix > 1 (hungarian
@@ -2605,8 +2677,9 @@ int AffixMgr::compound_check_morph(const char* word,
if (!m.empty()) {
result.push_back(MSEP_FLD);
result.append(MORPH_PART);
- result.append(word + 1);
+ result.append(word + i);
line_uniq_app(m, MSEP_REC);
+ result.push_back(MSEP_FLD);
result.append(m);
}
result.push_back(MSEP_REC);
@@ -2769,7 +2842,6 @@ struct hentry* AffixMgr::suffix_check(const char* word,
}
// check word for two-level suffixes
-
struct hentry* AffixMgr::suffix_check_twosfx(const char* word,
int len,
int sfxopts,
@@ -2814,6 +2886,7 @@ struct hentry* AffixMgr::suffix_check_twosfx(const char* word,
return NULL;
}
+// check word for two-level suffixes and morph
std::string AffixMgr::suffix_check_twosfx_morph(const char* word,
int len,
int sfxopts,
@@ -2832,17 +2905,17 @@ std::string AffixMgr::suffix_check_twosfx_morph(const char* word,
if (ppfx) {
if (ppfx->getMorph()) {
result.append(ppfx->getMorph());
- result.append(" ");
+ result.push_back(MSEP_FLD);
} else
debugflag(result, ppfx->getFlag());
}
result.append(st);
if (se->getMorph()) {
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(se->getMorph());
} else
debugflag(result, se->getFlag());
- result.append("\n");
+ result.push_back(MSEP_REC);
}
}
se = se->getNext();
@@ -2867,12 +2940,12 @@ std::string AffixMgr::suffix_check_twosfx_morph(const char* word,
result3.clear();
if (sptr->getMorph()) {
- result3.append(" ");
+ result3.push_back(MSEP_FLD);
result3.append(sptr->getMorph());
} else
debugflag(result3, sptr->getFlag());
strlinecat(result2, result3);
- result2.append("\n");
+ result2.push_back(MSEP_REC);
result.append(result2);
}
}
@@ -2935,28 +3008,28 @@ std::string AffixMgr::suffix_check_morph(const char* word,
if (ppfx) {
if (ppfx->getMorph()) {
result.append(ppfx->getMorph());
- result.append(" ");
+ result.push_back(MSEP_FLD);
} else
debugflag(result, ppfx->getFlag());
}
if (complexprefixes && HENTRY_DATA(rv))
result.append(HENTRY_DATA2(rv));
if (!HENTRY_FIND(rv, MORPH_STEM)) {
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(MORPH_STEM);
result.append(HENTRY_WORD(rv));
}
if (!complexprefixes && HENTRY_DATA(rv)) {
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(HENTRY_DATA2(rv));
}
if (se->getMorph()) {
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(se->getMorph());
} else
debugflag(result, se->getFlag());
- result.append("\n");
+ result.push_back(MSEP_REC);
rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
}
}
@@ -3002,29 +3075,29 @@ std::string AffixMgr::suffix_check_morph(const char* word,
if (ppfx) {
if (ppfx->getMorph()) {
result.append(ppfx->getMorph());
- result.append(" ");
+ result.push_back(MSEP_FLD);
} else
debugflag(result, ppfx->getFlag());
}
if (complexprefixes && HENTRY_DATA(rv))
result.append(HENTRY_DATA2(rv));
if (!HENTRY_FIND(rv, MORPH_STEM)) {
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(MORPH_STEM);
result.append(HENTRY_WORD(rv));
}
if (!complexprefixes && HENTRY_DATA(rv)) {
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(HENTRY_DATA2(rv));
}
if (sptr->getMorph()) {
- result.append(" ");
+ result.push_back(MSEP_FLD);
result.append(sptr->getMorph());
} else
debugflag(result, sptr->getFlag());
- result.append("\n");
+ result.push_back(MSEP_REC);
rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
}
sptr = sptr->getNextEQ();
@@ -3213,7 +3286,7 @@ std::string AffixMgr::morphgen(const char* ts,
// use input suffix fields, if exist
if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) {
mymorph.assign(morph);
- mymorph.append(" ");
+ mymorph.push_back(MSEP_FLD);
stemmorphcatpos = mymorph.size();
} else {
stemmorphcatpos = std::string::npos;
@@ -3414,7 +3487,7 @@ int AffixMgr::expand_rootword(struct guessword* wlst,
// return replacing table
const std::vector<replentry>& AffixMgr::get_reptable() const {
- return reptable;
+ return pHMgr->get_reptable();
}
// return iconv table
@@ -3554,6 +3627,11 @@ FLAG AffixMgr::get_nongramsuggest() const {
return nongramsuggest;
}
+// return the substandard root/affix control flag
+FLAG AffixMgr::get_substandard() const {
+ return substandard;
+}
+
// return the forbidden words flag modify flag
FLAG AffixMgr::get_needaffix() const {
return needaffix;
@@ -3692,103 +3770,6 @@ bool AffixMgr::parse_cpdsyllable(const std::string& line, FileMgr* af) {
return true;
}
-/* parse in the typical fault correcting table */
-bool AffixMgr::parse_reptable(const std::string& line, FileMgr* af) {
- if (parsedrep) {
- HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
- af->getlinenum());
- return false;
- }
- parsedrep = true;
- int numrep = -1;
- int i = 0;
- int np = 0;
- std::string::const_iterator iter = line.begin();
- std::string::const_iterator start_piece = mystrsep(line, iter);
- while (start_piece != line.end()) {
- switch (i) {
- case 0: {
- np++;
- break;
- }
- case 1: {
- numrep = atoi(std::string(start_piece, iter).c_str());
- if (numrep < 1) {
- HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n",
- af->getlinenum());
- return false;
- }
- reptable.reserve(numrep);
- np++;
- break;
- }
- default:
- break;
- }
- ++i;
- start_piece = mystrsep(line, iter);
- }
- if (np != 2) {
- HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
- af->getlinenum());
- return false;
- }
-
- /* now parse the numrep lines to read in the remainder of the table */
- for (int j = 0; j < numrep; ++j) {
- std::string nl;
- if (!af->getline(nl))
- return false;
- mychomp(nl);
- reptable.push_back(replentry());
- iter = nl.begin();
- i = 0;
- int type = 0;
- start_piece = mystrsep(nl, iter);
- while (start_piece != nl.end()) {
- switch (i) {
- case 0: {
- if (nl.compare(start_piece - nl.begin(), 3, "REP", 3) != 0) {
- HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
- af->getlinenum());
- reptable.clear();
- return false;
- }
- break;
- }
- case 1: {
- if (*start_piece == '^')
- type = 1;
- reptable.back().pattern.assign(start_piece + type, iter);
- mystrrep(reptable.back().pattern, "_", " ");
- if (!reptable.back().pattern.empty() && reptable.back().pattern[reptable.back().pattern.size() - 1] == '$') {
- type += 2;
- reptable.back().pattern.resize(reptable.back().pattern.size() - 1);
- }
- break;
- }
- case 2: {
- reptable.back().outstrings[type].assign(start_piece, iter);
- mystrrep(reptable.back().outstrings[type], "_", " ");
- break;
- }
- default:
- break;
- }
- ++i;
- start_piece = mystrsep(nl, iter);
- }
- if (reptable.back().pattern.empty() || reptable.back().outstrings[type].empty()) {
- HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
- af->getlinenum());
- reptable.clear();
- return false;
- }
- }
- return true;
-}
-
-/* parse in the typical fault correcting table */
bool AffixMgr::parse_convtable(const std::string& line,
FileMgr* af,
RepList** rl,
@@ -4386,7 +4367,7 @@ void AffixMgr::reverse_condition(std::string& piece) {
case '^': {
if (*(k - 1) == ']')
neg = 1;
- else
+ else if (neg)
*(k - 1) = *k;
break;
}
@@ -4519,11 +4500,11 @@ bool AffixMgr::parse_affix(const std::string& line,
char opts = ff;
if (utf8)
- opts += aeUTF8;
+ opts |= aeUTF8;
if (pHMgr->is_aliasf())
- opts += aeALIASF;
+ opts |= aeALIASF;
if (pHMgr->is_aliasm())
- opts += aeALIASM;
+ opts |= aeALIASM;
affentries.initialize(numents, opts, aflag);
}
@@ -4617,7 +4598,7 @@ bool AffixMgr::parse_affix(const std::string& line,
entry->appnd = std::string(start_piece, dash);
std::string dash_str(dash + 1, iter);
- if (!ignorechars.empty()) {
+ if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) {
if (utf8) {
remove_ignored_chars_utf(entry->appnd, ignorechars_utf16);
} else {
@@ -4653,7 +4634,7 @@ bool AffixMgr::parse_affix(const std::string& line,
} else {
entry->appnd = std::string(start_piece, iter);
- if (!ignorechars.empty()) {
+ if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) {
if (utf8) {
remove_ignored_chars_utf(entry->appnd, ignorechars_utf16);
} else {