diff options
author | Tobias Weimer <wishmaster51@googlemail.com> | 2014-07-20 08:01:05 +0000 |
---|---|---|
committer | Tobias Weimer <wishmaster51@googlemail.com> | 2014-07-20 08:01:05 +0000 |
commit | d5dfa90eec9c108f86b9a0aa3f746a5b164c6649 (patch) | |
tree | e6eb1ae6ca89db58494b28d6c5124ea94c517c37 | |
parent | 42c1d344fd105bcc25ae35e5a82d16a978b302c4 (diff) |
SpellChecker:
-Updated Hunspell to 1.3.3
-Create services in Load()
git-svn-id: http://svn.miranda-ng.org/main/trunk@9876 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
24 files changed, 460 insertions, 274 deletions
diff --git a/plugins/SpellChecker/src/hunspell/affentry.cxx b/plugins/SpellChecker/src/hunspell/affentry.cxx index 6406f2577a..435fef804e 100644 --- a/plugins/SpellChecker/src/hunspell/affentry.cxx +++ b/plugins/SpellChecker/src/hunspell/affentry.cxx @@ -1,12 +1,16 @@ -#include "..\commons.h"
+#include "..\commons.h" + +#define MAXTEMPWORDLEN (MAXWORDUTF8LEN + 4) PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp) + // register affix manager + : pmyMgr(pmgr) + , next(NULL) + , nexteq(NULL) + , nextne(NULL) + , flgnxt(NULL) { - // register affix manager - pmyMgr = pmgr; - // set up its initial values - aflag = dp->aflag; // flag strip = dp->strip; // string to strip appnd = dp->appnd; // string to append @@ -19,9 +23,6 @@ PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp) memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1); c.l.conds2 = dp->c.l.conds2; } else memcpy(c.conds, dp->c.conds, MAXCONDLEN); - next = NULL; - nextne = NULL; - nexteq = NULL; morphcode = dp->morphcode; contclass = dp->contclass; contclasslen = dp->contclasslen; @@ -44,16 +45,17 @@ PfxEntry::~PfxEntry() // add prefix to this word assuming conditions hold char * PfxEntry::add(const char * word, int len) { - char tword[MAXWORDUTF8LEN + 4]; + char tword[MAXTEMPWORDLEN]; if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) && (len >= numconds) && test_condition(word) && (!stripl || (strncmp(word, strip, stripl) == 0)) && - ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) { + ((MAXTEMPWORDLEN) > (len + appndl - stripl))) { /* we have a match so add prefix */ char * pp = tword; if (appndl) { - strcpy(tword,appnd); + strncpy(tword, appnd, MAXTEMPWORDLEN-1); + tword[MAXTEMPWORDLEN-1] = '\0'; pp += appndl; } strcpy(pp, (word + stripl)); @@ -101,13 +103,15 @@ inline int PfxEntry::test_condition(const char * st) if (*st == '\0' && p) return 0; // word <= condition break; } - case '.': if (!pos) { // dots are not metacharacters in groups: [.] + case '.': + if (!pos) { // dots are not metacharacters in groups: [.] p = nextchar(p); // skip the next character for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++); if (*st == '\0' && p) return 0; // word <= condition break; } + /* FALLTHROUGH */ default: { if (*st == *p) { st++; @@ -124,11 +128,11 @@ inline int PfxEntry::test_condition(const char * st) } if (pos && st != pos) { ingroup = true; - while (p && *p != ']' && (p = nextchar(p))); + while (p && *p != ']' && ((p = nextchar(p)) != NULL)); } } else if (pos) { ingroup = true; - while (p && *p != ']' && (p = nextchar(p))); + while (p && *p != ']' && ((p = nextchar(p)) != NULL)); } } else if (pos) { // group p = nextchar(p); @@ -144,7 +148,7 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound { int tmpl; // length of tmpword struct hentry * he; // hash entry of root word or NULL - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; // on entry prefix is 0 length or already matches the beginning of the word. // So if the remaining root word has positive length @@ -158,7 +162,10 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound // generate new root word by removing prefix and adding // back any characters that would have been stripped - if (stripl) strcpy (tmpword, strip); + if (stripl) { + strncpy(tmpword, strip, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; + } strcpy ((tmpword + stripl), (word + appndl)); // now make sure all of the conditions on characters @@ -205,7 +212,7 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len, { int tmpl; // length of tmpword struct hentry * he; // hash entry of root word or NULL - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; // on entry prefix is 0 length or already matches the beginning of the word. // So if the remaining root word has positive length @@ -220,7 +227,10 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len, // generate new root word by removing prefix and adding // back any characters that would have been stripped - if (stripl) strcpy (tmpword, strip); + if (stripl) { + strncpy(tmpword, strip, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; + } strcpy ((tmpword + stripl), (word + appndl)); // now make sure all of the conditions on characters @@ -252,7 +262,7 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len, char in_compound, const FLAG needflag) { int tmpl; // length of tmpword - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; // on entry prefix is 0 length or already matches the beginning of the word. // So if the remaining root word has positive length @@ -267,7 +277,10 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len, // generate new root word by removing prefix and adding // back any characters that would have been stripped - if (stripl) strcpy (tmpword, strip); + if (stripl) { + strncpy(tmpword, strip, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; + } strcpy ((tmpword + stripl), (word + appndl)); // now make sure all of the conditions on characters @@ -299,7 +312,7 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const { int tmpl; // length of tmpword struct hentry * he; // hash entry of root word or NULL - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; char result[MAXLNLEN]; char * st; @@ -318,7 +331,10 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const // generate new root word by removing prefix and adding // back any characters that would have been stripped - if (stripl) strcpy (tmpword, strip); + if (stripl) { + strncpy(tmpword, strip, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; + } strcpy ((tmpword + stripl), (word + appndl)); // now make sure all of the conditions on characters @@ -386,10 +402,15 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const } SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp) + : pmyMgr(pmgr) // register affix manager + , next(NULL) + , nexteq(NULL) + , nextne(NULL) + , flgnxt(NULL) + , l_morph(NULL) + , r_morph(NULL) + , eq_morph(NULL) { - // register affix manager - pmyMgr = pmgr; - // set up its initial values aflag = dp->aflag; // char flag strip = dp->strip; // string to strip @@ -404,7 +425,6 @@ SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp) memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1); c.l.conds2 = dp->c.l.conds2; } else memcpy(c.conds, dp->c.conds, MAXCONDLEN); - rappnd = myrevstrdup(appnd); morphcode = dp->morphcode; contclass = dp->contclass; @@ -429,15 +449,16 @@ SfxEntry::~SfxEntry() // add suffix to this word assuming conditions hold char * SfxEntry::add(const char * word, int len) { - char tword[MAXWORDUTF8LEN + 4]; + char tword[MAXTEMPWORDLEN]; /* make sure all conditions match */ if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) && (len >= numconds) && test_condition(word + len, word) && (!stripl || (strcmp(word + len - stripl, strip) == 0)) && - ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) { + ((MAXTEMPWORDLEN) > (len + appndl - stripl))) { /* we have a match so add suffix */ - strcpy(tword,word); + strncpy(tword, word, MAXTEMPWORDLEN-1); + tword[MAXTEMPWORDLEN-1] = '\0'; if (appndl) { strcpy(tword + len - stripl, appnd); } else { @@ -472,24 +493,37 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) int i = 1; while (1) { switch (*p) { - case '\0': return 1; - case '[': { p = nextchar(p); pos = st; break; } - case '^': { p = nextchar(p); neg = true; break; } - case ']': { if (!neg && !ingroup) return 0; - i++; - // skip the next character - if (!ingroup) { - for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--); - st--; - } - pos = NULL; - neg = false; - ingroup = false; - p = nextchar(p); - if (st < beg && p) return 0; // word <= condition - break; - } - case '.': if (!pos) { // dots are not metacharacters in groups: [.] + case '\0': + return 1; + case '[': + p = nextchar(p); + pos = st; + break; + case '^': + p = nextchar(p); + neg = true; + break; + case ']': + if (!neg && !ingroup) + return 0; + i++; + // skip the next character + if (!ingroup) + { + for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--); + st--; + } + pos = NULL; + neg = false; + ingroup = false; + p = nextchar(p); + if (st < beg && p) + return 0; // word <= condition + break; + case '.': + if (!pos) + { + // dots are not metacharacters in groups: [.] p = nextchar(p); // skip the next character for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--); @@ -504,6 +538,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) } break; } + /* FALLTHROUGH */ default: { if (*st == *p) { p = nextchar(p); @@ -524,7 +559,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) if (neg) return 0; else if (i == numconds) return 1; ingroup = true; - while (p && *p != ']' && (p = nextchar(p))); + while (p && *p != ']' && ((p = nextchar(p)) != NULL)); st--; } if (p && *p != ']') p = nextchar(p); @@ -532,7 +567,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) if (neg) return 0; else if (i == numconds) return 1; ingroup = true; - while (p && *p != ']' && (p = nextchar(p))); + while (p && *p != ']' && ((p = nextchar(p)) != NULL)); // if (p && *p != ']') p = nextchar(p); st--; } @@ -558,7 +593,7 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, int tmpl; // length of tmpword struct hentry * he; // hash entry pointer unsigned char * cp; - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; PfxEntry* ep = ppfx; // if this suffix is being cross checked with a prefix @@ -583,7 +618,8 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, // back any characters that would have been stripped or // or null terminating the shorter string - strcpy (tmpword, word); + strncpy (tmpword, word, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; cp = (unsigned char *)(tmpword + tmpl); if (stripl) { strcpy ((char *)cp, strip); @@ -636,7 +672,10 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, } else if (wlst && (*ns < maxSug)) { int cwrd = 1; for (int k=0; k < *ns; k++) - if (strcmp(tmpword, wlst[k]) == 0) cwrd = 0; + if (strcmp(tmpword, wlst[k]) == 0) { + cwrd = 0; + break; + } if (cwrd) { wlst[*ns] = mystrdup(tmpword); if (wlst[*ns] == NULL) { @@ -659,7 +698,7 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags, int tmpl; // length of tmpword struct hentry * he; // hash entry pointer unsigned char * cp; - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; PfxEntry* ep = ppfx; @@ -683,7 +722,8 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags, // back any characters that would have been stripped or // or null terminating the shorter string - strcpy (tmpword, word); + strncpy(tmpword, word, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; cp = (unsigned char *)(tmpword + tmpl); if (stripl) { strcpy ((char *)cp, strip); @@ -720,7 +760,7 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags, { int tmpl; // length of tmpword unsigned char * cp; - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; PfxEntry* ep = ppfx; char * st; @@ -748,7 +788,8 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags, // back any characters that would have been stripped or // or null terminating the shorter string - strcpy (tmpword, word); + strncpy(tmpword, word, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; cp = (unsigned char *)(tmpword + tmpl); if (stripl) { strcpy ((char *)cp, strip); diff --git a/plugins/SpellChecker/src/hunspell/affentry.hxx b/plugins/SpellChecker/src/hunspell/affentry.hxx index eaf361fcce..923ee5ffed 100644 --- a/plugins/SpellChecker/src/hunspell/affentry.hxx +++ b/plugins/SpellChecker/src/hunspell/affentry.hxx @@ -11,6 +11,10 @@ class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry { +private: + PfxEntry(const PfxEntry&); + PfxEntry& operator = (const PfxEntry&); +private: AffixMgr* pmyMgr; PfxEntry * next; @@ -67,6 +71,10 @@ public: class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry { +private: + SfxEntry(const SfxEntry&); + SfxEntry& operator = (const SfxEntry&); +private: AffixMgr* pmyMgr; char * rappnd; diff --git a/plugins/SpellChecker/src/hunspell/affixmgr.cxx b/plugins/SpellChecker/src/hunspell/affixmgr.cxx index 59313bfba0..bd998f6369 100644 --- a/plugins/SpellChecker/src/hunspell/affixmgr.cxx +++ b/plugins/SpellChecker/src/hunspell/affixmgr.cxx @@ -1,4 +1,4 @@ -#include "..\commons.h"
+#include "..\commons.h" AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key) { @@ -34,6 +34,7 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * k compoundroot = FLAG_NULL; // compound word signing flag compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word + compoundmoresuffixes = 0; // allow more suffixes within compound words checkcompounddup = 0; // forbid double words in compounds checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution) checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds @@ -239,6 +240,14 @@ AffixMgr::~AffixMgr() #endif } +void AffixMgr::finishFileMgr(FileMgr *afflst) +{ + delete afflst; + + // convert affix trees to sorted list + process_pfx_tree_to_list(); + process_sfx_tree_to_list(); +} // read in aff file and build up prefix and suffix entry objects int AffixMgr::parse_file(const char * affpath, const char * key) @@ -265,7 +274,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) // read in each line ignoring any that do not // start with a known line type indicator - while ((line = afflst->getline())) { + while ((line = afflst->getline()) != NULL) { mychomp(line); /* remove byte order mark */ @@ -280,7 +289,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the keyboard string */ if (strncmp(line,"KEY",3) == 0) { if (parse_string(line, &keystring, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -288,7 +297,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the try string */ if (strncmp(line,"TRY",3) == 0) { if (parse_string(line, &trystring, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -296,7 +305,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the name of the character set used by the .dict and .aff */ if (strncmp(line,"SET",3) == 0) { if (parse_string(line, &encoding, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } if (strcmp(encoding, "UTF-8") == 0) { @@ -316,7 +325,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by the controlled compound words */ if (strncmp(line,"COMPOUNDFLAG",12) == 0) { if (parse_flag(line, &compoundflag, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -325,12 +334,12 @@ int AffixMgr::parse_file(const char * affpath, const char * key) if (strncmp(line,"COMPOUNDBEGIN",13) == 0) { if (complexprefixes) { if (parse_flag(line, &compoundend, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } else { if (parse_flag(line, &compoundbegin, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -339,7 +348,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by compound words */ if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) { if (parse_flag(line, &compoundmiddle, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -347,12 +356,12 @@ int AffixMgr::parse_file(const char * affpath, const char * key) if (strncmp(line,"COMPOUNDEND",11) == 0) { if (complexprefixes) { if (parse_flag(line, &compoundbegin, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } else { if (parse_flag(line, &compoundend, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -361,7 +370,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the data used by compound_check() method */ if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) { if (parse_num(line, &cpdwordmax, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -369,7 +378,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag sign compounds in dictionary */ if (strncmp(line,"COMPOUNDROOT",12) == 0) { if (parse_flag(line, &compoundroot, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -377,7 +386,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by compound_check() method */ if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) { if (parse_flag(line, &compoundpermitflag, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -385,11 +394,15 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by compound_check() method */ if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) { if (parse_flag(line, &compoundforbidflag, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } + if (strncmp(line,"COMPOUNDMORESUFFIXES",20) == 0) { + compoundmoresuffixes = 1; + } + if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) { checkcompounddup = 1; } @@ -412,14 +425,14 @@ int AffixMgr::parse_file(const char * affpath, const char * key) if (strncmp(line,"NOSUGGEST",9) == 0) { if (parse_flag(line, &nosuggest, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } if (strncmp(line,"NONGRAMSUGGEST",14) == 0) { if (parse_flag(line, &nongramsuggest, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -427,7 +440,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by forbidden words */ if (strncmp(line,"FORBIDDENWORD",13) == 0) { if (parse_flag(line, &forbiddenword, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -435,7 +448,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by forbidden words */ if (strncmp(line,"LEMMA_PRESENT",13) == 0) { if (parse_flag(line, &lemma_present, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -443,7 +456,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by circumfixes */ if (strncmp(line,"CIRCUMFIX",9) == 0) { if (parse_flag(line, &circumfix, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -451,7 +464,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by fogemorphemes */ if (strncmp(line,"ONLYINCOMPOUND",14) == 0) { if (parse_flag(line, &onlyincompound, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -459,7 +472,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by `needaffixs' */ if (strncmp(line,"PSEUDOROOT",10) == 0) { if (parse_flag(line, &needaffix, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -467,7 +480,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by `needaffixs' */ if (strncmp(line,"NEEDAFFIX",9) == 0) { if (parse_flag(line, &needaffix, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -475,7 +488,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the minimal length for words in compounds */ if (strncmp(line,"COMPOUNDMIN",11) == 0) { if (parse_num(line, &cpdmin, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } if (cpdmin < 1) cpdmin = 1; @@ -484,7 +497,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the max. words and syllables in compounds */ if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) { if (parse_cpdsyllable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -492,7 +505,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by compound_check() method */ if (strncmp(line,"SYLLABLENUM",11) == 0) { if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -505,7 +518,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the extra word characters */ if (strncmp(line,"WORDCHARS",9) == 0) { if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, utf8, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -513,7 +526,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the ignored characters (for example, Arabic optional diacretics charachters */ if (strncmp(line,"IGNORE",6) == 0) { if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, utf8, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -521,7 +534,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the typical fault correcting table */ if (strncmp(line,"REP",3) == 0) { if (parse_reptable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -529,7 +542,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the input conversion table */ if (strncmp(line,"ICONV",5) == 0) { if (parse_convtable(line, afflst, &iconvtable, "ICONV")) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -537,7 +550,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the input conversion table */ if (strncmp(line,"OCONV",5) == 0) { if (parse_convtable(line, afflst, &oconvtable, "OCONV")) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -545,7 +558,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the phonetic translation table */ if (strncmp(line,"PHONE",5) == 0) { if (parse_phonetable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -553,7 +566,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the checkcompoundpattern table */ if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) { if (parse_checkcpdtable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -561,7 +574,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the defcompound table */ if (strncmp(line,"COMPOUNDRULE",12) == 0) { if (parse_defcpdtable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -569,7 +582,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the related character map table */ if (strncmp(line,"MAP",3) == 0) { if (parse_maptable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -577,7 +590,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the word breakpoints table */ if (strncmp(line,"BREAK",5) == 0) { if (parse_breaktable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -585,7 +598,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the language for language specific codes */ if (strncmp(line,"LANG",4) == 0) { if (parse_string(line, &lang, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } langnum = get_lang_num(lang); @@ -598,7 +611,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) if (strncmp(line,"MAXNGRAMSUGS",12) == 0) { if (parse_num(line, &maxngramsugs, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -608,14 +621,14 @@ int AffixMgr::parse_file(const char * affpath, const char * key) if (strncmp(line,"MAXDIFF",7) == 0) { if (parse_num(line, &maxdiff, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } if (strncmp(line,"MAXCPDSUGS",10) == 0) { if (parse_num(line, &maxcpdsugs, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -635,7 +648,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by forbidden words */ if (strncmp(line,"KEEPCASE",8) == 0) { if (parse_flag(line, &keepcase, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -643,7 +656,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by `forceucase' */ if (strncmp(line,"FORCEUCASE",10) == 0) { if (parse_flag(line, &forceucase, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -651,7 +664,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by `warn' */ if (strncmp(line,"WARN",4) == 0) { if (parse_flag(line, &warn, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -663,7 +676,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by the affix generator */ if (strncmp(line,"SUBSTANDARD",11) == 0) { if (parse_flag(line, &substandard, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -682,19 +695,14 @@ int AffixMgr::parse_file(const char * affpath, const char * key) dupflags_ini = 0; } if (parse_affix(line, ft, afflst, dupflags)) { - delete afflst; - process_pfx_tree_to_list(); - process_sfx_tree_to_list(); + finishFileMgr(afflst); return 1; } } - } - delete afflst; - // convert affix trees to sorted list - process_pfx_tree_to_list(); - process_sfx_tree_to_list(); + finishFileMgr(afflst); + // affix trees are sorted now // now we can speed up performance greatly taking advantage of the // relationship between the affixes and the idea of "subsets". @@ -807,7 +815,7 @@ int AffixMgr::build_pfxtree(PfxEntry* pfxptr) pptr = NULL; for (;;) { pptr = ptr; - if (strcmp(ep->getKey(), ptr->getKey()) <= 0) { + if (strcmp(ep->getKey(), ptr->getKey() ) <= 0) { ptr = ptr->getNextEQ(); if (!ptr) { pptr->setNextEQ(ep); @@ -871,7 +879,7 @@ int AffixMgr::build_sfxtree(SfxEntry* sfxptr) pptr = NULL; for (;;) { pptr = ptr; - if (strcmp(ep->getKey(), ptr->getKey()) <= 0) { + if (strcmp(ep->getKey(), ptr->getKey() ) <= 0) { ptr = ptr->getNextEQ(); if (!ptr) { pptr->setNextEQ(ep); @@ -951,7 +959,7 @@ int AffixMgr::process_pfx_order() PfxEntry * nptr = ptr->getNext(); for (; nptr != NULL; nptr = nptr->getNext()) { - if (! isSubset( ptr->getKey() , nptr->getKey())) break; + if (! isSubset( ptr->getKey() , nptr->getKey() )) break; } ptr->setNextNE(nptr); ptr->setNextEQ(NULL); @@ -1305,7 +1313,7 @@ int AffixMgr::cpdrep_check(const char * word, int wl) } // forbid compoundings when there are special patterns at word bound -int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char affixed) +int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char /*affixed*/) { int len; for (int i = 0; i < numcheckcpd; i++) { @@ -1318,7 +1326,7 @@ int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, // zero pattern (0/flag) => unmodified stem (zero affixes allowed) (!*(checkcpdtable[i].pattern) || ( (*(checkcpdtable[i].pattern)=='0' && r1->blen <= pos && strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) || - (*(checkcpdtable[i].pattern)!='0' && (len = strlen(checkcpdtable[i].pattern)) && + (*(checkcpdtable[i].pattern)!='0' && ((len = strlen(checkcpdtable[i].pattern)) != 0) && strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)))) { return 1; } @@ -1379,7 +1387,10 @@ int AffixMgr::defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** for (i = 0; i < numdefcpd; i++) { for (j = 0; j < defcpdtable[i].len; j++) { if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' && - TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) ok = 1; + TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) { + ok = 1; + break; + } } } if (ok == 0) { @@ -1530,7 +1541,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len, int oldlen = 0; int checkedstriple = 0; int onlycpdrule; - int affixed = 0; + char affixed = 0; hentry ** oldwords = words; int checked_prefix; @@ -1612,8 +1623,9 @@ struct hentry * AffixMgr::compound_check(const char * word, int len, if (onlycpdrule) break; if (compoundflag && !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) { - if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, - FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule && + if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, + FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule && sfx->getCont() && ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, sfx->getContLen())) || (compoundend && @@ -1626,9 +1638,11 @@ struct hentry * AffixMgr::compound_check(const char * word, int len, if (rv || (((wordnum == 0) && compoundbegin && ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffixes + compound (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) || ((wordnum > 0) && compoundmiddle && ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffixes + compound (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle))))) ) checked_prefix = 1; // else check forbiddenwords and needaffix @@ -2031,7 +2045,7 @@ int AffixMgr::compound_check_morph(const char * word, int len, int cmax; int onlycpdrule; - int affixed = 0; + char affixed = 0; hentry ** oldwords = words; setcminmax(&cmin, &cmax, word, len); @@ -2101,11 +2115,12 @@ int AffixMgr::compound_check_morph(const char * word, int len, } if (!rv) { - if (onlycpdrule) break; + if (onlycpdrule && strlen(*result) > MAXLNLEN/10) break; if (compoundflag && !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) { - if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, - FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule && + if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, + FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule && sfx->getCont() && ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, sfx->getContLen())) || (compoundend && @@ -2118,9 +2133,11 @@ int AffixMgr::compound_check_morph(const char * word, int len, if (rv || (((wordnum == 0) && compoundbegin && ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffix+compound (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) || ((wordnum > 0) && compoundmiddle && ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffix+compound (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle))))) ) { // char * p = prefix_check_morph(st, i, 0, compound); @@ -3540,7 +3557,7 @@ int AffixMgr::parse_reptable(char * line, FileMgr * af) /* now parse the numrep lines to read in the remainder of the table */ char * nl; for (int j=0; j < numrep; j++) { - if (!(nl = af->getline())) return 1; + if ((nl = af->getline()) == NULL) return 1; mychomp(nl); tp = nl; i = 0; @@ -3637,7 +3654,7 @@ int AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const c if (*piece != '\0') { switch(i) { case 0: { - if (strncmp(piece, keyword, sizeof(keyword)) != 0) { + if (strncmp(piece, keyword, strlen(keyword)) != 0) { HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); delete *rl; *rl = NULL; @@ -4244,7 +4261,7 @@ int AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupf std::vector<affentry>::iterator start = affentries.begin(); std::vector<affentry>::iterator end = affentries.end(); for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) { - if (!(nl = af->getline())) return 1; + if ((nl = af->getline()) == NULL) return 1; mychomp(nl); tp = nl; i = 0; diff --git a/plugins/SpellChecker/src/hunspell/affixmgr.hxx b/plugins/SpellChecker/src/hunspell/affixmgr.hxx index d9c625aed0..736816f04e 100644 --- a/plugins/SpellChecker/src/hunspell/affixmgr.hxx +++ b/plugins/SpellChecker/src/hunspell/affixmgr.hxx @@ -41,6 +41,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr FLAG compoundroot; FLAG compoundforbidflag; FLAG compoundpermitflag; + int compoundmoresuffixes; int checkcompounddup; int checkcompoundrep; int checkcompoundcase; @@ -244,6 +245,7 @@ private: int process_sfx_tree_to_list(); int redundant_condition(char, char * strip, int stripl, const char * cond, int); + void finishFileMgr(FileMgr *afflst); }; #endif diff --git a/plugins/SpellChecker/src/hunspell/atypes.hxx b/plugins/SpellChecker/src/hunspell/atypes.hxx index df27c4d1cb..61c59d5ff9 100644 --- a/plugins/SpellChecker/src/hunspell/atypes.hxx +++ b/plugins/SpellChecker/src/hunspell/atypes.hxx @@ -57,7 +57,7 @@ static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {} #define FLAG_NULL 0x00 #define FREE_FLAG(a) a = 0 -#define TESTAFF( a, b , c ) flag_bsearch((unsigned short *) a, (unsigned short) b, c) +#define TESTAFF( a, b , c ) (flag_bsearch((unsigned short *) a, (unsigned short) b, c)) struct affentry { diff --git a/plugins/SpellChecker/src/hunspell/baseaffix.hxx b/plugins/SpellChecker/src/hunspell/baseaffix.hxx index ed64f3d84f..f417acaa44 100644 --- a/plugins/SpellChecker/src/hunspell/baseaffix.hxx +++ b/plugins/SpellChecker/src/hunspell/baseaffix.hxx @@ -5,7 +5,11 @@ class LIBHUNSPELL_DLL_EXPORTED AffEntry { +private: + AffEntry(const AffEntry&); + AffEntry& operator = (const AffEntry&); protected: + AffEntry() {} char * appnd; char * strip; unsigned char appndl; diff --git a/plugins/SpellChecker/src/hunspell/csutil.cxx b/plugins/SpellChecker/src/hunspell/csutil.cxx index 7284f9cdfd..cf24bc06dd 100644 --- a/plugins/SpellChecker/src/hunspell/csutil.cxx +++ b/plugins/SpellChecker/src/hunspell/csutil.cxx @@ -1,4 +1,4 @@ -#include "..\commons.h"
+#include "..\commons.h" // Unicode character encoding information struct unicode_info { @@ -36,6 +36,21 @@ struct unicode_info2 { static struct unicode_info2 * utf_tbl = NULL; static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances +FILE * myfopen(const char * path, const char * mode) { +#ifdef _WIN32 +#define WIN32_LONG_PATH_PREFIX "\\\\?\\" + if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) { + int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0); + wchar_t *buff = (wchar_t *) malloc(len * sizeof(wchar_t)); + MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len); + FILE * f = _wfopen(buff, (strcmp(mode, "r") == 0) ? L"r" : L"rb"); + free(buff); + return f; + } +#endif + return fopen(path, mode); +} + /* only UTF-16 (BMP) implementation */ char * u16_u8(char * dest, int size, const w_char * src, int srclen) { signed char * u8 = (signed char *)dest; @@ -332,7 +347,10 @@ char * line_uniq(char * text, char breakchar) { for ( i = 1; i < linenum; i++ ) { int dup = 0; for (int j = 0; j < i; j++) { - if (strcmp(lines[i], lines[j]) == 0) dup = 1; + if (strcmp(lines[i], lines[j]) == 0) { + dup = 1; + break; + } } if (!dup) { if ((i > 1) || (*(lines[0]) != '\0')) { @@ -5412,14 +5430,14 @@ static void toAsciiLowerAndRemoveNonAlphanumeric( const char* pName, char* pBuf while ( *pName ) { /* A-Z */ - if ( (*pName >= 0x41) && (*pName <= 0x5A)) + if ( (*pName >= 0x41) && (*pName <= 0x5A) ) { *pBuf = (*pName)+0x20; /* toAsciiLower */ pBuf++; } /* a-z, 0-9 */ else if ( ((*pName >= 0x61) && (*pName <= 0x7A)) || - ((*pName >= 0x30) && (*pName <= 0x39))) + ((*pName >= 0x30) && (*pName <= 0x39)) ) { *pBuf = *pName; pBuf++; @@ -5458,7 +5476,15 @@ struct cs_info * get_current_cs(const char * es) { // conversion tables static in this file, create them when needed // with help the mozilla backend. struct cs_info * get_current_cs(const char * es) { - struct cs_info *ccs; + struct cs_info *ccs = new cs_info[256]; + // Initialze the array with dummy data so that we wouldn't need + // to return null in case of failures. + for (int i = 0; i <= 0xff; ++i) { + ccs[i].ccase = false; + ccs[i].clower = i; + ccs[i].cupper = i; + } + nsCOMPtr<nsIUnicodeEncoder> encoder; nsCOMPtr<nsIUnicodeDecoder> decoder; @@ -5466,21 +5492,19 @@ struct cs_info * get_current_cs(const char * es) { nsresult rv; nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv); if (NS_FAILED(rv)) - return nsnull; + return ccs; rv = ccm->GetUnicodeEncoder(es, getter_AddRefs(encoder)); if (NS_FAILED(rv)) - return nsnull; + return ccs; encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nsnull, '?'); rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder)); if (NS_FAILED(rv)) - return nsnull; + return ccs; decoder->SetInputErrorBehavior(decoder->kOnError_Signal); if (NS_FAILED(rv)) - return nsnull; - - ccs = new cs_info[256]; + return ccs; for (unsigned int i = 0; i <= 0xff; ++i) { PRBool success = PR_FALSE; @@ -5643,7 +5667,7 @@ unsigned short unicodetoupper(unsigned short c, int langnum) if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr))) return 0x0130; #ifdef OPENOFFICEORG - return u_toupper(c); + return static_cast<unsigned short>(u_toupper(c)); #else #ifdef MOZILLA_CLIENT return ToUpperCase((PRUnichar) c); @@ -5661,7 +5685,7 @@ unsigned short unicodetolower(unsigned short c, int langnum) if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr))) return 0x0131; #ifdef OPENOFFICEORG - return u_tolower(c); + return static_cast<unsigned short>(u_tolower(c)); #else #ifdef MOZILLA_CLIENT return ToLowerCase((PRUnichar) c); diff --git a/plugins/SpellChecker/src/hunspell/csutil.hxx b/plugins/SpellChecker/src/hunspell/csutil.hxx index 7bd0b919be..e034b53fd2 100644 --- a/plugins/SpellChecker/src/hunspell/csutil.hxx +++ b/plugins/SpellChecker/src/hunspell/csutil.hxx @@ -52,6 +52,9 @@ #define FORBIDDENWORD 65510 #define ONLYUPCASEFLAG 65511 +// fopen or optional _wfopen to fix long pathname problem of WIN32 +LIBHUNSPELL_DLL_EXPORTED FILE * myfopen(const char * path, const char * mode); + // convert UTF-16 characters to UTF-8 LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen); diff --git a/plugins/SpellChecker/src/hunspell/dictmgr.cxx b/plugins/SpellChecker/src/hunspell/dictmgr.cxx index 5310053f76..857d757730 100644 --- a/plugins/SpellChecker/src/hunspell/dictmgr.cxx +++ b/plugins/SpellChecker/src/hunspell/dictmgr.cxx @@ -1,7 +1,7 @@ #include <stdlib.h> #include <string.h> -#include "..\commons.h"
+#include "..\commons.h" DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0) { @@ -53,8 +53,7 @@ int DictMgr::parse_file(const char * dictpath, const char * etype) dictentry * pdict = pdentry; // open the dictionary list file - FILE * dictlst; - dictlst = fopen(dictpath,"r"); + FILE *dictlst = myfopen(dictpath,"r"); if (!dictlst) { return 1; } @@ -97,7 +96,8 @@ int DictMgr::parse_file(const char * dictpath, const char * etype) case 3: free(pdict->region); pdict->region=NULL; - case 2: //deliberate fallthrough + /* FALLTHROUGH */ + case 2: free(pdict->lang); pdict->lang=NULL; default: diff --git a/plugins/SpellChecker/src/hunspell/dictmgr.hxx b/plugins/SpellChecker/src/hunspell/dictmgr.hxx index bb197f84fb..692ed964c3 100644 --- a/plugins/SpellChecker/src/hunspell/dictmgr.hxx +++ b/plugins/SpellChecker/src/hunspell/dictmgr.hxx @@ -15,7 +15,10 @@ struct dictentry { class LIBHUNSPELL_DLL_EXPORTED DictMgr { - +private: + DictMgr(const DictMgr&); + DictMgr& operator = (const DictMgr&); +private: int numdict; dictentry * pdentry; diff --git a/plugins/SpellChecker/src/hunspell/filemgr.cxx b/plugins/SpellChecker/src/hunspell/filemgr.cxx index bfd9554271..f6f2c146b2 100644 --- a/plugins/SpellChecker/src/hunspell/filemgr.cxx +++ b/plugins/SpellChecker/src/hunspell/filemgr.cxx @@ -1,14 +1,17 @@ -#include "..\commons.h"
+#include "..\commons.h" int FileMgr::fail(const char * err, const char * par) { fprintf(stderr, err, par); return -1; } -FileMgr::FileMgr(const char * file, const char * key) { - linenum = 0; - hin = NULL; - fin = fopen(file, "r"); +FileMgr::FileMgr(const char * file, const char * key) + : hin(NULL) + , linenum(0) +{ + in[0] = '\0'; + + fin = myfopen(file, "r"); if (!fin) { // check hzipped file char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1); @@ -32,7 +35,7 @@ char * FileMgr::getline() { const char * l; linenum++; if (fin) return fgets(in, BUFSIZE - 1, fin); - if (hin && (l = hin->getline())) return strcpy(in, l); + if (hin && ((l = hin->getline()) != NULL)) return strcpy(in, l); linenum--; return NULL; } diff --git a/plugins/SpellChecker/src/hunspell/filemgr.hxx b/plugins/SpellChecker/src/hunspell/filemgr.hxx index 94cb7233d8..37b2ae9ea1 100644 --- a/plugins/SpellChecker/src/hunspell/filemgr.hxx +++ b/plugins/SpellChecker/src/hunspell/filemgr.hxx @@ -9,6 +9,9 @@ class LIBHUNSPELL_DLL_EXPORTED FileMgr { +private: + FileMgr(const FileMgr&); + FileMgr& operator = (const FileMgr&); protected: FILE * fin; Hunzip * hin; diff --git a/plugins/SpellChecker/src/hunspell/hashmgr.cxx b/plugins/SpellChecker/src/hunspell/hashmgr.cxx index f150437ad6..7a5da77ecc 100644 --- a/plugins/SpellChecker/src/hunspell/hashmgr.cxx +++ b/plugins/SpellChecker/src/hunspell/hashmgr.cxx @@ -1,14 +1,23 @@ -#include "..\commons.h"
+#include "..\commons.h" + +#undef max // build a hash table from a munched word list HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) + : tablesize(0) + , tableptr(NULL) + , userword(0) + , flag_mode(FLAG_CHAR) + , complexprefixes(0) + , utf8(0) + , forbiddenword(FORBIDDENWORD) // forbidden word signing flag + , numaliasf(0) + , aliasf(NULL) + , aliasflen(0) + , numaliasm(0) + , aliasm(NULL) { - tablesize = 0; - tableptr = NULL; - flag_mode = FLAG_CHAR; - complexprefixes = 0; - utf8 = 0; langnum = 0; lang = NULL; enc = NULL; @@ -16,11 +25,6 @@ HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) ignorechars = NULL; ignorechars_utf16 = NULL; ignorechars_utf16_len = 0; - numaliasf = 0; - aliasf = NULL; - numaliasm = 0; - aliasm = NULL; - forbiddenword = FORBIDDENWORD; // forbidden word signing flag load_config(apath, key); int ec = load_tables(tpath, key); if (ec) { @@ -106,7 +110,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, int al, const char * desc, bool onlyupcase) { bool upcasehomonym = false; - int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0; + int descl = desc ? (aliasm ? sizeof(char *) : strlen(desc) + 1) : 0; // variable-length hash record with word and optional fields struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl); @@ -200,18 +204,21 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, } int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl, - unsigned short * flags, int al, char * dp, int captype) + unsigned short * flags, int flagslen, char * dp, int captype) { + if (flags == NULL) + flagslen = 0; + // add inner capitalized forms to handle the following allcap forms: // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG // Allcaps with suffixes: CIA's -> CIA'S if (((captype == HUHCAP) || (captype == HUHINITCAP) || - ((captype == ALLCAP) && (flags != NULL))) && - !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) { - unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1)); + ((captype == ALLCAP) && (flagslen != 0))) && + !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) { + unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (flagslen+1)); if (!flags2) return 1; - if (al) memcpy(flags2, flags, al * sizeof(unsigned short)); - flags2[al] = ONLYUPCASEFLAG; + if (flagslen) memcpy(flags2, flags, flagslen * sizeof(unsigned short)); + flags2[flagslen] = ONLYUPCASEFLAG; if (utf8) { char st[BUFSIZE]; w_char w[BUFSIZE]; @@ -219,11 +226,11 @@ int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl, mkallsmall_utf(w, wlen, langnum); mkallcap_utf(w, 1, langnum); u16_u8(st, BUFSIZE, w, wlen); - return add_word(st,wbl,wcl,flags2,al+1,dp, true); + return add_word(st,wbl,wcl,flags2,flagslen+1,dp, true); } else { mkallsmall(word, csconv); mkinitcap(word, csconv); - return add_word(word,wbl,wcl,flags2,al+1,dp, true); + return add_word(word,wbl,wcl,flags2,flagslen+1,dp, true); } } return 0; @@ -353,8 +360,8 @@ int HashMgr::load_tables(const char * tpath, const char * key) if (dict == NULL) return 1; // first read the first line of file to get hash table size */ - if (!(ts = dict->getline())) { - HUNSPELL_WARNING(stderr, "error: empty dic file\n"); + if ((ts = dict->getline()) == NULL) { + HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath); delete dict; return 2; } @@ -367,30 +374,32 @@ int HashMgr::load_tables(const char * tpath, const char * key) } tablesize = atoi(ts); - if (tablesize == 0) { + + int nExtra = 5 + USERWORD; + + if (tablesize <= 0 || (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) / int(sizeof(struct hentry *)))) { HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n"); delete dict; return 4; } - tablesize = tablesize + 5 + USERWORD; - if ((tablesize %2) == 0) tablesize++; + tablesize += nExtra; + if ((tablesize % 2) == 0) tablesize++; // allocate the hash table - tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *)); + tableptr = (struct hentry **) calloc(tablesize, sizeof(struct hentry *)); if (! tableptr) { delete dict; return 3; } - for (int i=0; i<tablesize; i++) tableptr[i] = NULL; // loop through all words on much list and add to hash // table and create word and affix strings - while ((ts = dict->getline())) { + while ((ts = dict->getline()) != NULL) { mychomp(ts); // split each line into word and morphological description dp = ts; - while ((dp = strchr(dp, ':'))) { + while ((dp = strchr(dp, ':')) != NULL) { if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) { for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--); if (dp < ts) { // missing word @@ -606,7 +615,7 @@ int HashMgr::load_config(const char * affpath, const char * key) // read in each line ignoring any that do not // start with a known line type indicator - while ((line = afflst->getline())) { + while ((line = afflst->getline()) != NULL) { mychomp(line); /* remove byte order mark */ @@ -746,7 +755,7 @@ int HashMgr::parse_aliasf(char * line, FileMgr * af) /* now parse the numaliasf lines to read in the remainder of the table */ char * nl; for (int j=0; j < numaliasf; j++) { - if (!(nl = af->getline())) return 1; + if ((nl = af->getline()) == NULL) return 1; mychomp(nl); tp = nl; i = 0; @@ -853,7 +862,7 @@ int HashMgr::parse_aliasm(char * line, FileMgr * af) /* now parse the numaliasm lines to read in the remainder of the table */ char * nl = line; for (int j=0; j < numaliasm; j++) { - if (!(nl = af->getline())) return 1; + if ((nl = af->getline()) == NULL) return 1; mychomp(nl); tp = nl; i = 0; diff --git a/plugins/SpellChecker/src/hunspell/hunspell.cxx b/plugins/SpellChecker/src/hunspell/hunspell.cxx index 03f8cb42c8..b5dcfd57fb 100644 --- a/plugins/SpellChecker/src/hunspell/hunspell.cxx +++ b/plugins/SpellChecker/src/hunspell/hunspell.cxx @@ -1,4 +1,4 @@ -#include "..\commons.h"
+#include "..\commons.h" #ifndef MOZILLA_CLIENT # include "config.h" @@ -234,25 +234,10 @@ int Hunspell::mkallcap2(char * p, w_char * u, int nc) void Hunspell::mkallsmall(char * p) { - if (utf8) { - w_char u[MAXWORDLEN]; - int nc = u8_u16(u, MAXWORDLEN, p); - unsigned short idx; - for (int i = 0; i < nc; i++) { - idx = (u[i].h << 8) + u[i].l; - unsigned short low = unicodetolower(idx, langnum); - if (idx != low) { - u[i].h = (unsigned char) (low >> 8); - u[i].l = (unsigned char) (low & 0x00FF); - } - } - u16_u8(p, MAXWORDUTF8LEN, u, nc); - } else { while (*p != '\0') { *p = csconv[((unsigned char) *p)].clower; p++; } - } } int Hunspell::mkallsmall2(char * p, w_char * u, int nc) @@ -335,6 +320,10 @@ int Hunspell::spell(const char * word, int * info, char ** root) char cw[MAXWORDUTF8LEN]; char wspace[MAXWORDUTF8LEN]; w_char unicw[MAXWORDLEN]; + + int info2 = 0; + if (!info) info = &info2; else *info = 0; + // Hunspell supports XML input of the simplified API (see manual) if (strcmp(word, SPELL_XML) == 0) return 1; int nc = strlen(word); @@ -353,7 +342,6 @@ int Hunspell::spell(const char * word, int * info, char ** root) if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - int info2 = 0; if (wl == 0 || maxdic == 0) return 1; if (root) *root = NULL; @@ -371,13 +359,14 @@ int Hunspell::spell(const char * word, int * info, char ** root) } else break; } if ((i == wl) && (nstate == NNUM)) return 1; - if (!info) info = &info2; else *info = 0; switch(captype) { case HUHCAP: + /* FALLTHROUGH */ case HUHINITCAP: *info += SPELL_ORIGCAP; - case NOCAP: { + /* FALLTHROUGH */ + case NOCAP: rv = checkword(cw, info, root); if ((abbv) && !(rv)) { memcpy(wspace,cw,wl); @@ -386,7 +375,6 @@ int Hunspell::spell(const char * word, int * info, char ** root) rv = checkword(wspace, info, root); } break; - } case ALLCAP: { *info += SPELL_ORIGCAP; rv = checkword(cw, info, root); @@ -410,7 +398,7 @@ int Hunspell::spell(const char * word, int * info, char ** root) *apostrophe = '\0'; wl2 = u8_u16(tmpword, MAXWORDLEN, cw); *apostrophe = '\''; - if (wl2 < nc) { + if (wl2 >= 0 && wl2 < nc) { mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1); rv = checkword(cw, info, root); if (rv) break; @@ -757,19 +745,28 @@ int Hunspell::suggest(char*** slst, const char * word) char * dot = strchr(cw, '.'); if (dot && (dot > cw)) { int captype_; - if (utf8) { + if (utf8) + { w_char w_[MAXWORDLEN]; int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1); captype_ = get_captype_utf8(w_, wl_, langnum); } else captype_ = get_captype(dot+1, strlen(dot+1), csconv); - if (captype_ == INITCAP) { + if (captype_ == INITCAP) + { char * st = mystrdup(cw); - if (st) st = (char *) realloc(st, wl + 2); - if (st) { - st[(dot - cw) + 1] = ' '; - strcpy(st + (dot - cw) + 2, dot + 1); - ns = insert_sug(slst, st, ns); - free(st); + if (st) + { + char *newst = (char *) realloc(st, wl + 2); + if (newst == NULL) + free(st); + st = newst; + } + if (st) + { + st[(dot - cw) + 1] = ' '; + strcpy(st + (dot - cw) + 2, dot + 1); + ns = insert_sug(slst, st, ns); + free(st); } } } @@ -855,7 +852,7 @@ int Hunspell::suggest(char*** slst, const char * word) *pos = '\0'; strcpy(w, (*slst)[j]); strcat(w, pos + 1); - spell(w, &info, NULL); + (void)spell(w, &info, NULL); if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { *pos = ' '; } else *pos = '-'; @@ -1682,6 +1679,13 @@ int Hunspell::get_langnum() const return langnum; } +int Hunspell::input_conv(const char * word, char * dest) +{ + RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + return (rl && rl->conv(word, dest)); +} + + // return the beginning of the element (attr == NULL) or the attribute const char * Hunspell::get_xml_pos(const char * s, const char * attr) { @@ -1706,11 +1710,11 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) { int n = 0; char * p; if (!list) return 0; - for (p = list; (p = strstr(p, tag)); p++) n++; + for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++; if (n == 0) return 0; *slst = (char **) malloc(sizeof(char *) * n); if (!*slst) return 0; - for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) { + for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) { int l = strlen(p); (*slst)[n] = (char *) malloc(l + 1); if (!(*slst)[n]) return n; @@ -1722,6 +1726,19 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) { return n; } +namespace +{ + void myrep(std::string& str, const std::string& search, const std::string& replace) + { + size_t pos = 0; + while ((pos = str.find(search, pos)) != std::string::npos) + { + str.replace(pos, search.length(), replace); + pos += replace.length(); + } + } +} + int Hunspell::spellml(char*** slst, const char * word) { char *q, *q2; @@ -1733,26 +1750,26 @@ int Hunspell::spellml(char*** slst, const char * word) q2 = strstr(q2, "<word"); if (!q2) return 0; // bad XML input if (check_xml_par(q, "type=", "analyze")) { - int n = 0, s = 0; + int n = 0; if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw); if (n == 0) return 0; // convert the result to <code><a>ana1</a><a>ana2</a></code> format - for (int i = 0; i < n; i++) s+= strlen((*slst)[i]); - char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->& - if (!r) return 0; - strcpy(r, "<code>"); + std::string r; + r.append("<code>"); for (int i = 0; i < n; i++) { - int l = strlen(r); - strcpy(r + l, "<a>"); - strcpy(r + l + 3, (*slst)[i]); - mystrrep(r + l + 3, "\t", " "); - mystrrep(r + l + 3, "<", "<"); - mystrrep(r + l + 3, "&", "&"); - strcat(r, "</a>"); + r.append("<a>"); + + std::string entry((*slst)[i]); free((*slst)[i]); + myrep(entry, "\t", " "); + myrep(entry, "&", "&"); + myrep(entry, "<", "<"); + r.append(entry); + + r.append("</a>"); } - strcat(r, "</code>"); - (*slst)[0] = r; + r.append("</code>"); + (*slst)[0] = mystrdup(r.c_str()); return 1; } else if (check_xml_par(q, "type=", "stem")) { if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw); @@ -1765,9 +1782,9 @@ int Hunspell::spellml(char*** slst, const char * word) return generate(slst, cw, cw2); } } else { - if ((q2 = strstr(q2 + 1, "<code"))) { + if ((q2 = strstr(q2 + 1, "<code")) != NULL) { char ** slst2; - if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) { + if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>")) != 0) { int n2 = generate(slst, cw, slst2, n); freelist(&slst2, n); return uniqlist(*slst, n2); diff --git a/plugins/SpellChecker/src/hunspell/hunspell.hxx b/plugins/SpellChecker/src/hunspell/hunspell.hxx index a25c637f0d..79f9566567 100644 --- a/plugins/SpellChecker/src/hunspell/hunspell.hxx +++ b/plugins/SpellChecker/src/hunspell/hunspell.hxx @@ -19,6 +19,10 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell { +private: + Hunspell(const Hunspell&); + Hunspell& operator = (const Hunspell&); +private: AffixMgr* pAMgr; HashMgr* pHMgr[MAXDIC]; int maxdic; @@ -35,6 +39,11 @@ public: /* Hunspell(aff, dic) - constructor of Hunspell class * input: path of affix file and dictionary file + * + * In WIN32 environment, use UTF-8 encoded paths started with the long path + * prefix \\\\?\\ to handle system-independent character encoding and very + * long path names (without the long path prefix Hunspell will use fopen() + * with system-dependent character encoding instead of _wfopen()). */ Hunspell(const char * affpath, const char * dpath, const char * key = NULL); @@ -133,6 +142,9 @@ public: const char * get_version(); int get_langnum() const; + + /* need for putdic */ + int input_conv(const char * word, char * dest); /* experimental and deprecated functions */ diff --git a/plugins/SpellChecker/src/hunspell/hunzip.cxx b/plugins/SpellChecker/src/hunspell/hunzip.cxx index ecd8c7da77..c4c811dcd6 100644 --- a/plugins/SpellChecker/src/hunspell/hunzip.cxx +++ b/plugins/SpellChecker/src/hunspell/hunzip.cxx @@ -1,4 +1,4 @@ -#include "..\commons.h"
+#include "..\commons.h" #define CODELEN 65536 #define BASEBITREC 5000 @@ -13,15 +13,17 @@ int Hunzip::fail(const char * err, const char * par) { return -1; } -Hunzip::Hunzip(const char * file, const char * key) { - bufsiz = 0; - lastbit = 0; - inc = 0; - outc = 0; - dec = NULL; - fin = NULL; - filename = (char *) malloc(strlen(file) + 1); - if (filename) strcpy(filename, file); +Hunzip::Hunzip(const char * file, const char * key) + : fin(NULL) + , bufsiz(0) + , lastbit(0) + , inc(0) + , inbits(0) + , outc(0) + , dec(NULL) +{ + in[0] = out[0] = line[0] = '\0'; + filename = mystrdup(file); if (getcode(key) == -1) bufsiz = -1; else bufsiz = getbuf(); } @@ -34,7 +36,7 @@ int Hunzip::getcode(const char * key) { if (!filename) return -1; - fin = fopen(filename, "rb"); + fin = myfopen(filename, "rb"); if (!fin) return -1; // read magic number diff --git a/plugins/SpellChecker/src/hunspell/hunzip.hxx b/plugins/SpellChecker/src/hunspell/hunzip.hxx index b58e3ab1dc..bd02fd8f10 100644 --- a/plugins/SpellChecker/src/hunspell/hunzip.hxx +++ b/plugins/SpellChecker/src/hunspell/hunzip.hxx @@ -23,7 +23,9 @@ struct bit { class LIBHUNSPELL_DLL_EXPORTED Hunzip { - +private: + Hunzip(const Hunzip&); + Hunzip& operator = (const Hunzip&); protected: char * filename; FILE * fin; diff --git a/plugins/SpellChecker/src/hunspell/phonet.cxx b/plugins/SpellChecker/src/hunspell/phonet.cxx index 6e2e56297f..192bfe2d69 100644 --- a/plugins/SpellChecker/src/hunspell/phonet.cxx +++ b/plugins/SpellChecker/src/hunspell/phonet.cxx @@ -27,7 +27,7 @@ Porting from Aspell to Hunspell using C-like structs */ -#include "..\commons.h"
+#include "..\commons.h" void init_phonet_hash(phonetable & parms) { @@ -81,7 +81,8 @@ int phonet (const char * inword, char * target, char word[MAXPHONETUTF8LEN + 1]; if (len == -1) len = strlen(inword); if (len > MAXPHONETUTF8LEN) return 0; - strcpy(word, inword); + strncpy(word, inword, MAXPHONETUTF8LEN); + word[MAXPHONETUTF8LEN] = '\0'; /** check word **/ i = j = z = 0; @@ -134,10 +135,10 @@ int phonet (const char * inword, char * target, || (*s == '^' && (i == 0 || ! myisalpha(word[i-1])) && (*(s+1) != '$' - || (! myisalpha(word[i+k0])))) + || (! myisalpha(word[i+k0]) ))) || (*s == '$' && i > 0 && myisalpha(word[i-1]) - && (! myisalpha(word[i+k0])))) + && (! myisalpha(word[i+k0]) ))) { /** search for followup rules, if: **/ /** parms.followup and k > 1 and NO '-' in searchstring **/ diff --git a/plugins/SpellChecker/src/hunspell/replist.cxx b/plugins/SpellChecker/src/hunspell/replist.cxx index cf75a2868d..51f090a476 100644 --- a/plugins/SpellChecker/src/hunspell/replist.cxx +++ b/plugins/SpellChecker/src/hunspell/replist.cxx @@ -1,4 +1,4 @@ -#include "..\commons.h"
+#include "..\commons.h" RepList::RepList(int n) { dat = (replentry **) malloc(sizeof(replentry *) * n); diff --git a/plugins/SpellChecker/src/hunspell/replist.hxx b/plugins/SpellChecker/src/hunspell/replist.hxx index 1cc039ebeb..de439cd362 100644 --- a/plugins/SpellChecker/src/hunspell/replist.hxx +++ b/plugins/SpellChecker/src/hunspell/replist.hxx @@ -8,6 +8,9 @@ class LIBHUNSPELL_DLL_EXPORTED RepList { +private: + RepList(const RepList&); + RepList& operator = (const RepList&); protected: replentry ** dat; int size; diff --git a/plugins/SpellChecker/src/hunspell/suggestmgr.cxx b/plugins/SpellChecker/src/hunspell/suggestmgr.cxx index 1fdf20c881..d89630849c 100644 --- a/plugins/SpellChecker/src/hunspell/suggestmgr.cxx +++ b/plugins/SpellChecker/src/hunspell/suggestmgr.cxx @@ -1,4 +1,4 @@ -#include "..\commons.h"
+#include "..\commons.h" const w_char W_VLINE = { '\0', '|' }; @@ -97,7 +97,10 @@ int SuggestMgr::testsug(char** wlst, const char * candidate, int wl, int ns, int int cwrd = 1; if (ns == maxSug) return maxSug; for (int k=0; k < ns; k++) { - if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; + if (strcmp(candidate,wlst[k]) == 0) { + cwrd = 0; + break; + } } if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { wlst[ns] = mystrdup(candidate); @@ -354,8 +357,12 @@ int SuggestMgr::map_related(const char * word, char * candidate, int wn, int cn, int cwrd = 1; *(candidate + cn) = '\0'; int wl = strlen(candidate); - for (int m=0; m < ns; m++) - if (strcmp(candidate, wlst[m]) == 0) cwrd = 0; + for (int m=0; m < ns; m++) { + if (strcmp(candidate, wlst[m]) == 0) { + cwrd = 0; + break; + } + } if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { if (ns < maxSug) { wlst[ns] = mystrdup(candidate); @@ -668,7 +675,7 @@ int SuggestMgr::extrachar(char** wlst, const char * word, int ns, int cpdsuggest // error is missing a letter it needs int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsuggest) { - char candidate[MAXSWUTF8L]; + char candidate[MAXSWUTF8L + 4]; char * p; clock_t timelimit = clock(); int timer = MINTIMER; @@ -690,8 +697,8 @@ int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsugge // error is missing a letter it needs int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest) { - w_char candidate_utf[MAXSWL]; - char candidate[MAXSWUTF8L]; + w_char candidate_utf[MAXSWL + 1]; + char candidate[MAXSWUTF8L + 4]; w_char * p; clock_t timelimit = clock(); int timer = MINTIMER; @@ -751,8 +758,12 @@ int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest ((c1 == 3) && (c2 >= 2)))) *p = '-'; cwrd = 1; - for (int k=0; k < ns; k++) - if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; + for (int k=0; k < ns; k++) { + if (strcmp(candidate,wlst[k]) == 0) { + cwrd = 0; + break; + } + } if (ns < maxSug) { if (cwrd) { wlst[ns] = mystrdup(candidate); @@ -767,8 +778,12 @@ int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest mystrlen(p + 1) > 1 && mystrlen(candidate) - mystrlen(p) > 1) { *p = '-'; - for (int k=0; k < ns; k++) - if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; + for (int k=0; k < ns; k++) { + if (strcmp(candidate,wlst[k]) == 0) { + cwrd = 0; + break; + } + } if (ns < maxSug) { if (cwrd) { wlst[ns] = mystrdup(candidate); @@ -1250,6 +1265,10 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md break; } // using 2-gram instead of 3, and other weightening + + re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) + + ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED); + gscore[i] = // length of longest common subsequent minus length difference 2 * _lcs - abs((int) (n - len)) + @@ -1262,9 +1281,8 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md // ngram ngram(4, word, gl, NGRAM_ANY_MISMATCH + low) + // weighted ngrams - (re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED)) + - (re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED)) + - // different limit for dictionaries with PHONE rules + re + + // different limit for dictionaries with PHONE rules (ph ? (re < len * fact ? -1000 : 0) : (re < (n + len)*fact? -1000 : 0)); } } @@ -1320,7 +1338,10 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md if ((!guessorig[i] && strstr(guess[i], wlst[j])) || (guessorig[i] && strstr(guessorig[i], wlst[j])) || // check forbidden words - !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) unique = 0; + !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) { + unique = 0; + break; + } } if (unique) { wlst[ns++] = guess[i]; @@ -1348,7 +1369,10 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md // don't suggest previous suggestions or a previous suggestion with prefixes or affixes if (strstr(rootsphon[i], wlst[j]) || // check forbidden words - !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) unique = 0; + !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) { + unique = 0; + break; + } } if (unique) { wlst[ns++] = mystrdup(rootsphon[i]); @@ -1842,6 +1866,10 @@ int SuggestMgr::commoncharacterpositions(char * s1, const char * s2, int * is_sw w_char su2[MAXSWL]; int l1 = u8_u16(su1, MAXSWL, s1); int l2 = u8_u16(su2, MAXSWL, s2); + + if (l1 <= 0 || l2 <= 0) + return 0; + // decapitalize dictionary word if (complexprefixes) { mkallsmall_utf(su2+l2-1, 1, langnum); diff --git a/plugins/SpellChecker/src/hunspell/suggestmgr.hxx b/plugins/SpellChecker/src/hunspell/suggestmgr.hxx index 5f043fdfd2..8456b5b3e2 100644 --- a/plugins/SpellChecker/src/hunspell/suggestmgr.hxx +++ b/plugins/SpellChecker/src/hunspell/suggestmgr.hxx @@ -32,6 +32,10 @@ enum { LCS_UP, LCS_LEFT, LCS_UPLEFT }; class LIBHUNSPELL_DLL_EXPORTED SuggestMgr { +private: + SuggestMgr(const SuggestMgr&); + SuggestMgr& operator = (const SuggestMgr&); +private: char * ckey; int ckeyl; w_char * ckey_utf; diff --git a/plugins/SpellChecker/src/options.cpp b/plugins/SpellChecker/src/options.cpp index 8ae164d222..191b721221 100644 --- a/plugins/SpellChecker/src/options.cpp +++ b/plugins/SpellChecker/src/options.cpp @@ -53,7 +53,7 @@ static OptPageControl autoReplaceControls[] = { // Functions //////////////////////////////////////////////////////////////////////////////////////
-int InitOptionsCallback(WPARAM wParam,LPARAM lParam)
+int InitOptionsCallback(WPARAM wParam,LPARAM)
{
OPTIONSDIALOGPAGE odp = { sizeof(odp) };
odp.hInstance = hInst;
diff --git a/plugins/SpellChecker/src/spellchecker.cpp b/plugins/SpellChecker/src/spellchecker.cpp index 6473c8b719..13be3a9629 100644 --- a/plugins/SpellChecker/src/spellchecker.cpp +++ b/plugins/SpellChecker/src/spellchecker.cpp @@ -69,7 +69,7 @@ extern "C" __declspec(dllexport) PLUGININFOEX* MirandaPluginInfoEx(DWORD miranda return &pluginInfo; } -static int IconsChanged(WPARAM wParam, LPARAM lParam) +static int IconsChanged(WPARAM, LPARAM) { StatusIconData sid = { sizeof(sid) }; sid.szModule = MODULE_NAME; @@ -94,7 +94,7 @@ static int IconsChanged(WPARAM wParam, LPARAM lParam) return 0; } -static int PreShutdown(WPARAM wParam, LPARAM lParam) +static int PreShutdown(WPARAM, LPARAM) { mir_free(dictionariesFolder); mir_free(customDictionariesFolder); @@ -103,7 +103,7 @@ static int PreShutdown(WPARAM wParam, LPARAM lParam) } // Called when all the modules are loaded -static int ModulesLoaded(WPARAM wParam, LPARAM lParam) +static int ModulesLoaded(WPARAM, LPARAM) { variables_enabled = ServiceExists(MS_VARS_FORMATSTRING); @@ -192,14 +192,10 @@ static int ModulesLoaded(WPARAM wParam, LPARAM lParam) dict->load(); } - HookEvent(ME_SKIN2_ICONSCHANGED, &IconsChanged); - HookEvent(ME_MSG_WINDOWEVENT, &MsgWindowEvent); - HookEvent(ME_MSG_WINDOWPOPUP, &MsgWindowPopup); - HookEvent(ME_MSG_ICONPRESSED, &IconPressed); - - CreateServiceFunction(MS_SPELLCHECKER_ADD_RICHEDIT, AddContactTextBoxService); - CreateServiceFunction(MS_SPELLCHECKER_REMOVE_RICHEDIT, RemoveContactTextBoxService); - CreateServiceFunction(MS_SPELLCHECKER_SHOW_POPUP_MENU, ShowPopupMenuService); + HookEvent(ME_SKIN2_ICONSCHANGED, IconsChanged); + HookEvent(ME_MSG_WINDOWEVENT, MsgWindowEvent); + HookEvent(ME_MSG_WINDOWPOPUP, MsgWindowPopup); + HookEvent(ME_MSG_ICONPRESSED, IconPressed); StatusIconData sid = { sizeof(sid) }; sid.szModule = MODULE_NAME; @@ -250,6 +246,10 @@ extern "C" int __declspec(dllexport) Load(void) HookEvent(ME_SYSTEM_MODULESLOADED, ModulesLoaded); HookEvent(ME_SYSTEM_PRESHUTDOWN, PreShutdown); + CreateServiceFunction(MS_SPELLCHECKER_ADD_RICHEDIT, AddContactTextBoxService); + CreateServiceFunction(MS_SPELLCHECKER_REMOVE_RICHEDIT, RemoveContactTextBoxService); + CreateServiceFunction(MS_SPELLCHECKER_SHOW_POPUP_MENU, ShowPopupMenuService); + hCheckedBmp = LoadBitmap(NULL, MAKEINTRESOURCE(OBM_CHECK)); if (GetObject(hCheckedBmp, sizeof(bmpChecked), &bmpChecked) == 0) bmpChecked.bmHeight = bmpChecked.bmWidth = 10; |