summary refs log tree commit diff
path: root/plugins/SpellChecker/src/hunspell/affentry.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/SpellChecker/src/hunspell/affentry.cxx')
-rw-r--r-- plugins/SpellChecker/src/hunspell/affentry.cxx 994
1 files changed, 0 insertions, 994 deletions
diff --git a/plugins/SpellChecker/src/hunspell/affentry.cxx b/plugins/SpellChecker/src/hunspell/affentry.cxx
deleted file mode 100644
index 435fef804e..0000000000
--- a/plugins/SpellChecker/src/hunspell/affentry.cxx
+++ /dev/null
@@ -1,994 +0,0 @@
-#include "..\commons.h"
-
-#define MAXTEMPWORDLEN (MAXWORDUTF8LEN + 4)
-
-PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
- // register affix manager
- : pmyMgr(pmgr)
- , next(NULL)
- , nexteq(NULL)
- , nextne(NULL)
- , flgnxt(NULL)
-{
- // set up its initial values
- aflag = dp->aflag; // flag
- strip = dp->strip; // string to strip
- appnd = dp->appnd; // string to append
- stripl = dp->stripl; // length of strip string
- appndl = dp->appndl; // length of append string
- numconds = dp->numconds; // length of the condition
- opts = dp->opts; // cross product flag
- // then copy over all of the conditions
- if (opts & aeLONGCOND) {
- memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1);
- c.l.conds2 = dp->c.l.conds2;
- } else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
- morphcode = dp->morphcode;
- contclass = dp->contclass;
- contclasslen = dp->contclasslen;
-}
-
-
-PfxEntry::~PfxEntry()
-{
- aflag = 0;
- if (appnd) free(appnd);
- if (strip) free(strip);
- pmyMgr = NULL;
- appnd = NULL;
- strip = NULL;
- if (opts & aeLONGCOND) free(c.l.conds2);
- if (morphcode && !(opts & aeALIASM)) free(morphcode);
- if (contclass && !(opts & aeALIASF)) free(contclass);
-}
-
-// add prefix to this word assuming conditions hold
-char * PfxEntry::add(const char * word, int len)
-{
- char tword[MAXTEMPWORDLEN];
-
- if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
- (len >= numconds) && test_condition(word) &&
- (!stripl || (strncmp(word, strip, stripl) == 0)) &&
- ((MAXTEMPWORDLEN) > (len + appndl - stripl))) {
- /* we have a match so add prefix */
- char * pp = tword;
- if (appndl) {
- strncpy(tword, appnd, MAXTEMPWORDLEN-1);
- tword[MAXTEMPWORDLEN-1] = '\0';
- pp += appndl;
- }
- strcpy(pp, (word + stripl));
- return mystrdup(tword);
- }
- return NULL;
-}
-
-inline char * PfxEntry::nextchar(char * p) {
- if (p) {
- p++;
- if (opts & aeLONGCOND) {
- // jump to the 2nd part of the condition
- if (p == c.conds + MAXCONDLEN_1) return c.l.conds2;
- // end of the MAXCONDLEN length condition
- } else if (p == c.conds + MAXCONDLEN) return NULL;
- return *p ? p : NULL;
- }
- return NULL;
-}
-
-inline int PfxEntry::test_condition(const char * st)
-{
- const char * pos = NULL; // group with pos input position
- bool neg = false; // complementer
- bool ingroup = false; // character in the group
- if (numconds == 0) return 1;
- char * p = c.conds;
- while (1) {
- switch (*p) {
- case '\0': return 1;
- case '[': {
- neg = false;
- ingroup = false;
- p = nextchar(p);
- pos = st; break;
- }
- case '^': { p = nextchar(p); neg = true; break; }
- case ']': {
- if ((neg && ingroup) || (!neg && !ingroup)) return 0;
- pos = NULL;
- p = nextchar(p);
- // skip the next character
- if (!ingroup && *st) for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
- if (*st == '\0' && p) return 0; // word <= condition
- break;
- }
- case '.':
- if (!pos) { // dots are not metacharacters in groups: [.]
- p = nextchar(p);
- // skip the next character
- for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
- if (*st == '\0' && p) return 0; // word <= condition
- break;
- }
- /* FALLTHROUGH */
- default: {
- if (*st == *p) {
- st++;
- p = nextchar(p);
- if ((opts & aeUTF8) && (*(st - 1) & 0x80)) { // multibyte
- while (p && (*p & 0xc0) == 0x80) { // character
- if (*p != *st) {
- if (!pos) return 0;
- st = pos;
- break;
- }
- p = nextchar(p);
- st++;
- }
- if (pos && st != pos) {
- ingroup = true;
- while (p && *p != ']' && ((p = nextchar(p)) != NULL));
- }
- } else if (pos) {
- ingroup = true;
- while (p && *p != ']' && ((p = nextchar(p)) != NULL));
- }
- } else if (pos) { // group
- p = nextchar(p);
- } else return 0;
- }
- }
- if (!p) return 1;
- }
-}
-
-// check if this prefix entry matches
-struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound, const FLAG needflag)
-{
- int tmpl; // length of tmpword
- struct hentry * he; // hash entry of root word or NULL
- char tmpword[MAXTEMPWORDLEN];
-
- // on entry prefix is 0 length or already matches the beginning of the word.
- // So if the remaining root word has positive length
- // and if there are enough chars in root word and added back strip chars
- // to meet the number of characters conditions, then test it
-
- tmpl = len - appndl;
-
- if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) {
-
- // generate new root word by removing prefix and adding
- // back any characters that would have been stripped
-
- if (stripl) {
- strncpy(tmpword, strip, MAXTEMPWORDLEN-1);
- tmpword[MAXTEMPWORDLEN-1] = '\0';
- }
- strcpy ((tmpword + stripl), (word + appndl));
-
- // now make sure all of the conditions on characters
- // are met. Please see the appendix at the end of
- // this file for more info on exactly what is being
- // tested
-
- // if all conditions are met then check if resulting
- // root word in the dictionary
-
- if (test_condition(tmpword)) {
- tmpl += stripl;
- if ((he = pmyMgr->lookup(tmpword)) != NULL) {
- do {
- if (TESTAFF(he->astr, aflag, he->alen) &&
- // forbid single prefixes with needaffix flag
- ! TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
- // needflag
- ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
- (contclass && TESTAFF(contclass, needflag, contclasslen))))
- return he;
- he = he->next_homonym; // check homonyms
- } while (he);
- }
-
- // prefix matched but no root word was found
- // if aeXPRODUCT is allowed, try again but now
- // cross checked combined with a suffix
-
- //if ((opts & aeXPRODUCT) && in_compound) {
- if ((opts & aeXPRODUCT)) {
- he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, this, NULL,
- 0, NULL, FLAG_NULL, needflag, in_compound);
- if (he) return he;
- }
- }
- }
- return NULL;
-}
-
-// check if this prefix entry matches
-struct hentry * PfxEntry::check_twosfx(const char * word, int len,
- char in_compound, const FLAG needflag)
-{
- int tmpl; // length of tmpword
- struct hentry * he; // hash entry of root word or NULL
- char tmpword[MAXTEMPWORDLEN];
-
- // on entry prefix is 0 length or already matches the beginning of the word.
- // So if the remaining root word has positive length
- // and if there are enough chars in root word and added back strip chars
- // to meet the number of characters conditions, then test it
-
- tmpl = len - appndl;
-
- if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
- (tmpl + stripl >= numconds)) {
-
- // generate new root word by removing prefix and adding
- // back any characters that would have been stripped
-
- if (stripl) {
- strncpy(tmpword, strip, MAXTEMPWORDLEN-1);
- tmpword[MAXTEMPWORDLEN-1] = '\0';
- }
- strcpy ((tmpword + stripl), (word + appndl));
-
- // now make sure all of the conditions on characters
- // are met. Please see the appendix at the end of
- // this file for more info on exactly what is being
- // tested
-
- // if all conditions are met then check if resulting
- // root word in the dictionary
-
- if (test_condition(tmpword)) {
- tmpl += stripl;
-
- // prefix matched but no root word was found
- // if aeXPRODUCT is allowed, try again but now
- // cross checked combined with a suffix
-
- if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
- he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, this, needflag);
- if (he) return he;
- }
- }
- }
- return NULL;
-}
-
-// check if this prefix entry matches
-char * PfxEntry::check_twosfx_morph(const char * word, int len,
- char in_compound, const FLAG needflag)
-{
- int tmpl; // length of tmpword
- char tmpword[MAXTEMPWORDLEN];
-
- // on entry prefix is 0 length or already matches the beginning of the word.
- // So if the remaining root word has positive length
- // and if there are enough chars in root word and added back strip chars
- // to meet the number of characters conditions, then test it
-
- tmpl = len - appndl;
-
- if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
- (tmpl + stripl >= numconds)) {
-
- // generate new root word by removing prefix and adding
- // back any characters that would have been stripped
-
- if (stripl) {
- strncpy(tmpword, strip, MAXTEMPWORDLEN-1);
- tmpword[MAXTEMPWORDLEN-1] = '\0';
- }
- strcpy ((tmpword + stripl), (word + appndl));
-
- // now make sure all of the conditions on characters
- // are met. Please see the appendix at the end of
- // this file for more info on exactly what is being
- // tested
-
- // if all conditions are met then check if resulting
- // root word in the dictionary
-
- if (test_condition(tmpword)) {
- tmpl += stripl;
-
- // prefix matched but no root word was found
- // if aeXPRODUCT is allowed, try again but now
- // cross checked combined with a suffix
-
- if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
- return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl,
- aeXPRODUCT, this, needflag);
- }
- }
- }
- return NULL;
-}
-
-// check if this prefix entry matches
-char * PfxEntry::check_morph(const char * word, int len, char in_compound, const FLAG needflag)
-{
- int tmpl; // length of tmpword
- struct hentry * he; // hash entry of root word or NULL
- char tmpword[MAXTEMPWORDLEN];
- char result[MAXLNLEN];
- char * st;
-
- *result = '\0';
-
- // on entry prefix is 0 length or already matches the beginning of the word.
- // So if the remaining root word has positive length
- // and if there are enough chars in root word and added back strip chars
- // to meet the number of characters conditions, then test it
-
- tmpl = len - appndl;
-
- if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
- (tmpl + stripl >= numconds)) {
-
- // generate new root word by removing prefix and adding
- // back any characters that would have been stripped
-
- if (stripl) {
- strncpy(tmpword, strip, MAXTEMPWORDLEN-1);
- tmpword[MAXTEMPWORDLEN-1] = '\0';
- }
- strcpy ((tmpword + stripl), (word + appndl));
-
- // now make sure all of the conditions on characters
- // are met. Please see the appendix at the end of
- // this file for more info on exactly what is being
- // tested
-
- // if all conditions are met then check if resulting
- // root word in the dictionary
-
- if (test_condition(tmpword)) {
- tmpl += stripl;
- if ((he = pmyMgr->lookup(tmpword)) != NULL) {
- do {
- if (TESTAFF(he->astr, aflag, he->alen) &&
- // forbid single prefixes with needaffix flag
- ! TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
- // needflag
- ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
- (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
- if (morphcode) {
- mystrcat(result, " ", MAXLNLEN);
- mystrcat(result, morphcode, MAXLNLEN);
- } else mystrcat(result,getKey(), MAXLNLEN);
- if (!HENTRY_FIND(he, MORPH_STEM)) {
- mystrcat(result, " ", MAXLNLEN);
- mystrcat(result, MORPH_STEM, MAXLNLEN);
- mystrcat(result, HENTRY_WORD(he), MAXLNLEN);
- }
- // store the pointer of the hash entry
- if (HENTRY_DATA(he)) {
- mystrcat(result, " ", MAXLNLEN);
- mystrcat(result, HENTRY_DATA2(he), MAXLNLEN);
- } else {
- // return with debug information
- char * flag = pmyMgr->encode_flag(getFlag());
- mystrcat(result, " ", MAXLNLEN);
- mystrcat(result, MORPH_FLAG, MAXLNLEN);
- mystrcat(result, flag, MAXLNLEN);
- free(flag);
- }
- mystrcat(result, "\n", MAXLNLEN);
- }
- he = he->next_homonym;
- } while (he);
- }
-
- // prefix matched but no root word was found
- // if aeXPRODUCT is allowed, try again but now
- // cross checked combined with a suffix
-
- if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
- st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, this,
- FLAG_NULL, needflag);
- if (st) {
- mystrcat(result, st, MAXLNLEN);
- free(st);
- }
- }
- }
- }
-
- if (*result) return mystrdup(result);
- return NULL;
-}
-
-SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
- : pmyMgr(pmgr) // register affix manager
- , next(NULL)
- , nexteq(NULL)
- , nextne(NULL)
- , flgnxt(NULL)
- , l_morph(NULL)
- , r_morph(NULL)
- , eq_morph(NULL)
-{
- // set up its initial values
- aflag = dp->aflag; // char flag
- strip = dp->strip; // string to strip
- appnd = dp->appnd; // string to append
- stripl = dp->stripl; // length of strip string
- appndl = dp->appndl; // length of append string
- numconds = dp->numconds; // length of the condition
- opts = dp->opts; // cross product flag
-
- // then copy over all of the conditions
- if (opts & aeLONGCOND) {
- memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1);
- c.l.conds2 = dp->c.l.conds2;
- } else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
- rappnd = myrevstrdup(appnd);
- morphcode = dp->morphcode;
- contclass = dp->contclass;
- contclasslen = dp->contclasslen;
-}
-
-
-SfxEntry::~SfxEntry()
-{
- aflag = 0;
- if (appnd) free(appnd);
- if (rappnd) free(rappnd);
- if (strip) free(strip);
- pmyMgr = NULL;
- appnd = NULL;
- strip = NULL;
- if (opts & aeLONGCOND) free(c.l.conds2);
- if (morphcode && !(opts & aeALIASM)) free(morphcode);
- if (contclass && !(opts & aeALIASF)) free(contclass);
-}
-
-// add suffix to this word assuming conditions hold
-char * SfxEntry::add(const char * word, int len)
-{
- char tword[MAXTEMPWORDLEN];
-
- /* make sure all conditions match */
- if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
- (len >= numconds) && test_condition(word + len, word) &&
- (!stripl || (strcmp(word + len - stripl, strip) == 0)) &&
- ((MAXTEMPWORDLEN) > (len + appndl - stripl))) {
- /* we have a match so add suffix */
- strncpy(tword, word, MAXTEMPWORDLEN-1);
- tword[MAXTEMPWORDLEN-1] = '\0';
- if (appndl) {
- strcpy(tword + len - stripl, appnd);
- } else {
- *(tword + len - stripl) = '\0';
- }
- return mystrdup(tword);
- }
- return NULL;
-}
-
-inline char * SfxEntry::nextchar(char * p) {
- if (p) {
- p++;
- if (opts & aeLONGCOND) {
- // jump to the 2nd part of the condition
- if (p == c.l.conds1 + MAXCONDLEN_1) return c.l.conds2;
- // end of the MAXCONDLEN length condition
- } else if (p == c.conds + MAXCONDLEN) return NULL;
- return *p ? p : NULL;
- }
- return NULL;
-}
-
-inline int SfxEntry::test_condition(const char * st, const char * beg)
-{
- const char * pos = NULL; // group with pos input position
- bool neg = false; // complementer
- bool ingroup = false; // character in the group
- if (numconds == 0) return 1;
- char * p = c.conds;
- st--;
- int i = 1;
- while (1) {
- switch (*p) {
- case '\0':
- return 1;
- case '[':
- p = nextchar(p);
- pos = st;
- break;
- case '^':
- p = nextchar(p);
- neg = true;
- break;
- case ']':
- if (!neg && !ingroup)
- return 0;
- i++;
- // skip the next character
- if (!ingroup)
- {
- for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
- st--;
- }
- pos = NULL;
- neg = false;
- ingroup = false;
- p = nextchar(p);
- if (st < beg && p)
- return 0; // word <= condition
- break;
- case '.':
- if (!pos)
- {
- // dots are not metacharacters in groups: [.]
- p = nextchar(p);
- // skip the next character
- for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
- if (st < beg) { // word <= condition
- if (p) return 0; else return 1;
- }
- if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 character
- st--;
- if (st < beg) { // word <= condition
- if (p) return 0; else return 1;
- }
- }
- break;
- }
- /* FALLTHROUGH */
- default: {
- if (*st == *p) {
- p = nextchar(p);
- if ((opts & aeUTF8) && (*st & 0x80)) {
- st--;
- while (p && (st >= beg)) {
- if (*p != *st) {
- if (!pos) return 0;
- st = pos;
- break;
- }
- // first byte of the UTF-8 multibyte character
- if ((*p & 0xc0) != 0x80) break;
- p = nextchar(p);
- st--;
- }
- if (pos && st != pos) {
- if (neg) return 0;
- else if (i == numconds) return 1;
- ingroup = true;
- while (p && *p != ']' && ((p = nextchar(p)) != NULL));
- st--;
- }
- if (p && *p != ']') p = nextchar(p);
- } else if (pos) {
- if (neg) return 0;
- else if (i == numconds) return 1;
- ingroup = true;
- while (p && *p != ']' && ((p = nextchar(p)) != NULL));
-// if (p && *p != ']') p = nextchar(p);
- st--;
- }
- if (!pos) {
- i++;
- st--;
- }
- if (st < beg && p && *p != ']') return 0; // word <= condition
- } else if (pos) { // group
- p = nextchar(p);
- } else return 0;
- }
- }
- if (!p) return 1;
- }
-}
-
-// see if this suffix is present in the word
-struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
- PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag,
- const FLAG badflag)
-{
- int tmpl; // length of tmpword
- struct hentry * he; // hash entry pointer
- unsigned char * cp;
- char tmpword[MAXTEMPWORDLEN];
- PfxEntry* ep = ppfx;
-
- // if this suffix is being cross checked with a prefix
- // but it does not support cross products skip it
-
- if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0))
- return NULL;
-
- // upon entry suffix is 0 length or already matches the end of the word.
- // So if the remaining root word has positive length
- // and if there are enough chars in root word and added back strip chars
- // to meet the number of characters conditions, then test it
-
- tmpl = len - appndl;
- // the second condition is not enough for UTF-8 strings
- // it is checked in test_condition()
-
- if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
- (tmpl + stripl >= numconds)) {
-
- // generate new root word by removing suffix and adding
- // back any characters that would have been stripped or
- // null terminating the shorter string
-
- strncpy (tmpword, word, MAXTEMPWORDLEN-1);
- tmpword[MAXTEMPWORDLEN-1] = '\0';
- cp = (unsigned char *)(tmpword + tmpl);
- if (stripl) {
- strcpy ((char *)cp, strip);
- tmpl += stripl;
- cp = (unsigned char *)(tmpword + tmpl);
- } else *cp = '\0';
-
- // now make sure all of the conditions on characters
- // are met. Please see the appendix at the end of
- // this file for more info on exactly what is being
- // tested
-
- // if all conditions are met then check if resulting
- // root word in the dictionary
-
- if (test_condition((char *) cp, (char *) tmpword)) {
-
-#ifdef SZOSZABLYA_POSSIBLE_ROOTS
- fprintf(stdout,"%s %s %c\n", word, tmpword, aflag);
-#endif
- if ((he = pmyMgr->lookup(tmpword)) != NULL) {
- do {
- // check conditional suffix (enabled by prefix)
- if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() &&
- TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
- (((optflags & aeXPRODUCT) == 0) ||
- (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) ||
- // enabled by prefix
- ((contclass) && (ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))
- ) &&
- // handle cont. class
- ((!cclass) ||
- ((contclass) && TESTAFF(contclass, cclass, contclasslen))
- ) &&
- // check only in compound homonyms (bad flags)
- (!badflag || !TESTAFF(he->astr, badflag, he->alen)
- ) &&
- // handle required flag
- ((!needflag) ||
- (TESTAFF(he->astr, needflag, he->alen) ||
- ((contclass) && TESTAFF(contclass, needflag, contclasslen)))
- )
- ) return he;
- he = he->next_homonym; // check homonyms
- } while (he);
-
- // obsolete stemming code (used only by the
- // experimental SuffixMgr:suggest_pos_stems)
- // store resulting root in wlst
- } else if (wlst && (*ns < maxSug)) {
- int cwrd = 1;
- for (int k=0; k < *ns; k++)
- if (strcmp(tmpword, wlst[k]) == 0) {
- cwrd = 0;
- break;
- }
- if (cwrd) {
- wlst[*ns] = mystrdup(tmpword);
- if (wlst[*ns] == NULL) {
- for (int j=0; j<*ns; j++) free(wlst[j]);
- *ns = -1;
- return NULL;
- }
- (*ns)++;
- }
- }
- }
- }
- return NULL;
-}
-
-// see if two-level suffix is present in the word
-struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
- PfxEntry* ppfx, const FLAG needflag)
-{
- int tmpl; // length of tmpword
- struct hentry * he; // hash entry pointer
- unsigned char * cp;
- char tmpword[MAXTEMPWORDLEN];
- PfxEntry* ep = ppfx;
-
-
- // if this suffix is being cross checked with a prefix
- // but it does not support cross products skip it
-
- if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
- return NULL;
-
- // upon entry suffix is 0 length or already matches the end of the word.
- // So if the remaining root word has positive length
- // and if there are enough chars in root word and added back strip chars
- // to meet the number of characters conditions, then test it
-
- tmpl = len - appndl;
-
- if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
- (tmpl + stripl >= numconds)) {
-
- // generate new root word by removing suffix and adding
- // back any characters that would have been stripped or
- // null terminating the shorter string
-
- strncpy(tmpword, word, MAXTEMPWORDLEN-1);
- tmpword[MAXTEMPWORDLEN-1] = '\0';
- cp = (unsigned char *)(tmpword + tmpl);
- if (stripl) {
- strcpy ((char *)cp, strip);
- tmpl += stripl;
- cp = (unsigned char *)(tmpword + tmpl);
- } else *cp = '\0';
-
- // now make sure all of the conditions on characters
- // are met. Please see the appendix at the end of
- // this file for more info on exactly what is being
- // tested
-
- // if all conditions are met then recall suffix_check
-
- if (test_condition((char *) cp, (char *) tmpword)) {
- if (ppfx) {
- // handle conditional suffix
- if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
- he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, (FLAG) aflag, needflag);
- else
- he = pmyMgr->suffix_check(tmpword, tmpl, optflags, ppfx, NULL, 0, NULL, (FLAG) aflag, needflag);
- } else {
- he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, (FLAG) aflag, needflag);
- }
- if (he) return he;
- }
- }
- return NULL;
-}
-
-// see if two-level suffix is present in the word
-char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
- PfxEntry* ppfx, const FLAG needflag)
-{
- int tmpl; // length of tmpword
- unsigned char * cp;
- char tmpword[MAXTEMPWORDLEN];
- PfxEntry* ep = ppfx;
- char * st;
-
- char result[MAXLNLEN];
-
- *result = '\0';
-
- // if this suffix is being cross checked with a prefix
- // but it does not support cross products skip it
-
- if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
- return NULL;
-
- // upon entry suffix is 0 length or already matches the end of the word.
- // So if the remaining root word has positive length
- // and if there are enough chars in root word and added back strip chars
- // to meet the number of characters conditions, then test it
-
- tmpl = len - appndl;
-
- if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
- (tmpl + stripl >= numconds)) {
-
- // generate new root word by removing suffix and adding
- // back any characters that would have been stripped or
- // null terminating the shorter string
-
- strncpy(tmpword, word, MAXTEMPWORDLEN-1);
- tmpword[MAXTEMPWORDLEN-1] = '\0';
- cp = (unsigned char *)(tmpword + tmpl);
- if (stripl) {
- strcpy ((char *)cp, strip);
- tmpl += stripl;
- cp = (unsigned char *)(tmpword + tmpl);
- } else *cp = '\0';
-
- // now make sure all of the conditions on characters
- // are met. Please see the appendix at the end of
- // this file for more info on exactly what is being
- // tested
-
- // if all conditions are met then recall suffix_check
-
- if (test_condition((char *) cp, (char *) tmpword)) {
- if (ppfx) {
- // handle conditional suffix
- if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
- st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
- if (st) {
- if (ppfx->getMorph()) {
- mystrcat(result, ppfx->getMorph(), MAXLNLEN);
- mystrcat(result, " ", MAXLNLEN);
- }
- mystrcat(result,st, MAXLNLEN);
- free(st);
- mychomp(result);
- }
- } else {
- st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag, needflag);
- if (st) {
- mystrcat(result, st, MAXLNLEN);
- free(st);
- mychomp(result);
- }
- }
- } else {
- st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
- if (st) {
- mystrcat(result, st, MAXLNLEN);
- free(st);
- mychomp(result);
- }
- }
- if (*result) return mystrdup(result);
- }
- }
- return NULL;
-}
-
-// get next homonym with same affix
-struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, PfxEntry* ppfx,
- const FLAG cclass, const FLAG needflag)
-{
- PfxEntry* ep = ppfx;
- FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
-
- while (he->next_homonym) {
- he = he->next_homonym;
- if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
- ((optflags & aeXPRODUCT) == 0 ||
- TESTAFF(he->astr, eFlag, he->alen) ||
- // handle conditional suffix
- ((contclass) && TESTAFF(contclass, eFlag, contclasslen))
- ) &&
- // handle cont. class
- ((!cclass) ||
- ((contclass) && TESTAFF(contclass, cclass, contclasslen))
- ) &&
- // handle required flag
- ((!needflag) ||
- (TESTAFF(he->astr, needflag, he->alen) ||
- ((contclass) && TESTAFF(contclass, needflag, contclasslen)))
- )
- ) return he;
- }
- return NULL;
-}
-
-
-#if 0
-
-Appendix: Understanding Affix Code
-
-
-An affix is either a prefix or a suffix attached to root words to make
-other words.
-
-Basically a Prefix or a Suffix is a set of AffEntry objects
-which store information about the prefix or suffix along
-with supporting routines to check if a word has a particular
-prefix or suffix or a combination.
-
-The structure affentry is defined as follows:
-
-struct affentry
-{
- unsigned short aflag; // ID used to represent the affix
- char * strip; // string to strip before adding affix
- char * appnd; // the affix string to add
- unsigned char stripl; // length of the strip string
- unsigned char appndl; // length of the affix string
- char numconds; // the number of conditions that must be met
- char opts; // flag: aeXPRODUCT- combine both prefix and suffix
- char conds[SETSIZE]; // array which encodes the conditions to be met
-};
-
-
-Here is a suffix borrowed from the en_US.aff file. This file
-is whitespace delimited.
-
-SFX D Y 4
-SFX D 0 d e
-SFX D y ied [^aeiou]y
-SFX D 0 ed [^ey]
-SFX D 0 ed [aeiou]y
-
-This information can be interpreted as follows:
-
-The first line has 4 fields
-
-Field
------
-1 SFX - indicates this is a suffix
-2 D - is the name of the character flag which represents this suffix
-3 Y - indicates it can be combined with prefixes (cross product)
-4 4 - indicates that a sequence of 4 affentry structures is needed to
- properly store the affix information
-
-The remaining lines describe the unique information for the 4 SfxEntry
-objects that make up this affix. Each line can be interpreted
-as follows: (note fields 1 and 2 are as a check against line 1 info)
-
-Field
------
-1 SFX - indicates this is a suffix
-2 D - is the name of the character flag for this affix
-3 y - the string of chars to strip off before adding affix
- (a 0 here indicates the NULL string)
-4 ied - the string of affix characters to add
-5 [^aeiou]y - the conditions which must be met before the affix
- can be applied
-
-Field 5 is interesting. Since this is a suffix, field 5 tells us that
-there are 2 conditions that must be met. The first condition is that
-the next to the last character in the word must *NOT* be any of the
-following "a", "e", "i", "o" or "u". The second condition is that
-the last character of the word must be "y".
-
-So how can we encode this information concisely and be able to
-test for both conditions in a fast manner? The answer is found
-by studying the wonderful ispell code of Geoff Kuenning, et al.
-(now available under a normal BSD license).
-
-If we set up a conds array of 256 bytes indexed (0 to 255) and access it
-using a character (cast to an unsigned char) of a string, we have 8 bits
-of information we can store about that character. Specifically we
-could use each bit to say if that character is allowed in any of the
-last (or first for prefixes) 8 characters of the word.
-
-Basically, each character at one end of the word (up to the number
-of conditions) is used to index into the conds array and the resulting
-value found there says whether that character is valid for a
-specific character position in the word.
-
-For prefixes, it does this by setting bit 0 if that char is valid
-in the first position, bit 1 if valid in the second position, and so on.
-
-If a bit is not set, then that char is not valid for that position in the
-word.
-
-If working with suffixes bit 0 is used for the character closest
-to the front, bit 1 for the next character towards the end, ...,
-with bit numconds-1 representing the last char at the end of the string.
-
-Note: since entries in the conds[] are 8 bits, only 8 conditions
-(read that only 8 character positions) can be examined at one
-end of a word (the beginning for prefixes and the end for suffixes).
-
-So to make this clearer, let's encode the conds array values for the
-first two affentries for the suffix D described earlier.
-
-
- For the first affentry:
- numconds = 1 (only examine the last character)
-
- conds['e'] = (1 << 0) (the word must end in an E)
- all others are all 0
-
- For the second affentry:
- numconds = 2 (only examine the last two characters)
-
- conds[X] = conds[X] | (1 << 0) (aeiou are not allowed)
- where X is all characters *but* a, e, i, o, or u
-
-
- conds['y'] = (1 << 1) (the last char must be a y)
- all other bits for all other entries in the conds array are zero
-
-
-#endif
-