smaller unifired project for hunspell

git-svn-id: http://svn.miranda-ng.org/main/trunk@16202 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
author: George Hazan <george.hazan@gmail.com> 2016-01-31 16:14:52 +0000
committer: George Hazan <george.hazan@gmail.com> 2016-01-31 16:14:52 +0000
commit: 5707c6b2b1eafbf38ee7c14f39e42c2280d294ea (patch)
tree: cefc6ec223a91562809b3a885cbde5b44f4fc798 /libs/hunspell/src/affentry.cxx
parent: 62767bfaf7f7fb988b826da797463545db14b3b5 (diff)
1 files changed, 0 insertions, 1107 deletions
diff --git a/libs/hunspell/src/affentry.cxx b/libs/hunspell/src/affentry.cxx
deleted file mode 100644
index 983fe2c1ec..0000000000
--- a/libs/hunspell/src/affentry.cxx
+++ /dev/null
@@ -1,1107 +0,0 @@
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * The Original Code is Hunspell, based on MySpell.
- *
- * The Initial Developers of the Original Code are
- * Kevin Hendricks (MySpell) and Németh László (Hunspell).
- * Portions created by the Initial Developers are Copyright (C) 2002-2005
- * the Initial Developers. All Rights Reserved.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-/*
- * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
- * And Contributors.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * 3. All modifications to the source code must be clearly marked as
- *    such.  Binary redistributions based on modified source code
- *    must be clearly marked as modified versions in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
- * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <ctype.h>
-
-#include "affentry.hxx"
-#include "csutil.hxx"
-
-#define MAXTEMPWORDLEN (MAXWORDUTF8LEN + 4)
-
-PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
-    // register affix manager
-    : pmyMgr(pmgr),
-      next(NULL),
-      nexteq(NULL),
-      nextne(NULL),
-      flgnxt(NULL) {
-  // set up its initial values
-  aflag = dp->aflag;        // flag
-  strip = dp->strip;        // string to strip
-  appnd = dp->appnd;        // string to append
-  numconds = dp->numconds;  // length of the condition
-  opts = dp->opts;          // cross product flag
-  // then copy over all of the conditions
-  if (opts & aeLONGCOND) {
-    memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1);
-    c.l.conds2 = dp->c.l.conds2;
-  } else
-    memcpy(c.conds, dp->c.conds, MAXCONDLEN);
-  morphcode = dp->morphcode;
-  contclass = dp->contclass;
-  contclasslen = dp->contclasslen;
-}
-
-PfxEntry::~PfxEntry() {
-  aflag = 0;
-  pmyMgr = NULL;
-  if (opts & aeLONGCOND)
-    free(c.l.conds2);
-  if (morphcode && !(opts & aeALIASM))
-    free(morphcode);
-  if (contclass && !(opts & aeALIASF))
-    free(contclass);
-}
-
-// add prefix to this word assuming conditions hold
-char* PfxEntry::add(const char* word, int len) {
-  if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
-      (len >= numconds) && test_condition(word) &&
-      (!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0)) &&
-      ((MAXTEMPWORDLEN) > (len + appnd.size() - strip.size()))) {
-    /* we have a match so add prefix */
-    std::string tword(appnd);
-    tword.append(word + strip.size());
-    return mystrdup(tword.c_str());
-  }
-  return NULL;
-}
-
-inline char* PfxEntry::nextchar(char* p) {
-  if (p) {
-    p++;
-    if (opts & aeLONGCOND) {
-      // jump to the 2nd part of the condition
-      if (p == c.conds + MAXCONDLEN_1)
-        return c.l.conds2;
-      // end of the MAXCONDLEN length condition
-    } else if (p == c.conds + MAXCONDLEN)
-      return NULL;
-    return *p ? p : NULL;
-  }
-  return NULL;
-}
-
-inline int PfxEntry::test_condition(const char* st) {
-  const char* pos = NULL;  // group with pos input position
-  bool neg = false;        // complementer
-  bool ingroup = false;    // character in the group
-  if (numconds == 0)
-    return 1;
-  char* p = c.conds;
-  while (1) {
-    switch (*p) {
-      case '\0':
-        return 1;
-      case '[': {
-        neg = false;
-        ingroup = false;
-        p = nextchar(p);
-        pos = st;
-        break;
-      }
-      case '^': {
-        p = nextchar(p);
-        neg = true;
-        break;
-      }
-      case ']': {
-        if ((neg && ingroup) || (!neg && !ingroup))
-          return 0;
-        pos = NULL;
-        p = nextchar(p);
-        // skip the next character
-        if (!ingroup && *st)
-          for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++)
-            ;
-        if (*st == '\0' && p)
-          return 0;  // word <= condition
-        break;
-      }
-      case '.':
-        if (!pos) {  // dots are not metacharacters in groups: [.]
-          p = nextchar(p);
-          // skip the next character
-          for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++)
-            ;
-          if (*st == '\0' && p)
-            return 0;  // word <= condition
-          break;
-        }
-      /* FALLTHROUGH */
-      default: {
-        if (*st == *p) {
-          st++;
-          p = nextchar(p);
-          if ((opts & aeUTF8) && (*(st - 1) & 0x80)) {  // multibyte
-            while (p && (*p & 0xc0) == 0x80) {          // character
-              if (*p != *st) {
-                if (!pos)
-                  return 0;
-                st = pos;
-                break;
-              }
-              p = nextchar(p);
-              st++;
-            }
-            if (pos && st != pos) {
-              ingroup = true;
-              while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
-              }
-            }
-          } else if (pos) {
-            ingroup = true;
-            while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
-            }
-          }
-        } else if (pos) {  // group
-          p = nextchar(p);
-        } else
-          return 0;
-      }
-    }
-    if (!p)
-      return 1;
-  }
-}
-
-// check if this prefix entry matches
-struct hentry* PfxEntry::checkword(const char* word,
-                                   int len,
-                                   char in_compound,
-                                   const FLAG needflag) {
-  int tmpl;           // length of tmpword
-  struct hentry* he;  // hash entry of root word or NULL
-  char tmpword[MAXTEMPWORDLEN];
-
-  // on entry prefix is 0 length or already matches the beginning of the word.
-  // So if the remaining root word has positive length
-  // and if there are enough chars in root word and added back strip chars
-  // to meet the number of characters conditions, then test it
-
-  tmpl = len - appnd.size();
-
-  if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) {
-    // generate new root word by removing prefix and adding
-    // back any characters that would have been stripped
-
-    if (strip.size()) {
-      strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1);
-      tmpword[MAXTEMPWORDLEN - 1] = '\0';
-    }
-    strcpy((tmpword + strip.size()), (word + appnd.size()));
-
-    // now make sure all of the conditions on characters
-    // are met.  Please see the appendix at the end of
-    // this file for more info on exactly what is being
-    // tested
-
-    // if all conditions are met then check if resulting
-    // root word in the dictionary
-
-    if (test_condition(tmpword)) {
-      tmpl += strip.size();
-      if ((he = pmyMgr->lookup(tmpword)) != NULL) {
-        do {
-          if (TESTAFF(he->astr, aflag, he->alen) &&
-              // forbid single prefixes with needaffix flag
-              !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
-              // needflag
-              ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
-               (contclass && TESTAFF(contclass, needflag, contclasslen))))
-            return he;
-          he = he->next_homonym;  // check homonyms
-        } while (he);
-      }
-
-      // prefix matched but no root word was found
-      // if aeXPRODUCT is allowed, try again but now
-      // ross checked combined with a suffix
-
-      // if ((opts & aeXPRODUCT) && in_compound) {
-      if ((opts & aeXPRODUCT)) {
-        he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, this, NULL, 0,
-                                  NULL, FLAG_NULL, needflag, in_compound);
-        if (he)
-          return he;
-      }
-    }
-  }
-  return NULL;
-}
-
-// check if this prefix entry matches
-struct hentry* PfxEntry::check_twosfx(const char* word,
-                                      int len,
-                                      char in_compound,
-                                      const FLAG needflag) {
-  int tmpl;           // length of tmpword
-  struct hentry* he;  // hash entry of root word or NULL
-  char tmpword[MAXTEMPWORDLEN];
-
-  // on entry prefix is 0 length or already matches the beginning of the word.
-  // So if the remaining root word has positive length
-  // and if there are enough chars in root word and added back strip chars
-  // to meet the number of characters conditions, then test it
-
-  tmpl = len - appnd.size();
-
-  if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
-      (tmpl + strip.size() >= numconds)) {
-    // generate new root word by removing prefix and adding
-    // back any characters that would have been stripped
-
-    if (strip.size()) {
-      strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1);
-      tmpword[MAXTEMPWORDLEN - 1] = '\0';
-    }
-    strcpy((tmpword + strip.size()), (word + appnd.size()));
-
-    // now make sure all of the conditions on characters
-    // are met.  Please see the appendix at the end of
-    // this file for more info on exactly what is being
-    // tested
-
-    // if all conditions are met then check if resulting
-    // root word in the dictionary
-
-    if (test_condition(tmpword)) {
-      tmpl += strip.size();
-
-      // prefix matched but no root word was found
-      // if aeXPRODUCT is allowed, try again but now
-      // cross checked combined with a suffix
-
-      if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
-        he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, this,
-                                         needflag);
-        if (he)
-          return he;
-      }
-    }
-  }
-  return NULL;
-}
-
-// check if this prefix entry matches
-char* PfxEntry::check_twosfx_morph(const char* word,
-                                   int len,
-                                   char in_compound,
-                                   const FLAG needflag) {
-  int tmpl;  // length of tmpword
-  char tmpword[MAXTEMPWORDLEN];
-
-  // on entry prefix is 0 length or already matches the beginning of the word.
-  // So if the remaining root word has positive length
-  // and if there are enough chars in root word and added back strip chars
-  // to meet the number of characters conditions, then test it
-
-  tmpl = len - appnd.size();
-
-  if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
-      (tmpl + strip.size() >= numconds)) {
-    // generate new root word by removing prefix and adding
-    // back any characters that would have been stripped
-
-    if (strip.size()) {
-      strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1);
-      tmpword[MAXTEMPWORDLEN - 1] = '\0';
-    }
-    strcpy((tmpword + strip.size()), (word + appnd.size()));
-
-    // now make sure all of the conditions on characters
-    // are met.  Please see the appendix at the end of
-    // this file for more info on exactly what is being
-    // tested
-
-    // if all conditions are met then check if resulting
-    // root word in the dictionary
-
-    if (test_condition(tmpword)) {
-      tmpl += strip.size();
-
-      // prefix matched but no root word was found
-      // if aeXPRODUCT is allowed, try again but now
-      // ross checked combined with a suffix
-
-      if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
-        return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl, aeXPRODUCT,
-                                                 this, needflag);
-      }
-    }
-  }
-  return NULL;
-}
-
-// check if this prefix entry matches
-char* PfxEntry::check_morph(const char* word,
-                            int len,
-                            char in_compound,
-                            const FLAG needflag) {
-  int tmpl;           // length of tmpword
-  struct hentry* he;  // hash entry of root word or NULL
-  char tmpword[MAXTEMPWORDLEN];
-  char result[MAXLNLEN];
-  char* st;
-
-  *result = '\0';
-
-  // on entry prefix is 0 length or already matches the beginning of the word.
-  // So if the remaining root word has positive length
-  // and if there are enough chars in root word and added back strip chars
-  // to meet the number of characters conditions, then test it
-
-  tmpl = len - appnd.size();
-
-  if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
-      (tmpl + strip.size() >= numconds)) {
-    // generate new root word by removing prefix and adding
-    // back any characters that would have been stripped
-
-    if (strip.size()) {
-      strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1);
-      tmpword[MAXTEMPWORDLEN - 1] = '\0';
-    }
-    strcpy(tmpword + strip.size(), word + appnd.size());
-
-    // now make sure all of the conditions on characters
-    // are met.  Please see the appendix at the end of
-    // this file for more info on exactly what is being
-    // tested
-
-    // if all conditions are met then check if resulting
-    // root word in the dictionary
-
-    if (test_condition(tmpword)) {
-      tmpl += strip.size();
-      if ((he = pmyMgr->lookup(tmpword)) != NULL) {
-        do {
-          if (TESTAFF(he->astr, aflag, he->alen) &&
-              // forbid single prefixes with needaffix flag
-              !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
-              // needflag
-              ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
-               (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
-            if (morphcode) {
-              mystrcat(result, " ", MAXLNLEN);
-              mystrcat(result, morphcode, MAXLNLEN);
-            } else
-              mystrcat(result, getKey(), MAXLNLEN);
-            if (!HENTRY_FIND(he, MORPH_STEM)) {
-              mystrcat(result, " ", MAXLNLEN);
-              mystrcat(result, MORPH_STEM, MAXLNLEN);
-              mystrcat(result, HENTRY_WORD(he), MAXLNLEN);
-            }
-            // store the pointer of the hash entry
-            if (HENTRY_DATA(he)) {
-              mystrcat(result, " ", MAXLNLEN);
-              mystrcat(result, HENTRY_DATA2(he), MAXLNLEN);
-            } else {
-              // return with debug information
-              char* flag = pmyMgr->encode_flag(getFlag());
-              mystrcat(result, " ", MAXLNLEN);
-              mystrcat(result, MORPH_FLAG, MAXLNLEN);
-              mystrcat(result, flag, MAXLNLEN);
-              free(flag);
-            }
-            mystrcat(result, "\n", MAXLNLEN);
-          }
-          he = he->next_homonym;
-        } while (he);
-      }
-
-      // prefix matched but no root word was found
-      // if aeXPRODUCT is allowed, try again but now
-      // ross checked combined with a suffix
-
-      if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
-        st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, this,
-                                        FLAG_NULL, needflag);
-        if (st) {
-          mystrcat(result, st, MAXLNLEN);
-          free(st);
-        }
-      }
-    }
-  }
-
-  if (*result)
-    return mystrdup(result);
-  return NULL;
-}
-
-SfxEntry::SfxEntry(AffixMgr* pmgr, affentry* dp)
-    : pmyMgr(pmgr)  // register affix manager
-      ,
-      next(NULL),
-      nexteq(NULL),
-      nextne(NULL),
-      flgnxt(NULL),
-      l_morph(NULL),
-      r_morph(NULL),
-      eq_morph(NULL) {
-  // set up its initial values
-  aflag = dp->aflag;        // char flag
-  strip = dp->strip;        // string to strip
-  appnd = dp->appnd;        // string to append
-  numconds = dp->numconds;  // length of the condition
-  opts = dp->opts;          // cross product flag
-
-  // then copy over all of the conditions
-  if (opts & aeLONGCOND) {
-    memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1);
-    c.l.conds2 = dp->c.l.conds2;
-  } else
-    memcpy(c.conds, dp->c.conds, MAXCONDLEN);
-  rappnd = myrevstrdup(appnd.c_str());
-  morphcode = dp->morphcode;
-  contclass = dp->contclass;
-  contclasslen = dp->contclasslen;
-}
-
-SfxEntry::~SfxEntry() {
-  aflag = 0;
-  if (rappnd)
-    free(rappnd);
-  pmyMgr = NULL;
-  if (opts & aeLONGCOND)
-    free(c.l.conds2);
-  if (morphcode && !(opts & aeALIASM))
-    free(morphcode);
-  if (contclass && !(opts & aeALIASF))
-    free(contclass);
-}
-
-// add suffix to this word assuming conditions hold
-char* SfxEntry::add(const char* word, int len) {
-  /* make sure all conditions match */
-  if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
-      (len >= numconds) && test_condition(word + len, word) &&
-      (!strip.size() ||
-       (strcmp(word + len - strip.size(), strip.c_str()) == 0)) &&
-      ((MAXTEMPWORDLEN) > (len + appnd.size() - strip.size()))) {
-    std::string tword(word);
-    /* we have a match so add suffix */
-    tword.replace(len - strip.size(), std::string::npos, appnd);
-    return mystrdup(tword.c_str());
-  }
-  return NULL;
-}
-
-inline char* SfxEntry::nextchar(char* p) {
-  if (p) {
-    p++;
-    if (opts & aeLONGCOND) {
-      // jump to the 2nd part of the condition
-      if (p == c.l.conds1 + MAXCONDLEN_1)
-        return c.l.conds2;
-      // end of the MAXCONDLEN length condition
-    } else if (p == c.conds + MAXCONDLEN)
-      return NULL;
-    return *p ? p : NULL;
-  }
-  return NULL;
-}
-
-inline int SfxEntry::test_condition(const char* st, const char* beg) {
-  const char* pos = NULL;  // group with pos input position
-  bool neg = false;        // complementer
-  bool ingroup = false;    // character in the group
-  if (numconds == 0)
-    return 1;
-  char* p = c.conds;
-  st--;
-  int i = 1;
-  while (1) {
-    switch (*p) {
-      case '\0':
-        return 1;
-      case '[':
-        p = nextchar(p);
-        pos = st;
-        break;
-      case '^':
-        p = nextchar(p);
-        neg = true;
-        break;
-      case ']':
-        if (!neg && !ingroup)
-          return 0;
-        i++;
-        // skip the next character
-        if (!ingroup) {
-          for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--)
-            ;
-          st--;
-        }
-        pos = NULL;
-        neg = false;
-        ingroup = false;
-        p = nextchar(p);
-        if (st < beg && p)
-          return 0;  // word <= condition
-        break;
-      case '.':
-        if (!pos) {
-          // dots are not metacharacters in groups: [.]
-          p = nextchar(p);
-          // skip the next character
-          for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80;
-               st--)
-            ;
-          if (st < beg) {  // word <= condition
-            if (p)
-              return 0;
-            else
-              return 1;
-          }
-          if ((opts & aeUTF8) && (*st & 0x80)) {  // head of the UTF-8 character
-            st--;
-            if (st < beg) {  // word <= condition
-              if (p)
-                return 0;
-              else
-                return 1;
-            }
-          }
-          break;
-        }
-      /* FALLTHROUGH */
-      default: {
-        if (*st == *p) {
-          p = nextchar(p);
-          if ((opts & aeUTF8) && (*st & 0x80)) {
-            st--;
-            while (p && (st >= beg)) {
-              if (*p != *st) {
-                if (!pos)
-                  return 0;
-                st = pos;
-                break;
-              }
-              // first byte of the UTF-8 multibyte character
-              if ((*p & 0xc0) != 0x80)
-                break;
-              p = nextchar(p);
-              st--;
-            }
-            if (pos && st != pos) {
-              if (neg)
-                return 0;
-              else if (i == numconds)
-                return 1;
-              ingroup = true;
-              while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
-              }
-              st--;
-            }
-            if (p && *p != ']')
-              p = nextchar(p);
-          } else if (pos) {
-            if (neg)
-              return 0;
-            else if (i == numconds)
-              return 1;
-            ingroup = true;
-            while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
-            }
-            //			if (p && *p != ']') p = nextchar(p);
-            st--;
-          }
-          if (!pos) {
-            i++;
-            st--;
-          }
-          if (st < beg && p && *p != ']')
-            return 0;      // word <= condition
-        } else if (pos) {  // group
-          p = nextchar(p);
-        } else
-          return 0;
-      }
-    }
-    if (!p)
-      return 1;
-  }
-}
-
-// see if this suffix is present in the word
-struct hentry* SfxEntry::checkword(const char* word,
-                                   int len,
-                                   int optflags,
-                                   PfxEntry* ppfx,
-                                   char** wlst,
-                                   int maxSug,
-                                   int* ns,
-                                   const FLAG cclass,
-                                   const FLAG needflag,
-                                   const FLAG badflag) {
-  int tmpl;           // length of tmpword
-  struct hentry* he;  // hash entry pointer
-  unsigned char* cp;
-  char tmpword[MAXTEMPWORDLEN];
-  PfxEntry* ep = ppfx;
-
-  // if this suffix is being cross checked with a prefix
-  // but it does not support cross products skip it
-
-  if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0))
-    return NULL;
-
-  // upon entry suffix is 0 length or already matches the end of the word.
-  // So if the remaining root word has positive length
-  // and if there are enough chars in root word and added back strip chars
-  // to meet the number of characters conditions, then test it
-
-  tmpl = len - appnd.size();
-  // the second condition is not enough for UTF-8 strings
-  // it checked in test_condition()
-
-  if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
-      (tmpl + strip.size() >= numconds)) {
-    // generate new root word by removing suffix and adding
-    // back any characters that would have been stripped or
-    // or null terminating the shorter string
-
-    strncpy(tmpword, word, MAXTEMPWORDLEN - 1);
-    tmpword[MAXTEMPWORDLEN - 1] = '\0';
-    cp = (unsigned char*)(tmpword + tmpl);
-    if (strip.size()) {
-      strcpy((char*)cp, strip.c_str());
-      tmpl += strip.size();
-      cp = (unsigned char*)(tmpword + tmpl);
-    } else
-      *cp = '\0';
-
-    // now make sure all of the conditions on characters
-    // are met.  Please see the appendix at the end of
-    // this file for more info on exactly what is being
-    // tested
-
-    // if all conditions are met then check if resulting
-    // root word in the dictionary
-
-    if (test_condition((char*)cp, (char*)tmpword)) {
-#ifdef SZOSZABLYA_POSSIBLE_ROOTS
-      fprintf(stdout, "%s %s %c\n", word, tmpword, aflag);
-#endif
-      if ((he = pmyMgr->lookup(tmpword)) != NULL) {
-        do {
-          // check conditional suffix (enabled by prefix)
-          if ((TESTAFF(he->astr, aflag, he->alen) ||
-               (ep && ep->getCont() &&
-                TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
-              (((optflags & aeXPRODUCT) == 0) ||
-               (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) ||
-               // enabled by prefix
-               ((contclass) &&
-                (ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))) &&
-              // handle cont. class
-              ((!cclass) ||
-               ((contclass) && TESTAFF(contclass, cclass, contclasslen))) &&
-              // check only in compound homonyms (bad flags)
-              (!badflag || !TESTAFF(he->astr, badflag, he->alen)) &&
-              // handle required flag
-              ((!needflag) ||
-               (TESTAFF(he->astr, needflag, he->alen) ||
-                ((contclass) && TESTAFF(contclass, needflag, contclasslen)))))
-            return he;
-          he = he->next_homonym;  // check homonyms
-        } while (he);
-
-        // obsolote stemming code (used only by the
-        // experimental SuffixMgr:suggest_pos_stems)
-        // store resulting root in wlst
-      } else if (wlst && (*ns < maxSug)) {
-        int cwrd = 1;
-        for (int k = 0; k < *ns; k++)
-          if (strcmp(tmpword, wlst[k]) == 0) {
-            cwrd = 0;
-            break;
-          }
-        if (cwrd) {
-          wlst[*ns] = mystrdup(tmpword);
-          if (wlst[*ns] == NULL) {
-            for (int j = 0; j < *ns; j++)
-              free(wlst[j]);
-            *ns = -1;
-            return NULL;
-          }
-          (*ns)++;
-        }
-      }
-    }
-  }
-  return NULL;
-}
-
-// see if two-level suffix is present in the word
-struct hentry* SfxEntry::check_twosfx(const char* word,
-                                      int len,
-                                      int optflags,
-                                      PfxEntry* ppfx,
-                                      const FLAG needflag) {
-  int tmpl;           // length of tmpword
-  struct hentry* he;  // hash entry pointer
-  unsigned char* cp;
-  char tmpword[MAXTEMPWORDLEN];
-  PfxEntry* ep = ppfx;
-
-  // if this suffix is being cross checked with a prefix
-  // but it does not support cross products skip it
-
-  if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
-    return NULL;
-
-  // upon entry suffix is 0 length or already matches the end of the word.
-  // So if the remaining root word has positive length
-  // and if there are enough chars in root word and added back strip chars
-  // to meet the number of characters conditions, then test it
-
-  tmpl = len - appnd.size();
-
-  if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
-      (tmpl + strip.size() >= numconds)) {
-    // generate new root word by removing suffix and adding
-    // back any characters that would have been stripped or
-    // or null terminating the shorter string
-
-    strncpy(tmpword, word, MAXTEMPWORDLEN - 1);
-    tmpword[MAXTEMPWORDLEN - 1] = '\0';
-    cp = (unsigned char*)(tmpword + tmpl);
-    if (strip.size()) {
-      strcpy((char*)cp, strip.c_str());
-      tmpl += strip.size();
-      cp = (unsigned char*)(tmpword + tmpl);
-    } else
-      *cp = '\0';
-
-    // now make sure all of the conditions on characters
-    // are met.  Please see the appendix at the end of
-    // this file for more info on exactly what is being
-    // tested
-
-    // if all conditions are met then recall suffix_check
-
-    if (test_condition((char*)cp, (char*)tmpword)) {
-      if (ppfx) {
-        // handle conditional suffix
-        if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
-          he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL,
-                                    (FLAG)aflag, needflag);
-        else
-          he = pmyMgr->suffix_check(tmpword, tmpl, optflags, ppfx, NULL, 0,
-                                    NULL, (FLAG)aflag, needflag);
-      } else {
-        he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL,
-                                  (FLAG)aflag, needflag);
-      }
-      if (he)
-        return he;
-    }
-  }
-  return NULL;
-}
-
-// see if two-level suffix is present in the word
-char* SfxEntry::check_twosfx_morph(const char* word,
-                                   int len,
-                                   int optflags,
-                                   PfxEntry* ppfx,
-                                   const FLAG needflag) {
-  int tmpl;  // length of tmpword
-  unsigned char* cp;
-  char tmpword[MAXTEMPWORDLEN];
-  PfxEntry* ep = ppfx;
-  char* st;
-
-  char result[MAXLNLEN];
-
-  *result = '\0';
-
-  // if this suffix is being cross checked with a prefix
-  // but it does not support cross products skip it
-
-  if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
-    return NULL;
-
-  // upon entry suffix is 0 length or already matches the end of the word.
-  // So if the remaining root word has positive length
-  // and if there are enough chars in root word and added back strip chars
-  // to meet the number of characters conditions, then test it
-
-  tmpl = len - appnd.size();
-
-  if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
-      (tmpl + strip.size() >= numconds)) {
-    // generate new root word by removing suffix and adding
-    // back any characters that would have been stripped or
-    // or null terminating the shorter string
-
-    strncpy(tmpword, word, MAXTEMPWORDLEN - 1);
-    tmpword[MAXTEMPWORDLEN - 1] = '\0';
-    cp = (unsigned char*)(tmpword + tmpl);
-    if (strip.size()) {
-      strcpy((char*)cp, strip.c_str());
-      tmpl += strip.size();
-      cp = (unsigned char*)(tmpword + tmpl);
-    } else
-      *cp = '\0';
-
-    // now make sure all of the conditions on characters
-    // are met.  Please see the appendix at the end of
-    // this file for more info on exactly what is being
-    // tested
-
-    // if all conditions are met then recall suffix_check
-
-    if (test_condition((char*)cp, (char*)tmpword)) {
-      if (ppfx) {
-        // handle conditional suffix
-        if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
-          st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag,
-                                          needflag);
-          if (st) {
-            if (ppfx->getMorph()) {
-              mystrcat(result, ppfx->getMorph(), MAXLNLEN);
-              mystrcat(result, " ", MAXLNLEN);
-            }
-            mystrcat(result, st, MAXLNLEN);
-            free(st);
-            mychomp(result);
-          }
-        } else {
-          st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag,
-                                          needflag);
-          if (st) {
-            mystrcat(result, st, MAXLNLEN);
-            free(st);
-            mychomp(result);
-          }
-        }
-      } else {
-        st =
-            pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
-        if (st) {
-          mystrcat(result, st, MAXLNLEN);
-          free(st);
-          mychomp(result);
-        }
-      }
-      if (*result)
-        return mystrdup(result);
-    }
-  }
-  return NULL;
-}
-
-// get next homonym with same affix
-struct hentry* SfxEntry::get_next_homonym(struct hentry* he,
-                                          int optflags,
-                                          PfxEntry* ppfx,
-                                          const FLAG cclass,
-                                          const FLAG needflag) {
-  PfxEntry* ep = ppfx;
-  FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
-
-  while (he->next_homonym) {
-    he = he->next_homonym;
-    if ((TESTAFF(he->astr, aflag, he->alen) ||
-         (ep && ep->getCont() &&
-          TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
-        ((optflags & aeXPRODUCT) == 0 || TESTAFF(he->astr, eFlag, he->alen) ||
-         // handle conditional suffix
-         ((contclass) && TESTAFF(contclass, eFlag, contclasslen))) &&
-        // handle cont. class
-        ((!cclass) ||
-         ((contclass) && TESTAFF(contclass, cclass, contclasslen))) &&
-        // handle required flag
-        ((!needflag) ||
-         (TESTAFF(he->astr, needflag, he->alen) ||
-          ((contclass) && TESTAFF(contclass, needflag, contclasslen)))))
-      return he;
-  }
-  return NULL;
-}
-
-#if 0
-
-Appendix:  Understanding Affix Code
-
-
-An affix is either a  prefix or a suffix attached to root words to make 
-other words.
-
-Basically a Prefix or a Suffix is set of AffEntry objects
-which store information about the prefix or suffix along 
-with supporting routines to check if a word has a particular 
-prefix or suffix or a combination.
-
-The structure affentry is defined as follows:
-
-struct affentry
-{
-   unsigned short aflag;    // ID used to represent the affix
-   std::string strip;       // string to strip before adding affix
-   std::string appnd;       // the affix string to add
-   char numconds;           // the number of conditions that must be met
-   char opts;               // flag: aeXPRODUCT- combine both prefix and suffix 
-   char   conds[SETSIZE];   // array which encodes the conditions to be met
-};
-
-
-Here is a suffix borrowed from the en_US.aff file.  This file 
-is whitespace delimited.
-
-SFX D Y 4 
-SFX D   0     e          d
-SFX D   y     ied        [^aeiou]y
-SFX D   0     ed         [^ey]
-SFX D   0     ed         [aeiou]y
-
-This information can be interpreted as follows:
-
-In the first line has 4 fields
-
-Field
------
-1     SFX - indicates this is a suffix
-2     D   - is the name of the character flag which represents this suffix
-3     Y   - indicates it can be combined with prefixes (cross product)
-4     4   - indicates that sequence of 4 affentry structures are needed to
-               properly store the affix information
-
-The remaining lines describe the unique information for the 4 SfxEntry 
-objects that make up this affix.  Each line can be interpreted
-as follows: (note fields 1 and 2 are as a check against line 1 info)
-
-Field
------
-1     SFX         - indicates this is a suffix
-2     D           - is the name of the character flag for this affix
-3     y           - the string of chars to strip off before adding affix
-                         (a 0 here indicates the NULL string)
-4     ied         - the string of affix characters to add
-5     [^aeiou]y   - the conditions which must be met before the affix
-                    can be applied
-
-Field 5 is interesting.  Since this is a suffix, field 5 tells us that
-there are 2 conditions that must be met.  The first condition is that 
-the next to the last character in the word must *NOT* be any of the 
-following "a", "e", "i", "o" or "u".  The second condition is that
-the last character of the word must end in "y".
-
-So how can we encode this information concisely and be able to 
-test for both conditions in a fast manner?  The answer is found
-but studying the wonderful ispell code of Geoff Kuenning, et.al. 
-(now available under a normal BSD license).
-
-If we set up a conds array of 256 bytes indexed (0 to 255) and access it
-using a character (cast to an unsigned char) of a string, we have 8 bits
-of information we can store about that character.  Specifically we
-could use each bit to say if that character is allowed in any of the 
-last (or first for prefixes) 8 characters of the word.
-
-Basically, each character at one end of the word (up to the number 
-of conditions) is used to index into the conds array and the resulting 
-value found there says whether the that character is valid for a 
-specific character position in the word.  
-
-For prefixes, it does this by setting bit 0 if that char is valid 
-in the first position, bit 1 if valid in the second position, and so on. 
-
-If a bit is not set, then that char is not valid for that postion in the
-word.
-
-If working with suffixes bit 0 is used for the character closest 
-to the front, bit 1 for the next character towards the end, ..., 
-with bit numconds-1 representing the last char at the end of the string. 
-
-Note: since entries in the conds[] are 8 bits, only 8 conditions 
-(read that only 8 character positions) can be examined at one
-end of a word (the beginning for prefixes and the end for suffixes.
-
-So to make this clearer, lets encode the conds array values for the 
-first two affentries for the suffix D described earlier.
-
-
-  For the first affentry:    
-     numconds = 1             (only examine the last character)
-
-     conds['e'] =  (1 << 0)   (the word must end in an E)
-     all others are all 0
-
-  For the second affentry:
-     numconds = 2             (only examine the last two characters)     
-
-     conds[X] = conds[X] | (1 << 0)     (aeiou are not allowed)
-         where X is all characters *but* a, e, i, o, or u
-         
-
-     conds['y'] = (1 << 1)     (the last char must be a y)
-     all other bits for all other entries in the conds array are zero
-
-#endif
author	George Hazan <george.hazan@gmail.com>	2016-01-31 16:14:52 +0000
committer	George Hazan <george.hazan@gmail.com>	2016-01-31 16:14:52 +0000
commit	5707c6b2b1eafbf38ee7c14f39e42c2280d294ea (patch)
tree	cefc6ec223a91562809b3a885cbde5b44f4fc798 /libs/hunspell/src/affentry.cxx
parent	62767bfaf7f7fb988b826da797463545db14b3b5 (diff)