From 5707c6b2b1eafbf38ee7c14f39e42c2280d294ea Mon Sep 17 00:00:00 2001 From: George Hazan Date: Sun, 31 Jan 2016 16:14:52 +0000 Subject: smaller unifired project for hunspell git-svn-id: http://svn.miranda-ng.org/main/trunk@16202 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c --- libs/hunspell/hunspell.vcxproj | 129 +- libs/hunspell/hunspell.vcxproj.filters | 603 ---- libs/hunspell/src/affentry.c++ | 1107 +++++++ libs/hunspell/src/affentry.cxx | 1107 ------- libs/hunspell/src/affixmgr.c++ | 5128 ++++++++++++++++++++++++++++++++ libs/hunspell/src/affixmgr.cxx | 5128 -------------------------------- libs/hunspell/src/csutil.c++ | 3194 ++++++++++++++++++++ libs/hunspell/src/csutil.cxx | 3194 -------------------- libs/hunspell/src/dictmgr.c++ | 216 ++ libs/hunspell/src/dictmgr.cxx | 216 -- libs/hunspell/src/filemgr.c++ | 124 + libs/hunspell/src/filemgr.cxx | 124 - libs/hunspell/src/hashmgr.c++ | 1116 +++++++ libs/hunspell/src/hashmgr.cxx | 1116 ------- libs/hunspell/src/hunspell.c++ | 2367 +++++++++++++++ libs/hunspell/src/hunspell.cxx | 2367 --------------- libs/hunspell/src/hunspelldll.c | 57 - libs/hunspell/src/hunzip.c++ | 263 ++ libs/hunspell/src/hunzip.cxx | 263 -- libs/hunspell/src/phonet.c++ | 285 ++ libs/hunspell/src/phonet.cxx | 285 -- libs/hunspell/src/replist.c++ | 175 ++ libs/hunspell/src/replist.cxx | 175 -- libs/hunspell/src/suggestmgr.c++ | 2376 +++++++++++++++ libs/hunspell/src/suggestmgr.cxx | 2376 --------------- 25 files changed, 16359 insertions(+), 17132 deletions(-) create mode 100644 libs/hunspell/src/affentry.c++ delete mode 100644 libs/hunspell/src/affentry.cxx create mode 100644 libs/hunspell/src/affixmgr.c++ delete mode 100644 libs/hunspell/src/affixmgr.cxx create mode 100644 libs/hunspell/src/csutil.c++ delete mode 100644 libs/hunspell/src/csutil.cxx create mode 100644 libs/hunspell/src/dictmgr.c++ delete mode 100644 libs/hunspell/src/dictmgr.cxx create mode 100644 libs/hunspell/src/filemgr.c++ delete mode 100644 libs/hunspell/src/filemgr.cxx create mode 100644 libs/hunspell/src/hashmgr.c++ delete mode 100644 libs/hunspell/src/hashmgr.cxx create mode 100644 libs/hunspell/src/hunspell.c++ delete mode 100644 libs/hunspell/src/hunspell.cxx delete mode 100644 libs/hunspell/src/hunspelldll.c create mode 100644 libs/hunspell/src/hunzip.c++ delete mode 100644 libs/hunspell/src/hunzip.cxx create mode 100644 libs/hunspell/src/phonet.c++ delete mode 100644 libs/hunspell/src/phonet.cxx create mode 100644 libs/hunspell/src/replist.c++ delete mode 100644 libs/hunspell/src/replist.cxx create mode 100644 libs/hunspell/src/suggestmgr.c++ delete mode 100644 libs/hunspell/src/suggestmgr.cxx (limited to 'libs') diff --git a/libs/hunspell/hunspell.vcxproj b/libs/hunspell/hunspell.vcxproj index 08f1277815..5355790b1e 100644 --- a/libs/hunspell/hunspell.vcxproj +++ b/libs/hunspell/hunspell.vcxproj @@ -22,132 +22,19 @@ {b80c2c8d-b987-48b6-afe2-8da2d17f2f6a} hunspell - - false - false - DynamicLibrary - $(SolutionDir)$(Configuration)\Libs\ - $(SolutionDir)$(Configuration)64\Libs\ - - - - 10.0 - $(ProjectDir)..\..\bin10\lib - $(ProjectDir)..\..\bin12\lib - $(ProjectDir)..\..\bin14\lib - - - $(ProjectName) - Win32Proj - Unicode - v100 - v110_xp - v120_xp - v140_xp - $(SolutionDir)$(Configuration)\Obj\$(ProjectName)\ - $(SolutionDir)$(Configuration)64\Obj\$(ProjectName)\ - true - false - - - + + + - $(ProjectDir)..\..\include;$(ProjectDir)..\..\plugins\ExternalAPI;$(ProjectDir)..\..\..\boost;%(AdditionalIncludeDirectories) - BUILDING_LIBHUNSPELL;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_WARNINGS;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) - 4995;4996;4312;4091;%(DisableSpecificWarnings) - OnlyExplicitInline - false - Level4 - NotUsing - stdafx.h - false + Sync - - winmm.lib;Wtsapi32.lib;netapi32.lib;pdh.lib;shlwapi.lib;Strmiids.lib;gdiplus.lib;dbghelp.lib;Setupapi.lib;msimg32.lib;comctl32.lib;ws2_32.lib;UxTheme.lib;Iphlpapi.lib;%(AdditionalDependencies) - $(CommonLibDir) - true - false - Windows - $(IntDir)$(TargetName).lib - false - - - $(ProjectDir)..\..\include;$(ProjectDir)..\..\include\msapi - - - - Disabled - false - DEBUG;%(PreprocessorDefinitions) - true - MultiThreadedDebugDLL - - - - - - - - - Full - true - Size - true - true - NDEBUG;%(PreprocessorDefinitions) - false - MultiThreadedDLL - true - - - true - true - UseLinkTimeCodeGeneration - /PDBALTPATH:%_PDB% - - - - - StreamingSIMDExtensions2 - - - $(ProjectDir)..\..\..\boost\stage\lib;%(AdditionalLibraryDirectories) - - - - - - - $(ProjectDir)..\..\..\boost\stage64\lib;%(AdditionalLibraryDirectories) - - - - .mir - $(OutDir)$(TargetName)$(TargetExt) - - - - - - - - - - - - - - - + + BUILDING_LIBHUNSPELL;%(PreprocessorDefinitions) + NotUsing + - - - - - - \ No newline at end of file diff --git a/libs/hunspell/hunspell.vcxproj.filters b/libs/hunspell/hunspell.vcxproj.filters index a6562c7606..de5ad9f66c 100644 --- a/libs/hunspell/hunspell.vcxproj.filters +++ b/libs/hunspell/hunspell.vcxproj.filters @@ -1,607 +1,4 @@  - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - - - Header Files - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - Header Files - - - - - - - - - - - - - - - - - - - - Resource Files - - \ No newline at end of file diff --git a/libs/hunspell/src/affentry.c++ b/libs/hunspell/src/affentry.c++ new file mode 100644 index 0000000000..983fe2c1ec --- /dev/null +++ b/libs/hunspell/src/affentry.c++ @@ -0,0 +1,1107 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Hunspell, based on MySpell. + * + * The Initial Developers of the Original Code are + * Kevin Hendricks (MySpell) and Németh László (Hunspell). + * Portions created by the Initial Developers are Copyright (C) 2002-2005 + * the Initial Developers. All Rights Reserved. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include "affentry.hxx" +#include "csutil.hxx" + +#define MAXTEMPWORDLEN (MAXWORDUTF8LEN + 4) + +PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp) + // register affix manager + : pmyMgr(pmgr), + next(NULL), + nexteq(NULL), + nextne(NULL), + flgnxt(NULL) { + // set up its initial values + aflag = dp->aflag; // flag + strip = dp->strip; // string to strip + appnd = dp->appnd; // string to append + numconds = dp->numconds; // length of the condition + opts = dp->opts; // cross product flag + // then copy over all of the conditions + if (opts & aeLONGCOND) { + memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1); + c.l.conds2 = dp->c.l.conds2; + } else + memcpy(c.conds, dp->c.conds, MAXCONDLEN); + morphcode = dp->morphcode; + contclass = dp->contclass; + contclasslen = dp->contclasslen; +} + +PfxEntry::~PfxEntry() { + aflag = 0; + pmyMgr = NULL; + if (opts & aeLONGCOND) + free(c.l.conds2); + if (morphcode && !(opts & aeALIASM)) + free(morphcode); + if (contclass && !(opts & aeALIASF)) + free(contclass); +} + +// add prefix to this word assuming conditions hold +char* PfxEntry::add(const char* word, int len) { + if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) && + (len >= numconds) && test_condition(word) && + (!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0)) && + ((MAXTEMPWORDLEN) > (len + appnd.size() - strip.size()))) { + /* we have a match so add prefix */ + std::string tword(appnd); + tword.append(word + strip.size()); + return mystrdup(tword.c_str()); + } + return NULL; +} + +inline char* PfxEntry::nextchar(char* p) { + if (p) { + p++; + if (opts & aeLONGCOND) { + // jump to the 2nd part of the condition + if (p == c.conds + MAXCONDLEN_1) + return c.l.conds2; + // end of the MAXCONDLEN length condition + } else if (p == c.conds + MAXCONDLEN) + return NULL; + return *p ? p : NULL; + } + return NULL; +} + +inline int PfxEntry::test_condition(const char* st) { + const char* pos = NULL; // group with pos input position + bool neg = false; // complementer + bool ingroup = false; // character in the group + if (numconds == 0) + return 1; + char* p = c.conds; + while (1) { + switch (*p) { + case '\0': + return 1; + case '[': { + neg = false; + ingroup = false; + p = nextchar(p); + pos = st; + break; + } + case '^': { + p = nextchar(p); + neg = true; + break; + } + case ']': { + if ((neg && ingroup) || (!neg && !ingroup)) + return 0; + pos = NULL; + p = nextchar(p); + // skip the next character + if (!ingroup && *st) + for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++) + ; + if (*st == '\0' && p) + return 0; // word <= condition + break; + } + case '.': + if (!pos) { // dots are not metacharacters in groups: [.] + p = nextchar(p); + // skip the next character + for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++) + ; + if (*st == '\0' && p) + return 0; // word <= condition + break; + } + /* FALLTHROUGH */ + default: { + if (*st == *p) { + st++; + p = nextchar(p); + if ((opts & aeUTF8) && (*(st - 1) & 0x80)) { // multibyte + while (p && (*p & 0xc0) == 0x80) { // character + if (*p != *st) { + if (!pos) + return 0; + st = pos; + break; + } + p = nextchar(p); + st++; + } + if (pos && st != pos) { + ingroup = true; + while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { + } + } + } else if (pos) { + ingroup = true; + while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { + } + } + } else if (pos) { // group + p = nextchar(p); + } else + return 0; + } + } + if (!p) + return 1; + } +} + +// check if this prefix entry matches +struct hentry* PfxEntry::checkword(const char* word, + int len, + char in_compound, + const FLAG needflag) { + int tmpl; // length of tmpword + struct hentry* he; // hash entry of root word or NULL + char tmpword[MAXTEMPWORDLEN]; + + // on entry prefix is 0 length or already matches the beginning of the word. + // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + tmpl = len - appnd.size(); + + if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) { + // generate new root word by removing prefix and adding + // back any characters that would have been stripped + + if (strip.size()) { + strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1); + tmpword[MAXTEMPWORDLEN - 1] = '\0'; + } + strcpy((tmpword + strip.size()), (word + appnd.size())); + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then check if resulting + // root word in the dictionary + + if (test_condition(tmpword)) { + tmpl += strip.size(); + if ((he = pmyMgr->lookup(tmpword)) != NULL) { + do { + if (TESTAFF(he->astr, aflag, he->alen) && + // forbid single prefixes with needaffix flag + !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) && + // needflag + ((!needflag) || TESTAFF(he->astr, needflag, he->alen) || + (contclass && TESTAFF(contclass, needflag, contclasslen)))) + return he; + he = he->next_homonym; // check homonyms + } while (he); + } + + // prefix matched but no root word was found + // if aeXPRODUCT is allowed, try again but now + // ross checked combined with a suffix + + // if ((opts & aeXPRODUCT) && in_compound) { + if ((opts & aeXPRODUCT)) { + he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, this, NULL, 0, + NULL, FLAG_NULL, needflag, in_compound); + if (he) + return he; + } + } + } + return NULL; +} + +// check if this prefix entry matches +struct hentry* PfxEntry::check_twosfx(const char* word, + int len, + char in_compound, + const FLAG needflag) { + int tmpl; // length of tmpword + struct hentry* he; // hash entry of root word or NULL + char tmpword[MAXTEMPWORDLEN]; + + // on entry prefix is 0 length or already matches the beginning of the word. + // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + tmpl = len - appnd.size(); + + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + strip.size() >= numconds)) { + // generate new root word by removing prefix and adding + // back any characters that would have been stripped + + if (strip.size()) { + strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1); + tmpword[MAXTEMPWORDLEN - 1] = '\0'; + } + strcpy((tmpword + strip.size()), (word + appnd.size())); + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then check if resulting + // root word in the dictionary + + if (test_condition(tmpword)) { + tmpl += strip.size(); + + // prefix matched but no root word was found + // if aeXPRODUCT is allowed, try again but now + // cross checked combined with a suffix + + if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { + he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, this, + needflag); + if (he) + return he; + } + } + } + return NULL; +} + +// check if this prefix entry matches +char* PfxEntry::check_twosfx_morph(const char* word, + int len, + char in_compound, + const FLAG needflag) { + int tmpl; // length of tmpword + char tmpword[MAXTEMPWORDLEN]; + + // on entry prefix is 0 length or already matches the beginning of the word. + // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + tmpl = len - appnd.size(); + + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + strip.size() >= numconds)) { + // generate new root word by removing prefix and adding + // back any characters that would have been stripped + + if (strip.size()) { + strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1); + tmpword[MAXTEMPWORDLEN - 1] = '\0'; + } + strcpy((tmpword + strip.size()), (word + appnd.size())); + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then check if resulting + // root word in the dictionary + + if (test_condition(tmpword)) { + tmpl += strip.size(); + + // prefix matched but no root word was found + // if aeXPRODUCT is allowed, try again but now + // ross checked combined with a suffix + + if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { + return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl, aeXPRODUCT, + this, needflag); + } + } + } + return NULL; +} + +// check if this prefix entry matches +char* PfxEntry::check_morph(const char* word, + int len, + char in_compound, + const FLAG needflag) { + int tmpl; // length of tmpword + struct hentry* he; // hash entry of root word or NULL + char tmpword[MAXTEMPWORDLEN]; + char result[MAXLNLEN]; + char* st; + + *result = '\0'; + + // on entry prefix is 0 length or already matches the beginning of the word. + // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + tmpl = len - appnd.size(); + + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + strip.size() >= numconds)) { + // generate new root word by removing prefix and adding + // back any characters that would have been stripped + + if (strip.size()) { + strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1); + tmpword[MAXTEMPWORDLEN - 1] = '\0'; + } + strcpy(tmpword + strip.size(), word + appnd.size()); + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then check if resulting + // root word in the dictionary + + if (test_condition(tmpword)) { + tmpl += strip.size(); + if ((he = pmyMgr->lookup(tmpword)) != NULL) { + do { + if (TESTAFF(he->astr, aflag, he->alen) && + // forbid single prefixes with needaffix flag + !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) && + // needflag + ((!needflag) || TESTAFF(he->astr, needflag, he->alen) || + (contclass && TESTAFF(contclass, needflag, contclasslen)))) { + if (morphcode) { + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, morphcode, MAXLNLEN); + } else + mystrcat(result, getKey(), MAXLNLEN); + if (!HENTRY_FIND(he, MORPH_STEM)) { + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, MORPH_STEM, MAXLNLEN); + mystrcat(result, HENTRY_WORD(he), MAXLNLEN); + } + // store the pointer of the hash entry + if (HENTRY_DATA(he)) { + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, HENTRY_DATA2(he), MAXLNLEN); + } else { + // return with debug information + char* flag = pmyMgr->encode_flag(getFlag()); + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, MORPH_FLAG, MAXLNLEN); + mystrcat(result, flag, MAXLNLEN); + free(flag); + } + mystrcat(result, "\n", MAXLNLEN); + } + he = he->next_homonym; + } while (he); + } + + // prefix matched but no root word was found + // if aeXPRODUCT is allowed, try again but now + // ross checked combined with a suffix + + if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { + st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, this, + FLAG_NULL, needflag); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + } + } + } + + if (*result) + return mystrdup(result); + return NULL; +} + +SfxEntry::SfxEntry(AffixMgr* pmgr, affentry* dp) + : pmyMgr(pmgr) // register affix manager + , + next(NULL), + nexteq(NULL), + nextne(NULL), + flgnxt(NULL), + l_morph(NULL), + r_morph(NULL), + eq_morph(NULL) { + // set up its initial values + aflag = dp->aflag; // char flag + strip = dp->strip; // string to strip + appnd = dp->appnd; // string to append + numconds = dp->numconds; // length of the condition + opts = dp->opts; // cross product flag + + // then copy over all of the conditions + if (opts & aeLONGCOND) { + memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1); + c.l.conds2 = dp->c.l.conds2; + } else + memcpy(c.conds, dp->c.conds, MAXCONDLEN); + rappnd = myrevstrdup(appnd.c_str()); + morphcode = dp->morphcode; + contclass = dp->contclass; + contclasslen = dp->contclasslen; +} + +SfxEntry::~SfxEntry() { + aflag = 0; + if (rappnd) + free(rappnd); + pmyMgr = NULL; + if (opts & aeLONGCOND) + free(c.l.conds2); + if (morphcode && !(opts & aeALIASM)) + free(morphcode); + if (contclass && !(opts & aeALIASF)) + free(contclass); +} + +// add suffix to this word assuming conditions hold +char* SfxEntry::add(const char* word, int len) { + /* make sure all conditions match */ + if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) && + (len >= numconds) && test_condition(word + len, word) && + (!strip.size() || + (strcmp(word + len - strip.size(), strip.c_str()) == 0)) && + ((MAXTEMPWORDLEN) > (len + appnd.size() - strip.size()))) { + std::string tword(word); + /* we have a match so add suffix */ + tword.replace(len - strip.size(), std::string::npos, appnd); + return mystrdup(tword.c_str()); + } + return NULL; +} + +inline char* SfxEntry::nextchar(char* p) { + if (p) { + p++; + if (opts & aeLONGCOND) { + // jump to the 2nd part of the condition + if (p == c.l.conds1 + MAXCONDLEN_1) + return c.l.conds2; + // end of the MAXCONDLEN length condition + } else if (p == c.conds + MAXCONDLEN) + return NULL; + return *p ? p : NULL; + } + return NULL; +} + +inline int SfxEntry::test_condition(const char* st, const char* beg) { + const char* pos = NULL; // group with pos input position + bool neg = false; // complementer + bool ingroup = false; // character in the group + if (numconds == 0) + return 1; + char* p = c.conds; + st--; + int i = 1; + while (1) { + switch (*p) { + case '\0': + return 1; + case '[': + p = nextchar(p); + pos = st; + break; + case '^': + p = nextchar(p); + neg = true; + break; + case ']': + if (!neg && !ingroup) + return 0; + i++; + // skip the next character + if (!ingroup) { + for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--) + ; + st--; + } + pos = NULL; + neg = false; + ingroup = false; + p = nextchar(p); + if (st < beg && p) + return 0; // word <= condition + break; + case '.': + if (!pos) { + // dots are not metacharacters in groups: [.] + p = nextchar(p); + // skip the next character + for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; + st--) + ; + if (st < beg) { // word <= condition + if (p) + return 0; + else + return 1; + } + if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 character + st--; + if (st < beg) { // word <= condition + if (p) + return 0; + else + return 1; + } + } + break; + } + /* FALLTHROUGH */ + default: { + if (*st == *p) { + p = nextchar(p); + if ((opts & aeUTF8) && (*st & 0x80)) { + st--; + while (p && (st >= beg)) { + if (*p != *st) { + if (!pos) + return 0; + st = pos; + break; + } + // first byte of the UTF-8 multibyte character + if ((*p & 0xc0) != 0x80) + break; + p = nextchar(p); + st--; + } + if (pos && st != pos) { + if (neg) + return 0; + else if (i == numconds) + return 1; + ingroup = true; + while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { + } + st--; + } + if (p && *p != ']') + p = nextchar(p); + } else if (pos) { + if (neg) + return 0; + else if (i == numconds) + return 1; + ingroup = true; + while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { + } + // if (p && *p != ']') p = nextchar(p); + st--; + } + if (!pos) { + i++; + st--; + } + if (st < beg && p && *p != ']') + return 0; // word <= condition + } else if (pos) { // group + p = nextchar(p); + } else + return 0; + } + } + if (!p) + return 1; + } +} + +// see if this suffix is present in the word +struct hentry* SfxEntry::checkword(const char* word, + int len, + int optflags, + PfxEntry* ppfx, + char** wlst, + int maxSug, + int* ns, + const FLAG cclass, + const FLAG needflag, + const FLAG badflag) { + int tmpl; // length of tmpword + struct hentry* he; // hash entry pointer + unsigned char* cp; + char tmpword[MAXTEMPWORDLEN]; + PfxEntry* ep = ppfx; + + // if this suffix is being cross checked with a prefix + // but it does not support cross products skip it + + if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0)) + return NULL; + + // upon entry suffix is 0 length or already matches the end of the word. + // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + tmpl = len - appnd.size(); + // the second condition is not enough for UTF-8 strings + // it checked in test_condition() + + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + strip.size() >= numconds)) { + // generate new root word by removing suffix and adding + // back any characters that would have been stripped or + // or null terminating the shorter string + + strncpy(tmpword, word, MAXTEMPWORDLEN - 1); + tmpword[MAXTEMPWORDLEN - 1] = '\0'; + cp = (unsigned char*)(tmpword + tmpl); + if (strip.size()) { + strcpy((char*)cp, strip.c_str()); + tmpl += strip.size(); + cp = (unsigned char*)(tmpword + tmpl); + } else + *cp = '\0'; + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then check if resulting + // root word in the dictionary + + if (test_condition((char*)cp, (char*)tmpword)) { +#ifdef SZOSZABLYA_POSSIBLE_ROOTS + fprintf(stdout, "%s %s %c\n", word, tmpword, aflag); +#endif + if ((he = pmyMgr->lookup(tmpword)) != NULL) { + do { + // check conditional suffix (enabled by prefix) + if ((TESTAFF(he->astr, aflag, he->alen) || + (ep && ep->getCont() && + TESTAFF(ep->getCont(), aflag, ep->getContLen()))) && + (((optflags & aeXPRODUCT) == 0) || + (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) || + // enabled by prefix + ((contclass) && + (ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))) && + // handle cont. class + ((!cclass) || + ((contclass) && TESTAFF(contclass, cclass, contclasslen))) && + // check only in compound homonyms (bad flags) + (!badflag || !TESTAFF(he->astr, badflag, he->alen)) && + // handle required flag + ((!needflag) || + (TESTAFF(he->astr, needflag, he->alen) || + ((contclass) && TESTAFF(contclass, needflag, contclasslen))))) + return he; + he = he->next_homonym; // check homonyms + } while (he); + + // obsolote stemming code (used only by the + // experimental SuffixMgr:suggest_pos_stems) + // store resulting root in wlst + } else if (wlst && (*ns < maxSug)) { + int cwrd = 1; + for (int k = 0; k < *ns; k++) + if (strcmp(tmpword, wlst[k]) == 0) { + cwrd = 0; + break; + } + if (cwrd) { + wlst[*ns] = mystrdup(tmpword); + if (wlst[*ns] == NULL) { + for (int j = 0; j < *ns; j++) + free(wlst[j]); + *ns = -1; + return NULL; + } + (*ns)++; + } + } + } + } + return NULL; +} + +// see if two-level suffix is present in the word +struct hentry* SfxEntry::check_twosfx(const char* word, + int len, + int optflags, + PfxEntry* ppfx, + const FLAG needflag) { + int tmpl; // length of tmpword + struct hentry* he; // hash entry pointer + unsigned char* cp; + char tmpword[MAXTEMPWORDLEN]; + PfxEntry* ep = ppfx; + + // if this suffix is being cross checked with a prefix + // but it does not support cross products skip it + + if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0) + return NULL; + + // upon entry suffix is 0 length or already matches the end of the word. + // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + tmpl = len - appnd.size(); + + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + strip.size() >= numconds)) { + // generate new root word by removing suffix and adding + // back any characters that would have been stripped or + // or null terminating the shorter string + + strncpy(tmpword, word, MAXTEMPWORDLEN - 1); + tmpword[MAXTEMPWORDLEN - 1] = '\0'; + cp = (unsigned char*)(tmpword + tmpl); + if (strip.size()) { + strcpy((char*)cp, strip.c_str()); + tmpl += strip.size(); + cp = (unsigned char*)(tmpword + tmpl); + } else + *cp = '\0'; + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then recall suffix_check + + if (test_condition((char*)cp, (char*)tmpword)) { + if (ppfx) { + // handle conditional suffix + if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) + he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, + (FLAG)aflag, needflag); + else + he = pmyMgr->suffix_check(tmpword, tmpl, optflags, ppfx, NULL, 0, + NULL, (FLAG)aflag, needflag); + } else { + he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, + (FLAG)aflag, needflag); + } + if (he) + return he; + } + } + return NULL; +} + +// see if two-level suffix is present in the word +char* SfxEntry::check_twosfx_morph(const char* word, + int len, + int optflags, + PfxEntry* ppfx, + const FLAG needflag) { + int tmpl; // length of tmpword + unsigned char* cp; + char tmpword[MAXTEMPWORDLEN]; + PfxEntry* ep = ppfx; + char* st; + + char result[MAXLNLEN]; + + *result = '\0'; + + // if this suffix is being cross checked with a prefix + // but it does not support cross products skip it + + if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0) + return NULL; + + // upon entry suffix is 0 length or already matches the end of the word. + // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + tmpl = len - appnd.size(); + + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + strip.size() >= numconds)) { + // generate new root word by removing suffix and adding + // back any characters that would have been stripped or + // or null terminating the shorter string + + strncpy(tmpword, word, MAXTEMPWORDLEN - 1); + tmpword[MAXTEMPWORDLEN - 1] = '\0'; + cp = (unsigned char*)(tmpword + tmpl); + if (strip.size()) { + strcpy((char*)cp, strip.c_str()); + tmpl += strip.size(); + cp = (unsigned char*)(tmpword + tmpl); + } else + *cp = '\0'; + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then recall suffix_check + + if (test_condition((char*)cp, (char*)tmpword)) { + if (ppfx) { + // handle conditional suffix + if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) { + st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, + needflag); + if (st) { + if (ppfx->getMorph()) { + mystrcat(result, ppfx->getMorph(), MAXLNLEN); + mystrcat(result, " ", MAXLNLEN); + } + mystrcat(result, st, MAXLNLEN); + free(st); + mychomp(result); + } + } else { + st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag, + needflag); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + mychomp(result); + } + } + } else { + st = + pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + mychomp(result); + } + } + if (*result) + return mystrdup(result); + } + } + return NULL; +} + +// get next homonym with same affix +struct hentry* SfxEntry::get_next_homonym(struct hentry* he, + int optflags, + PfxEntry* ppfx, + const FLAG cclass, + const FLAG needflag) { + PfxEntry* ep = ppfx; + FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL; + + while (he->next_homonym) { + he = he->next_homonym; + if ((TESTAFF(he->astr, aflag, he->alen) || + (ep && ep->getCont() && + TESTAFF(ep->getCont(), aflag, ep->getContLen()))) && + ((optflags & aeXPRODUCT) == 0 || TESTAFF(he->astr, eFlag, he->alen) || + // handle conditional suffix + ((contclass) && TESTAFF(contclass, eFlag, contclasslen))) && + // handle cont. class + ((!cclass) || + ((contclass) && TESTAFF(contclass, cclass, contclasslen))) && + // handle required flag + ((!needflag) || + (TESTAFF(he->astr, needflag, he->alen) || + ((contclass) && TESTAFF(contclass, needflag, contclasslen))))) + return he; + } + return NULL; +} + +#if 0 + +Appendix: Understanding Affix Code + + +An affix is either a prefix or a suffix attached to root words to make +other words. + +Basically a Prefix or a Suffix is set of AffEntry objects +which store information about the prefix or suffix along +with supporting routines to check if a word has a particular +prefix or suffix or a combination. + +The structure affentry is defined as follows: + +struct affentry +{ + unsigned short aflag; // ID used to represent the affix + std::string strip; // string to strip before adding affix + std::string appnd; // the affix string to add + char numconds; // the number of conditions that must be met + char opts; // flag: aeXPRODUCT- combine both prefix and suffix + char conds[SETSIZE]; // array which encodes the conditions to be met +}; + + +Here is a suffix borrowed from the en_US.aff file. This file +is whitespace delimited. + +SFX D Y 4 +SFX D 0 e d +SFX D y ied [^aeiou]y +SFX D 0 ed [^ey] +SFX D 0 ed [aeiou]y + +This information can be interpreted as follows: + +In the first line has 4 fields + +Field +----- +1 SFX - indicates this is a suffix +2 D - is the name of the character flag which represents this suffix +3 Y - indicates it can be combined with prefixes (cross product) +4 4 - indicates that sequence of 4 affentry structures are needed to + properly store the affix information + +The remaining lines describe the unique information for the 4 SfxEntry +objects that make up this affix. Each line can be interpreted +as follows: (note fields 1 and 2 are as a check against line 1 info) + +Field +----- +1 SFX - indicates this is a suffix +2 D - is the name of the character flag for this affix +3 y - the string of chars to strip off before adding affix + (a 0 here indicates the NULL string) +4 ied - the string of affix characters to add +5 [^aeiou]y - the conditions which must be met before the affix + can be applied + +Field 5 is interesting. Since this is a suffix, field 5 tells us that +there are 2 conditions that must be met. The first condition is that +the next to the last character in the word must *NOT* be any of the +following "a", "e", "i", "o" or "u". The second condition is that +the last character of the word must end in "y". + +So how can we encode this information concisely and be able to +test for both conditions in a fast manner? The answer is found +but studying the wonderful ispell code of Geoff Kuenning, et.al. +(now available under a normal BSD license). + +If we set up a conds array of 256 bytes indexed (0 to 255) and access it +using a character (cast to an unsigned char) of a string, we have 8 bits +of information we can store about that character. Specifically we +could use each bit to say if that character is allowed in any of the +last (or first for prefixes) 8 characters of the word. + +Basically, each character at one end of the word (up to the number +of conditions) is used to index into the conds array and the resulting +value found there says whether the that character is valid for a +specific character position in the word. + +For prefixes, it does this by setting bit 0 if that char is valid +in the first position, bit 1 if valid in the second position, and so on. + +If a bit is not set, then that char is not valid for that postion in the +word. + +If working with suffixes bit 0 is used for the character closest +to the front, bit 1 for the next character towards the end, ..., +with bit numconds-1 representing the last char at the end of the string. + +Note: since entries in the conds[] are 8 bits, only 8 conditions +(read that only 8 character positions) can be examined at one +end of a word (the beginning for prefixes and the end for suffixes. + +So to make this clearer, lets encode the conds array values for the +first two affentries for the suffix D described earlier. + + + For the first affentry: + numconds = 1 (only examine the last character) + + conds['e'] = (1 << 0) (the word must end in an E) + all others are all 0 + + For the second affentry: + numconds = 2 (only examine the last two characters) + + conds[X] = conds[X] | (1 << 0) (aeiou are not allowed) + where X is all characters *but* a, e, i, o, or u + + + conds['y'] = (1 << 1) (the last char must be a y) + all other bits for all other entries in the conds array are zero + +#endif diff --git a/libs/hunspell/src/affentry.cxx b/libs/hunspell/src/affentry.cxx deleted file mode 100644 index 983fe2c1ec..0000000000 --- a/libs/hunspell/src/affentry.cxx +++ /dev/null @@ -1,1107 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -/* - * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada - * And Contributors. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. All modifications to the source code must be clearly marked as - * such. Binary redistributions based on modified source code - * must be clearly marked as modified versions in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include - -#include "affentry.hxx" -#include "csutil.hxx" - -#define MAXTEMPWORDLEN (MAXWORDUTF8LEN + 4) - -PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp) - // register affix manager - : pmyMgr(pmgr), - next(NULL), - nexteq(NULL), - nextne(NULL), - flgnxt(NULL) { - // set up its initial values - aflag = dp->aflag; // flag - strip = dp->strip; // string to strip - appnd = dp->appnd; // string to append - numconds = dp->numconds; // length of the condition - opts = dp->opts; // cross product flag - // then copy over all of the conditions - if (opts & aeLONGCOND) { - memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1); - c.l.conds2 = dp->c.l.conds2; - } else - memcpy(c.conds, dp->c.conds, MAXCONDLEN); - morphcode = dp->morphcode; - contclass = dp->contclass; - contclasslen = dp->contclasslen; -} - -PfxEntry::~PfxEntry() { - aflag = 0; - pmyMgr = NULL; - if (opts & aeLONGCOND) - free(c.l.conds2); - if (morphcode && !(opts & aeALIASM)) - free(morphcode); - if (contclass && !(opts & aeALIASF)) - free(contclass); -} - -// add prefix to this word assuming conditions hold -char* PfxEntry::add(const char* word, int len) { - if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) && - (len >= numconds) && test_condition(word) && - (!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0)) && - ((MAXTEMPWORDLEN) > (len + appnd.size() - strip.size()))) { - /* we have a match so add prefix */ - std::string tword(appnd); - tword.append(word + strip.size()); - return mystrdup(tword.c_str()); - } - return NULL; -} - -inline char* PfxEntry::nextchar(char* p) { - if (p) { - p++; - if (opts & aeLONGCOND) { - // jump to the 2nd part of the condition - if (p == c.conds + MAXCONDLEN_1) - return c.l.conds2; - // end of the MAXCONDLEN length condition - } else if (p == c.conds + MAXCONDLEN) - return NULL; - return *p ? p : NULL; - } - return NULL; -} - -inline int PfxEntry::test_condition(const char* st) { - const char* pos = NULL; // group with pos input position - bool neg = false; // complementer - bool ingroup = false; // character in the group - if (numconds == 0) - return 1; - char* p = c.conds; - while (1) { - switch (*p) { - case '\0': - return 1; - case '[': { - neg = false; - ingroup = false; - p = nextchar(p); - pos = st; - break; - } - case '^': { - p = nextchar(p); - neg = true; - break; - } - case ']': { - if ((neg && ingroup) || (!neg && !ingroup)) - return 0; - pos = NULL; - p = nextchar(p); - // skip the next character - if (!ingroup && *st) - for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++) - ; - if (*st == '\0' && p) - return 0; // word <= condition - break; - } - case '.': - if (!pos) { // dots are not metacharacters in groups: [.] - p = nextchar(p); - // skip the next character - for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++) - ; - if (*st == '\0' && p) - return 0; // word <= condition - break; - } - /* FALLTHROUGH */ - default: { - if (*st == *p) { - st++; - p = nextchar(p); - if ((opts & aeUTF8) && (*(st - 1) & 0x80)) { // multibyte - while (p && (*p & 0xc0) == 0x80) { // character - if (*p != *st) { - if (!pos) - return 0; - st = pos; - break; - } - p = nextchar(p); - st++; - } - if (pos && st != pos) { - ingroup = true; - while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { - } - } - } else if (pos) { - ingroup = true; - while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { - } - } - } else if (pos) { // group - p = nextchar(p); - } else - return 0; - } - } - if (!p) - return 1; - } -} - -// check if this prefix entry matches -struct hentry* PfxEntry::checkword(const char* word, - int len, - char in_compound, - const FLAG needflag) { - int tmpl; // length of tmpword - struct hentry* he; // hash entry of root word or NULL - char tmpword[MAXTEMPWORDLEN]; - - // on entry prefix is 0 length or already matches the beginning of the word. - // So if the remaining root word has positive length - // and if there are enough chars in root word and added back strip chars - // to meet the number of characters conditions, then test it - - tmpl = len - appnd.size(); - - if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) { - // generate new root word by removing prefix and adding - // back any characters that would have been stripped - - if (strip.size()) { - strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1); - tmpword[MAXTEMPWORDLEN - 1] = '\0'; - } - strcpy((tmpword + strip.size()), (word + appnd.size())); - - // now make sure all of the conditions on characters - // are met. Please see the appendix at the end of - // this file for more info on exactly what is being - // tested - - // if all conditions are met then check if resulting - // root word in the dictionary - - if (test_condition(tmpword)) { - tmpl += strip.size(); - if ((he = pmyMgr->lookup(tmpword)) != NULL) { - do { - if (TESTAFF(he->astr, aflag, he->alen) && - // forbid single prefixes with needaffix flag - !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) && - // needflag - ((!needflag) || TESTAFF(he->astr, needflag, he->alen) || - (contclass && TESTAFF(contclass, needflag, contclasslen)))) - return he; - he = he->next_homonym; // check homonyms - } while (he); - } - - // prefix matched but no root word was found - // if aeXPRODUCT is allowed, try again but now - // ross checked combined with a suffix - - // if ((opts & aeXPRODUCT) && in_compound) { - if ((opts & aeXPRODUCT)) { - he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, this, NULL, 0, - NULL, FLAG_NULL, needflag, in_compound); - if (he) - return he; - } - } - } - return NULL; -} - -// check if this prefix entry matches -struct hentry* PfxEntry::check_twosfx(const char* word, - int len, - char in_compound, - const FLAG needflag) { - int tmpl; // length of tmpword - struct hentry* he; // hash entry of root word or NULL - char tmpword[MAXTEMPWORDLEN]; - - // on entry prefix is 0 length or already matches the beginning of the word. - // So if the remaining root word has positive length - // and if there are enough chars in root word and added back strip chars - // to meet the number of characters conditions, then test it - - tmpl = len - appnd.size(); - - if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && - (tmpl + strip.size() >= numconds)) { - // generate new root word by removing prefix and adding - // back any characters that would have been stripped - - if (strip.size()) { - strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1); - tmpword[MAXTEMPWORDLEN - 1] = '\0'; - } - strcpy((tmpword + strip.size()), (word + appnd.size())); - - // now make sure all of the conditions on characters - // are met. Please see the appendix at the end of - // this file for more info on exactly what is being - // tested - - // if all conditions are met then check if resulting - // root word in the dictionary - - if (test_condition(tmpword)) { - tmpl += strip.size(); - - // prefix matched but no root word was found - // if aeXPRODUCT is allowed, try again but now - // cross checked combined with a suffix - - if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { - he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, this, - needflag); - if (he) - return he; - } - } - } - return NULL; -} - -// check if this prefix entry matches -char* PfxEntry::check_twosfx_morph(const char* word, - int len, - char in_compound, - const FLAG needflag) { - int tmpl; // length of tmpword - char tmpword[MAXTEMPWORDLEN]; - - // on entry prefix is 0 length or already matches the beginning of the word. - // So if the remaining root word has positive length - // and if there are enough chars in root word and added back strip chars - // to meet the number of characters conditions, then test it - - tmpl = len - appnd.size(); - - if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && - (tmpl + strip.size() >= numconds)) { - // generate new root word by removing prefix and adding - // back any characters that would have been stripped - - if (strip.size()) { - strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1); - tmpword[MAXTEMPWORDLEN - 1] = '\0'; - } - strcpy((tmpword + strip.size()), (word + appnd.size())); - - // now make sure all of the conditions on characters - // are met. Please see the appendix at the end of - // this file for more info on exactly what is being - // tested - - // if all conditions are met then check if resulting - // root word in the dictionary - - if (test_condition(tmpword)) { - tmpl += strip.size(); - - // prefix matched but no root word was found - // if aeXPRODUCT is allowed, try again but now - // ross checked combined with a suffix - - if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { - return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl, aeXPRODUCT, - this, needflag); - } - } - } - return NULL; -} - -// check if this prefix entry matches -char* PfxEntry::check_morph(const char* word, - int len, - char in_compound, - const FLAG needflag) { - int tmpl; // length of tmpword - struct hentry* he; // hash entry of root word or NULL - char tmpword[MAXTEMPWORDLEN]; - char result[MAXLNLEN]; - char* st; - - *result = '\0'; - - // on entry prefix is 0 length or already matches the beginning of the word. - // So if the remaining root word has positive length - // and if there are enough chars in root word and added back strip chars - // to meet the number of characters conditions, then test it - - tmpl = len - appnd.size(); - - if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && - (tmpl + strip.size() >= numconds)) { - // generate new root word by removing prefix and adding - // back any characters that would have been stripped - - if (strip.size()) { - strncpy(tmpword, strip.c_str(), MAXTEMPWORDLEN - 1); - tmpword[MAXTEMPWORDLEN - 1] = '\0'; - } - strcpy(tmpword + strip.size(), word + appnd.size()); - - // now make sure all of the conditions on characters - // are met. Please see the appendix at the end of - // this file for more info on exactly what is being - // tested - - // if all conditions are met then check if resulting - // root word in the dictionary - - if (test_condition(tmpword)) { - tmpl += strip.size(); - if ((he = pmyMgr->lookup(tmpword)) != NULL) { - do { - if (TESTAFF(he->astr, aflag, he->alen) && - // forbid single prefixes with needaffix flag - !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) && - // needflag - ((!needflag) || TESTAFF(he->astr, needflag, he->alen) || - (contclass && TESTAFF(contclass, needflag, contclasslen)))) { - if (morphcode) { - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, morphcode, MAXLNLEN); - } else - mystrcat(result, getKey(), MAXLNLEN); - if (!HENTRY_FIND(he, MORPH_STEM)) { - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, MORPH_STEM, MAXLNLEN); - mystrcat(result, HENTRY_WORD(he), MAXLNLEN); - } - // store the pointer of the hash entry - if (HENTRY_DATA(he)) { - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, HENTRY_DATA2(he), MAXLNLEN); - } else { - // return with debug information - char* flag = pmyMgr->encode_flag(getFlag()); - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, MORPH_FLAG, MAXLNLEN); - mystrcat(result, flag, MAXLNLEN); - free(flag); - } - mystrcat(result, "\n", MAXLNLEN); - } - he = he->next_homonym; - } while (he); - } - - // prefix matched but no root word was found - // if aeXPRODUCT is allowed, try again but now - // ross checked combined with a suffix - - if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { - st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, this, - FLAG_NULL, needflag); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - } - } - } - - if (*result) - return mystrdup(result); - return NULL; -} - -SfxEntry::SfxEntry(AffixMgr* pmgr, affentry* dp) - : pmyMgr(pmgr) // register affix manager - , - next(NULL), - nexteq(NULL), - nextne(NULL), - flgnxt(NULL), - l_morph(NULL), - r_morph(NULL), - eq_morph(NULL) { - // set up its initial values - aflag = dp->aflag; // char flag - strip = dp->strip; // string to strip - appnd = dp->appnd; // string to append - numconds = dp->numconds; // length of the condition - opts = dp->opts; // cross product flag - - // then copy over all of the conditions - if (opts & aeLONGCOND) { - memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1); - c.l.conds2 = dp->c.l.conds2; - } else - memcpy(c.conds, dp->c.conds, MAXCONDLEN); - rappnd = myrevstrdup(appnd.c_str()); - morphcode = dp->morphcode; - contclass = dp->contclass; - contclasslen = dp->contclasslen; -} - -SfxEntry::~SfxEntry() { - aflag = 0; - if (rappnd) - free(rappnd); - pmyMgr = NULL; - if (opts & aeLONGCOND) - free(c.l.conds2); - if (morphcode && !(opts & aeALIASM)) - free(morphcode); - if (contclass && !(opts & aeALIASF)) - free(contclass); -} - -// add suffix to this word assuming conditions hold -char* SfxEntry::add(const char* word, int len) { - /* make sure all conditions match */ - if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) && - (len >= numconds) && test_condition(word + len, word) && - (!strip.size() || - (strcmp(word + len - strip.size(), strip.c_str()) == 0)) && - ((MAXTEMPWORDLEN) > (len + appnd.size() - strip.size()))) { - std::string tword(word); - /* we have a match so add suffix */ - tword.replace(len - strip.size(), std::string::npos, appnd); - return mystrdup(tword.c_str()); - } - return NULL; -} - -inline char* SfxEntry::nextchar(char* p) { - if (p) { - p++; - if (opts & aeLONGCOND) { - // jump to the 2nd part of the condition - if (p == c.l.conds1 + MAXCONDLEN_1) - return c.l.conds2; - // end of the MAXCONDLEN length condition - } else if (p == c.conds + MAXCONDLEN) - return NULL; - return *p ? p : NULL; - } - return NULL; -} - -inline int SfxEntry::test_condition(const char* st, const char* beg) { - const char* pos = NULL; // group with pos input position - bool neg = false; // complementer - bool ingroup = false; // character in the group - if (numconds == 0) - return 1; - char* p = c.conds; - st--; - int i = 1; - while (1) { - switch (*p) { - case '\0': - return 1; - case '[': - p = nextchar(p); - pos = st; - break; - case '^': - p = nextchar(p); - neg = true; - break; - case ']': - if (!neg && !ingroup) - return 0; - i++; - // skip the next character - if (!ingroup) { - for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--) - ; - st--; - } - pos = NULL; - neg = false; - ingroup = false; - p = nextchar(p); - if (st < beg && p) - return 0; // word <= condition - break; - case '.': - if (!pos) { - // dots are not metacharacters in groups: [.] - p = nextchar(p); - // skip the next character - for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; - st--) - ; - if (st < beg) { // word <= condition - if (p) - return 0; - else - return 1; - } - if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 character - st--; - if (st < beg) { // word <= condition - if (p) - return 0; - else - return 1; - } - } - break; - } - /* FALLTHROUGH */ - default: { - if (*st == *p) { - p = nextchar(p); - if ((opts & aeUTF8) && (*st & 0x80)) { - st--; - while (p && (st >= beg)) { - if (*p != *st) { - if (!pos) - return 0; - st = pos; - break; - } - // first byte of the UTF-8 multibyte character - if ((*p & 0xc0) != 0x80) - break; - p = nextchar(p); - st--; - } - if (pos && st != pos) { - if (neg) - return 0; - else if (i == numconds) - return 1; - ingroup = true; - while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { - } - st--; - } - if (p && *p != ']') - p = nextchar(p); - } else if (pos) { - if (neg) - return 0; - else if (i == numconds) - return 1; - ingroup = true; - while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { - } - // if (p && *p != ']') p = nextchar(p); - st--; - } - if (!pos) { - i++; - st--; - } - if (st < beg && p && *p != ']') - return 0; // word <= condition - } else if (pos) { // group - p = nextchar(p); - } else - return 0; - } - } - if (!p) - return 1; - } -} - -// see if this suffix is present in the word -struct hentry* SfxEntry::checkword(const char* word, - int len, - int optflags, - PfxEntry* ppfx, - char** wlst, - int maxSug, - int* ns, - const FLAG cclass, - const FLAG needflag, - const FLAG badflag) { - int tmpl; // length of tmpword - struct hentry* he; // hash entry pointer - unsigned char* cp; - char tmpword[MAXTEMPWORDLEN]; - PfxEntry* ep = ppfx; - - // if this suffix is being cross checked with a prefix - // but it does not support cross products skip it - - if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0)) - return NULL; - - // upon entry suffix is 0 length or already matches the end of the word. - // So if the remaining root word has positive length - // and if there are enough chars in root word and added back strip chars - // to meet the number of characters conditions, then test it - - tmpl = len - appnd.size(); - // the second condition is not enough for UTF-8 strings - // it checked in test_condition() - - if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && - (tmpl + strip.size() >= numconds)) { - // generate new root word by removing suffix and adding - // back any characters that would have been stripped or - // or null terminating the shorter string - - strncpy(tmpword, word, MAXTEMPWORDLEN - 1); - tmpword[MAXTEMPWORDLEN - 1] = '\0'; - cp = (unsigned char*)(tmpword + tmpl); - if (strip.size()) { - strcpy((char*)cp, strip.c_str()); - tmpl += strip.size(); - cp = (unsigned char*)(tmpword + tmpl); - } else - *cp = '\0'; - - // now make sure all of the conditions on characters - // are met. Please see the appendix at the end of - // this file for more info on exactly what is being - // tested - - // if all conditions are met then check if resulting - // root word in the dictionary - - if (test_condition((char*)cp, (char*)tmpword)) { -#ifdef SZOSZABLYA_POSSIBLE_ROOTS - fprintf(stdout, "%s %s %c\n", word, tmpword, aflag); -#endif - if ((he = pmyMgr->lookup(tmpword)) != NULL) { - do { - // check conditional suffix (enabled by prefix) - if ((TESTAFF(he->astr, aflag, he->alen) || - (ep && ep->getCont() && - TESTAFF(ep->getCont(), aflag, ep->getContLen()))) && - (((optflags & aeXPRODUCT) == 0) || - (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) || - // enabled by prefix - ((contclass) && - (ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))) && - // handle cont. class - ((!cclass) || - ((contclass) && TESTAFF(contclass, cclass, contclasslen))) && - // check only in compound homonyms (bad flags) - (!badflag || !TESTAFF(he->astr, badflag, he->alen)) && - // handle required flag - ((!needflag) || - (TESTAFF(he->astr, needflag, he->alen) || - ((contclass) && TESTAFF(contclass, needflag, contclasslen))))) - return he; - he = he->next_homonym; // check homonyms - } while (he); - - // obsolote stemming code (used only by the - // experimental SuffixMgr:suggest_pos_stems) - // store resulting root in wlst - } else if (wlst && (*ns < maxSug)) { - int cwrd = 1; - for (int k = 0; k < *ns; k++) - if (strcmp(tmpword, wlst[k]) == 0) { - cwrd = 0; - break; - } - if (cwrd) { - wlst[*ns] = mystrdup(tmpword); - if (wlst[*ns] == NULL) { - for (int j = 0; j < *ns; j++) - free(wlst[j]); - *ns = -1; - return NULL; - } - (*ns)++; - } - } - } - } - return NULL; -} - -// see if two-level suffix is present in the word -struct hentry* SfxEntry::check_twosfx(const char* word, - int len, - int optflags, - PfxEntry* ppfx, - const FLAG needflag) { - int tmpl; // length of tmpword - struct hentry* he; // hash entry pointer - unsigned char* cp; - char tmpword[MAXTEMPWORDLEN]; - PfxEntry* ep = ppfx; - - // if this suffix is being cross checked with a prefix - // but it does not support cross products skip it - - if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0) - return NULL; - - // upon entry suffix is 0 length or already matches the end of the word. - // So if the remaining root word has positive length - // and if there are enough chars in root word and added back strip chars - // to meet the number of characters conditions, then test it - - tmpl = len - appnd.size(); - - if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && - (tmpl + strip.size() >= numconds)) { - // generate new root word by removing suffix and adding - // back any characters that would have been stripped or - // or null terminating the shorter string - - strncpy(tmpword, word, MAXTEMPWORDLEN - 1); - tmpword[MAXTEMPWORDLEN - 1] = '\0'; - cp = (unsigned char*)(tmpword + tmpl); - if (strip.size()) { - strcpy((char*)cp, strip.c_str()); - tmpl += strip.size(); - cp = (unsigned char*)(tmpword + tmpl); - } else - *cp = '\0'; - - // now make sure all of the conditions on characters - // are met. Please see the appendix at the end of - // this file for more info on exactly what is being - // tested - - // if all conditions are met then recall suffix_check - - if (test_condition((char*)cp, (char*)tmpword)) { - if (ppfx) { - // handle conditional suffix - if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) - he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, - (FLAG)aflag, needflag); - else - he = pmyMgr->suffix_check(tmpword, tmpl, optflags, ppfx, NULL, 0, - NULL, (FLAG)aflag, needflag); - } else { - he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, - (FLAG)aflag, needflag); - } - if (he) - return he; - } - } - return NULL; -} - -// see if two-level suffix is present in the word -char* SfxEntry::check_twosfx_morph(const char* word, - int len, - int optflags, - PfxEntry* ppfx, - const FLAG needflag) { - int tmpl; // length of tmpword - unsigned char* cp; - char tmpword[MAXTEMPWORDLEN]; - PfxEntry* ep = ppfx; - char* st; - - char result[MAXLNLEN]; - - *result = '\0'; - - // if this suffix is being cross checked with a prefix - // but it does not support cross products skip it - - if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0) - return NULL; - - // upon entry suffix is 0 length or already matches the end of the word. - // So if the remaining root word has positive length - // and if there are enough chars in root word and added back strip chars - // to meet the number of characters conditions, then test it - - tmpl = len - appnd.size(); - - if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && - (tmpl + strip.size() >= numconds)) { - // generate new root word by removing suffix and adding - // back any characters that would have been stripped or - // or null terminating the shorter string - - strncpy(tmpword, word, MAXTEMPWORDLEN - 1); - tmpword[MAXTEMPWORDLEN - 1] = '\0'; - cp = (unsigned char*)(tmpword + tmpl); - if (strip.size()) { - strcpy((char*)cp, strip.c_str()); - tmpl += strip.size(); - cp = (unsigned char*)(tmpword + tmpl); - } else - *cp = '\0'; - - // now make sure all of the conditions on characters - // are met. Please see the appendix at the end of - // this file for more info on exactly what is being - // tested - - // if all conditions are met then recall suffix_check - - if (test_condition((char*)cp, (char*)tmpword)) { - if (ppfx) { - // handle conditional suffix - if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) { - st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, - needflag); - if (st) { - if (ppfx->getMorph()) { - mystrcat(result, ppfx->getMorph(), MAXLNLEN); - mystrcat(result, " ", MAXLNLEN); - } - mystrcat(result, st, MAXLNLEN); - free(st); - mychomp(result); - } - } else { - st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag, - needflag); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - mychomp(result); - } - } - } else { - st = - pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - mychomp(result); - } - } - if (*result) - return mystrdup(result); - } - } - return NULL; -} - -// get next homonym with same affix -struct hentry* SfxEntry::get_next_homonym(struct hentry* he, - int optflags, - PfxEntry* ppfx, - const FLAG cclass, - const FLAG needflag) { - PfxEntry* ep = ppfx; - FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL; - - while (he->next_homonym) { - he = he->next_homonym; - if ((TESTAFF(he->astr, aflag, he->alen) || - (ep && ep->getCont() && - TESTAFF(ep->getCont(), aflag, ep->getContLen()))) && - ((optflags & aeXPRODUCT) == 0 || TESTAFF(he->astr, eFlag, he->alen) || - // handle conditional suffix - ((contclass) && TESTAFF(contclass, eFlag, contclasslen))) && - // handle cont. class - ((!cclass) || - ((contclass) && TESTAFF(contclass, cclass, contclasslen))) && - // handle required flag - ((!needflag) || - (TESTAFF(he->astr, needflag, he->alen) || - ((contclass) && TESTAFF(contclass, needflag, contclasslen))))) - return he; - } - return NULL; -} - -#if 0 - -Appendix: Understanding Affix Code - - -An affix is either a prefix or a suffix attached to root words to make -other words. - -Basically a Prefix or a Suffix is set of AffEntry objects -which store information about the prefix or suffix along -with supporting routines to check if a word has a particular -prefix or suffix or a combination. - -The structure affentry is defined as follows: - -struct affentry -{ - unsigned short aflag; // ID used to represent the affix - std::string strip; // string to strip before adding affix - std::string appnd; // the affix string to add - char numconds; // the number of conditions that must be met - char opts; // flag: aeXPRODUCT- combine both prefix and suffix - char conds[SETSIZE]; // array which encodes the conditions to be met -}; - - -Here is a suffix borrowed from the en_US.aff file. This file -is whitespace delimited. - -SFX D Y 4 -SFX D 0 e d -SFX D y ied [^aeiou]y -SFX D 0 ed [^ey] -SFX D 0 ed [aeiou]y - -This information can be interpreted as follows: - -In the first line has 4 fields - -Field ------ -1 SFX - indicates this is a suffix -2 D - is the name of the character flag which represents this suffix -3 Y - indicates it can be combined with prefixes (cross product) -4 4 - indicates that sequence of 4 affentry structures are needed to - properly store the affix information - -The remaining lines describe the unique information for the 4 SfxEntry -objects that make up this affix. Each line can be interpreted -as follows: (note fields 1 and 2 are as a check against line 1 info) - -Field ------ -1 SFX - indicates this is a suffix -2 D - is the name of the character flag for this affix -3 y - the string of chars to strip off before adding affix - (a 0 here indicates the NULL string) -4 ied - the string of affix characters to add -5 [^aeiou]y - the conditions which must be met before the affix - can be applied - -Field 5 is interesting. Since this is a suffix, field 5 tells us that -there are 2 conditions that must be met. The first condition is that -the next to the last character in the word must *NOT* be any of the -following "a", "e", "i", "o" or "u". The second condition is that -the last character of the word must end in "y". - -So how can we encode this information concisely and be able to -test for both conditions in a fast manner? The answer is found -but studying the wonderful ispell code of Geoff Kuenning, et.al. -(now available under a normal BSD license). - -If we set up a conds array of 256 bytes indexed (0 to 255) and access it -using a character (cast to an unsigned char) of a string, we have 8 bits -of information we can store about that character. Specifically we -could use each bit to say if that character is allowed in any of the -last (or first for prefixes) 8 characters of the word. - -Basically, each character at one end of the word (up to the number -of conditions) is used to index into the conds array and the resulting -value found there says whether the that character is valid for a -specific character position in the word. - -For prefixes, it does this by setting bit 0 if that char is valid -in the first position, bit 1 if valid in the second position, and so on. - -If a bit is not set, then that char is not valid for that postion in the -word. - -If working with suffixes bit 0 is used for the character closest -to the front, bit 1 for the next character towards the end, ..., -with bit numconds-1 representing the last char at the end of the string. - -Note: since entries in the conds[] are 8 bits, only 8 conditions -(read that only 8 character positions) can be examined at one -end of a word (the beginning for prefixes and the end for suffixes. - -So to make this clearer, lets encode the conds array values for the -first two affentries for the suffix D described earlier. - - - For the first affentry: - numconds = 1 (only examine the last character) - - conds['e'] = (1 << 0) (the word must end in an E) - all others are all 0 - - For the second affentry: - numconds = 2 (only examine the last two characters) - - conds[X] = conds[X] | (1 << 0) (aeiou are not allowed) - where X is all characters *but* a, e, i, o, or u - - - conds['y'] = (1 << 1) (the last char must be a y) - all other bits for all other entries in the conds array are zero - -#endif diff --git a/libs/hunspell/src/affixmgr.c++ b/libs/hunspell/src/affixmgr.c++ new file mode 100644 index 0000000000..d21ff49573 --- /dev/null +++ b/libs/hunspell/src/affixmgr.c++ @@ -0,0 +1,5128 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Hunspell, based on MySpell. + * + * The Initial Developers of the Original Code are + * Kevin Hendricks (MySpell) and Németh László (Hunspell). + * Portions created by the Initial Developers are Copyright (C) 2002-2005 + * the Initial Developers. All Rights Reserved. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include + +#include + +#include "affixmgr.hxx" +#include "affentry.hxx" +#include "langnum.hxx" + +#include "csutil.hxx" + +AffixMgr::AffixMgr(const char* affpath, + HashMgr** ptr, + int* md, + const char* key) { + // register hash manager and load affix data from aff file + pHMgr = ptr[0]; + alldic = ptr; + maxdic = md; + keystring = NULL; + trystring = NULL; + encoding = NULL; + csconv = NULL; + utf8 = 0; + complexprefixes = 0; + maptable = NULL; + nummap = 0; + breaktable = NULL; + numbreak = -1; + reptable = NULL; + numrep = 0; + iconvtable = NULL; + oconvtable = NULL; + checkcpdtable = NULL; + // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN) + simplifiedcpd = 0; + numcheckcpd = 0; + defcpdtable = NULL; + numdefcpd = 0; + phone = NULL; + compoundflag = FLAG_NULL; // permits word in compound forms + compoundbegin = FLAG_NULL; // may be first word in compound forms + compoundmiddle = FLAG_NULL; // may be middle word in compound forms + compoundend = FLAG_NULL; // may be last word in compound forms + compoundroot = FLAG_NULL; // compound word signing flag + compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word + compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word + compoundmoresuffixes = 0; // allow more suffixes within compound words + checkcompounddup = 0; // forbid double words in compounds + checkcompoundrep = 0; // forbid bad compounds (may be non compound word with + // a REP substitution) + checkcompoundcase = + 0; // forbid upper and lowercase combinations at word bounds + checkcompoundtriple = 0; // forbid compounds with triple letters + simplifiedtriple = 0; // allow simplified triple letters in compounds + // (Schiff+fahrt -> Schiffahrt) + forbiddenword = FORBIDDENWORD; // forbidden word signing flag + nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag + nongramsuggest = FLAG_NULL; + lang = NULL; // language + langnum = 0; // language code (see http://l10n.openoffice.org/languages.html) + needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes + cpdwordmax = -1; // default: unlimited wordcount in compound words + cpdmin = -1; // undefined + cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words + cpdvowels = NULL; // vowels (for calculating of Hungarian compounding limit, + // O(n) search! XXX) + cpdvowels_utf16 = + NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search) + cpdvowels_utf16_len = 0; // vowels + pfxappnd = NULL; // previous prefix for counting syllables of the prefix BUG + sfxappnd = NULL; // previous suffix for counting syllables of the suffix BUG + sfxextra = 0; // modifier for syllable count of sfxappnd BUG + cpdsyllablenum = NULL; // syllable count incrementing flag + checknum = 0; // checking numbers, and word with numbers + wordchars = NULL; // letters + spec. word characters + wordchars_utf16 = NULL; // letters + spec. word characters + wordchars_utf16_len = 0; // letters + spec. word characters + ignorechars = NULL; // letters + spec. word characters + ignorechars_utf16 = NULL; // letters + spec. word characters + ignorechars_utf16_len = 0; // letters + spec. word characters + version = NULL; // affix and dictionary file version string + havecontclass = 0; // flags of possible continuing classes (double affix) + // LEMMA_PRESENT: not put root into the morphological output. Lemma presents + // in morhological description in dictionary file. It's often combined with + // PSEUDOROOT. + lemma_present = FLAG_NULL; + circumfix = FLAG_NULL; + onlyincompound = FLAG_NULL; + maxngramsugs = -1; // undefined + maxdiff = -1; // undefined + onlymaxdiff = 0; + maxcpdsugs = -1; // undefined + nosplitsugs = 0; + sugswithdots = 0; + keepcase = 0; + forceucase = 0; + warn = 0; + forbidwarn = 0; + checksharps = 0; + substandard = FLAG_NULL; + fullstrip = 0; + + sfx = NULL; + pfx = NULL; + + for (int i = 0; i < SETSIZE; i++) { + pStart[i] = NULL; + sStart[i] = NULL; + pFlag[i] = NULL; + sFlag[i] = NULL; + } + + for (int j = 0; j < CONTSIZE; j++) { + contclasses[j] = 0; + } + + if (parse_file(affpath, key)) { + HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n", affpath); + } + + if (cpdmin == -1) + cpdmin = MINCPDLEN; +} + +AffixMgr::~AffixMgr() { + // pass through linked prefix entries and clean up + for (int i = 0; i < SETSIZE; i++) { + pFlag[i] = NULL; + PfxEntry* ptr = pStart[i]; + PfxEntry* nptr = NULL; + while (ptr) { + nptr = ptr->getNext(); + delete (ptr); + ptr = nptr; + nptr = NULL; + } + } + + // pass through linked suffix entries and clean up + for (int j = 0; j < SETSIZE; j++) { + sFlag[j] = NULL; + SfxEntry* ptr = sStart[j]; + SfxEntry* nptr = NULL; + while (ptr) { + nptr = ptr->getNext(); + delete (ptr); + ptr = nptr; + nptr = NULL; + } + sStart[j] = NULL; + } + + if (keystring) + free(keystring); + keystring = NULL; + if (trystring) + free(trystring); + trystring = NULL; + if (encoding) + free(encoding); + encoding = NULL; + if (maptable) { + for (int j = 0; j < nummap; j++) { + for (int k = 0; k < maptable[j].len; k++) { + if (maptable[j].set[k]) + free(maptable[j].set[k]); + } + free(maptable[j].set); + maptable[j].set = NULL; + maptable[j].len = 0; + } + free(maptable); + maptable = NULL; + } + nummap = 0; + if (breaktable) { + for (int j = 0; j < numbreak; j++) { + if (breaktable[j]) + free(breaktable[j]); + breaktable[j] = NULL; + } + free(breaktable); + breaktable = NULL; + } + numbreak = 0; + if (reptable) { + for (int j = 0; j < numrep; j++) { + free(reptable[j].pattern); + free(reptable[j].pattern2); + } + free(reptable); + reptable = NULL; + } + if (iconvtable) + delete iconvtable; + if (oconvtable) + delete oconvtable; + if (phone && phone->rules) { + for (int j = 0; j < phone->num + 1; j++) { + free(phone->rules[j * 2]); + free(phone->rules[j * 2 + 1]); + } + free(phone->rules); + free(phone); + phone = NULL; + } + + if (defcpdtable) { + for (int j = 0; j < numdefcpd; j++) { + free(defcpdtable[j].def); + defcpdtable[j].def = NULL; + } + free(defcpdtable); + defcpdtable = NULL; + } + numrep = 0; + if (checkcpdtable) { + for (int j = 0; j < numcheckcpd; j++) { + free(checkcpdtable[j].pattern); + free(checkcpdtable[j].pattern2); + free(checkcpdtable[j].pattern3); + checkcpdtable[j].pattern = NULL; + checkcpdtable[j].pattern2 = NULL; + checkcpdtable[j].pattern3 = NULL; + } + free(checkcpdtable); + checkcpdtable = NULL; + } + numcheckcpd = 0; + FREE_FLAG(compoundflag); + FREE_FLAG(compoundbegin); + FREE_FLAG(compoundmiddle); + FREE_FLAG(compoundend); + FREE_FLAG(compoundpermitflag); + FREE_FLAG(compoundforbidflag); + FREE_FLAG(compoundroot); + FREE_FLAG(forbiddenword); + FREE_FLAG(nosuggest); + FREE_FLAG(nongramsuggest); + FREE_FLAG(needaffix); + FREE_FLAG(lemma_present); + FREE_FLAG(circumfix); + FREE_FLAG(onlyincompound); + + cpdwordmax = 0; + pHMgr = NULL; + cpdmin = 0; + cpdmaxsyllable = 0; + if (cpdvowels) + free(cpdvowels); + if (cpdvowels_utf16) + free(cpdvowels_utf16); + if (cpdsyllablenum) + free(cpdsyllablenum); + free_utf_tbl(); + if (lang) + free(lang); + if (wordchars) + free(wordchars); + if (wordchars_utf16) + free(wordchars_utf16); + if (ignorechars) + free(ignorechars); + if (ignorechars_utf16) + free(ignorechars_utf16); + if (version) + free(version); + checknum = 0; +#ifdef MOZILLA_CLIENT + delete[] csconv; +#endif +} + +void AffixMgr::finishFileMgr(FileMgr* afflst) { + delete afflst; + + // convert affix trees to sorted list + process_pfx_tree_to_list(); + process_sfx_tree_to_list(); +} + +// read in aff file and build up prefix and suffix entry objects +int AffixMgr::parse_file(const char* affpath, const char* key) { + char* line; // io buffers + char ft; // affix type + + // checking flag duplication + char dupflags[CONTSIZE]; + char dupflags_ini = 1; + + // first line indicator for removing byte order mark + int firstline = 1; + + // open the affix file + FileMgr* afflst = new FileMgr(affpath, key); + if (!afflst) { + HUNSPELL_WARNING( + stderr, "error: could not open affix description file %s\n", affpath); + return 1; + } + + // step one is to parse the affix file building up the internal + // affix data structures + + // read in each line ignoring any that do not + // start with a known line type indicator + while ((line = afflst->getline()) != NULL) { + mychomp(line); + + /* remove byte order mark */ + if (firstline) { + firstline = 0; + // Affix file begins with byte order mark: possible incompatibility with + // old Hunspell versions + if (strncmp(line, "\xEF\xBB\xBF", 3) == 0) { + memmove(line, line + 3, strlen(line + 3) + 1); + } + } + + /* parse in the keyboard string */ + if (strncmp(line, "KEY", 3) == 0) { + if (parse_string(line, &keystring, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the try string */ + if (strncmp(line, "TRY", 3) == 0) { + if (parse_string(line, &trystring, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the name of the character set used by the .dict and .aff */ + if (strncmp(line, "SET", 3) == 0) { + if (parse_string(line, &encoding, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + if (strcmp(encoding, "UTF-8") == 0) { + utf8 = 1; +#ifndef OPENOFFICEORG +#ifndef MOZILLA_CLIENT + if (initialize_utf_tbl()) { + finishFileMgr(afflst); + return 1; + } +#endif +#endif + } + } + + /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left + * writing system */ + if (strncmp(line, "COMPLEXPREFIXES", 15) == 0) + complexprefixes = 1; + + /* parse in the flag used by the controlled compound words */ + if (strncmp(line, "COMPOUNDFLAG", 12) == 0) { + if (parse_flag(line, &compoundflag, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by compound words */ + if (strncmp(line, "COMPOUNDBEGIN", 13) == 0) { + if (complexprefixes) { + if (parse_flag(line, &compoundend, afflst)) { + finishFileMgr(afflst); + return 1; + } + } else { + if (parse_flag(line, &compoundbegin, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + } + + /* parse in the flag used by compound words */ + if (strncmp(line, "COMPOUNDMIDDLE", 14) == 0) { + if (parse_flag(line, &compoundmiddle, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + /* parse in the flag used by compound words */ + if (strncmp(line, "COMPOUNDEND", 11) == 0) { + if (complexprefixes) { + if (parse_flag(line, &compoundbegin, afflst)) { + finishFileMgr(afflst); + return 1; + } + } else { + if (parse_flag(line, &compoundend, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + } + + /* parse in the data used by compound_check() method */ + if (strncmp(line, "COMPOUNDWORDMAX", 15) == 0) { + if (parse_num(line, &cpdwordmax, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag sign compounds in dictionary */ + if (strncmp(line, "COMPOUNDROOT", 12) == 0) { + if (parse_flag(line, &compoundroot, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by compound_check() method */ + if (strncmp(line, "COMPOUNDPERMITFLAG", 18) == 0) { + if (parse_flag(line, &compoundpermitflag, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by compound_check() method */ + if (strncmp(line, "COMPOUNDFORBIDFLAG", 18) == 0) { + if (parse_flag(line, &compoundforbidflag, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (strncmp(line, "COMPOUNDMORESUFFIXES", 20) == 0) { + compoundmoresuffixes = 1; + } + + if (strncmp(line, "CHECKCOMPOUNDDUP", 16) == 0) { + checkcompounddup = 1; + } + + if (strncmp(line, "CHECKCOMPOUNDREP", 16) == 0) { + checkcompoundrep = 1; + } + + if (strncmp(line, "CHECKCOMPOUNDTRIPLE", 19) == 0) { + checkcompoundtriple = 1; + } + + if (strncmp(line, "SIMPLIFIEDTRIPLE", 16) == 0) { + simplifiedtriple = 1; + } + + if (strncmp(line, "CHECKCOMPOUNDCASE", 17) == 0) { + checkcompoundcase = 1; + } + + if (strncmp(line, "NOSUGGEST", 9) == 0) { + if (parse_flag(line, &nosuggest, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (strncmp(line, "NONGRAMSUGGEST", 14) == 0) { + if (parse_flag(line, &nongramsuggest, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by forbidden words */ + if (strncmp(line, "FORBIDDENWORD", 13) == 0) { + if (parse_flag(line, &forbiddenword, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by forbidden words */ + if (strncmp(line, "LEMMA_PRESENT", 13) == 0) { + if (parse_flag(line, &lemma_present, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by circumfixes */ + if (strncmp(line, "CIRCUMFIX", 9) == 0) { + if (parse_flag(line, &circumfix, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by fogemorphemes */ + if (strncmp(line, "ONLYINCOMPOUND", 14) == 0) { + if (parse_flag(line, &onlyincompound, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by `needaffixs' */ + if (strncmp(line, "PSEUDOROOT", 10) == 0) { + if (parse_flag(line, &needaffix, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by `needaffixs' */ + if (strncmp(line, "NEEDAFFIX", 9) == 0) { + if (parse_flag(line, &needaffix, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the minimal length for words in compounds */ + if (strncmp(line, "COMPOUNDMIN", 11) == 0) { + if (parse_num(line, &cpdmin, afflst)) { + finishFileMgr(afflst); + return 1; + } + if (cpdmin < 1) + cpdmin = 1; + } + + /* parse in the max. words and syllables in compounds */ + if (strncmp(line, "COMPOUNDSYLLABLE", 16) == 0) { + if (parse_cpdsyllable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by compound_check() method */ + if (strncmp(line, "SYLLABLENUM", 11) == 0) { + if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by the controlled compound words */ + if (strncmp(line, "CHECKNUM", 8) == 0) { + checknum = 1; + } + + /* parse in the extra word characters */ + if (strncmp(line, "WORDCHARS", 9) == 0) { + if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, + utf8, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the ignored characters (for example, Arabic optional diacretics + * charachters */ + if (strncmp(line, "IGNORE", 6) == 0) { + if (parse_array(line, &ignorechars, &ignorechars_utf16, + &ignorechars_utf16_len, utf8, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the typical fault correcting table */ + if (strncmp(line, "REP", 3) == 0) { + if (parse_reptable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the input conversion table */ + if (strncmp(line, "ICONV", 5) == 0) { + if (parse_convtable(line, afflst, &iconvtable, "ICONV")) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the input conversion table */ + if (strncmp(line, "OCONV", 5) == 0) { + if (parse_convtable(line, afflst, &oconvtable, "OCONV")) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the phonetic translation table */ + if (strncmp(line, "PHONE", 5) == 0) { + if (parse_phonetable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the checkcompoundpattern table */ + if (strncmp(line, "CHECKCOMPOUNDPATTERN", 20) == 0) { + if (parse_checkcpdtable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the defcompound table */ + if (strncmp(line, "COMPOUNDRULE", 12) == 0) { + if (parse_defcpdtable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the related character map table */ + if (strncmp(line, "MAP", 3) == 0) { + if (parse_maptable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the word breakpoints table */ + if (strncmp(line, "BREAK", 5) == 0) { + if (parse_breaktable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the language for language specific codes */ + if (strncmp(line, "LANG", 4) == 0) { + if (parse_string(line, &lang, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + langnum = get_lang_num(lang); + } + + if (strncmp(line, "VERSION", 7) == 0) { + for (line = line + 7; *line == ' ' || *line == '\t'; line++) + ; + version = mystrdup(line); + } + + if (strncmp(line, "MAXNGRAMSUGS", 12) == 0) { + if (parse_num(line, &maxngramsugs, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (strncmp(line, "ONLYMAXDIFF", 11) == 0) + onlymaxdiff = 1; + + if (strncmp(line, "MAXDIFF", 7) == 0) { + if (parse_num(line, &maxdiff, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (strncmp(line, "MAXCPDSUGS", 10) == 0) { + if (parse_num(line, &maxcpdsugs, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (strncmp(line, "NOSPLITSUGS", 11) == 0) { + nosplitsugs = 1; + } + + if (strncmp(line, "FULLSTRIP", 9) == 0) { + fullstrip = 1; + } + + if (strncmp(line, "SUGSWITHDOTS", 12) == 0) { + sugswithdots = 1; + } + + /* parse in the flag used by forbidden words */ + if (strncmp(line, "KEEPCASE", 8) == 0) { + if (parse_flag(line, &keepcase, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by `forceucase' */ + if (strncmp(line, "FORCEUCASE", 10) == 0) { + if (parse_flag(line, &forceucase, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by `warn' */ + if (strncmp(line, "WARN", 4) == 0) { + if (parse_flag(line, &warn, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (strncmp(line, "FORBIDWARN", 10) == 0) { + forbidwarn = 1; + } + + /* parse in the flag used by the affix generator */ + if (strncmp(line, "SUBSTANDARD", 11) == 0) { + if (parse_flag(line, &substandard, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (strncmp(line, "CHECKSHARPS", 11) == 0) { + checksharps = 1; + } + + /* parse this affix: P - prefix, S - suffix */ + ft = ' '; + if (strncmp(line, "PFX", 3) == 0) + ft = complexprefixes ? 'S' : 'P'; + if (strncmp(line, "SFX", 3) == 0) + ft = complexprefixes ? 'P' : 'S'; + if (ft != ' ') { + if (dupflags_ini) { + memset(dupflags, 0, sizeof(dupflags)); + dupflags_ini = 0; + } + if (parse_affix(line, ft, afflst, dupflags)) { + finishFileMgr(afflst); + return 1; + } + } + } + + finishFileMgr(afflst); + // affix trees are sorted now + + // now we can speed up performance greatly taking advantage of the + // relationship between the affixes and the idea of "subsets". + + // View each prefix as a potential leading subset of another and view + // each suffix (reversed) as a potential trailing subset of another. + + // To illustrate this relationship if we know the prefix "ab" is found in the + // word to examine, only prefixes that "ab" is a leading subset of need be + // examined. + // Furthermore is "ab" is not present then none of the prefixes that "ab" is + // is a subset need be examined. + // The same argument goes for suffix string that are reversed. + + // Then to top this off why not examine the first char of the word to quickly + // limit the set of prefixes to examine (i.e. the prefixes to examine must + // be leading supersets of the first character of the word (if they exist) + + // To take advantage of this "subset" relationship, we need to add two links + // from entry. One to take next if the current prefix is found (call it + // nexteq) + // and one to take next if the current prefix is not found (call it nextne). + + // Since we have built ordered lists, all that remains is to properly + // initialize + // the nextne and nexteq pointers that relate them + + process_pfx_order(); + process_sfx_order(); + + /* get encoding for CHECKCOMPOUNDCASE */ + if (!utf8) { + char* enc = get_encoding(); + csconv = get_current_cs(enc); + free(enc); + enc = NULL; + + std::string expw; + if (wordchars) { + expw.assign(wordchars); + free(wordchars); + } + + for (int i = 0; i <= 255; i++) { + if ((csconv[i].cupper != csconv[i].clower) && + (expw.find((char)i) == std::string::npos)) { + expw.push_back((char)i); + } + } + + wordchars = mystrdup(expw.c_str()); + } + + // default BREAK definition + if (numbreak == -1) { + breaktable = (char**)malloc(sizeof(char*) * 3); + if (!breaktable) + return 1; + breaktable[0] = mystrdup("-"); + breaktable[1] = mystrdup("^-"); + breaktable[2] = mystrdup("-$"); + if (breaktable[0] && breaktable[1] && breaktable[2]) + numbreak = 3; + } + return 0; +} + +// we want to be able to quickly access prefix information +// both by prefix flag, and sorted by prefix string itself +// so we need to set up two indexes + +int AffixMgr::build_pfxtree(PfxEntry* pfxptr) { + PfxEntry* ptr; + PfxEntry* pptr; + PfxEntry* ep = pfxptr; + + // get the right starting points + const char* key = ep->getKey(); + const unsigned char flg = (unsigned char)(ep->getFlag() & 0x00FF); + + // first index by flag which must exist + ptr = pFlag[flg]; + ep->setFlgNxt(ptr); + pFlag[flg] = ep; + + // handle the special case of null affix string + if (strlen(key) == 0) { + // always inset them at head of list at element 0 + ptr = pStart[0]; + ep->setNext(ptr); + pStart[0] = ep; + return 0; + } + + // now handle the normal case + ep->setNextEQ(NULL); + ep->setNextNE(NULL); + + unsigned char sp = *((const unsigned char*)key); + ptr = pStart[sp]; + + // handle the first insert + if (!ptr) { + pStart[sp] = ep; + return 0; + } + + // otherwise use binary tree insertion so that a sorted + // list can easily be generated later + pptr = NULL; + for (;;) { + pptr = ptr; + if (strcmp(ep->getKey(), ptr->getKey()) <= 0) { + ptr = ptr->getNextEQ(); + if (!ptr) { + pptr->setNextEQ(ep); + break; + } + } else { + ptr = ptr->getNextNE(); + if (!ptr) { + pptr->setNextNE(ep); + break; + } + } + } + return 0; +} + +// we want to be able to quickly access suffix information +// both by suffix flag, and sorted by the reverse of the +// suffix string itself; so we need to set up two indexes +int AffixMgr::build_sfxtree(SfxEntry* sfxptr) { + SfxEntry* ptr; + SfxEntry* pptr; + SfxEntry* ep = sfxptr; + + /* get the right starting point */ + const char* key = ep->getKey(); + const unsigned char flg = (unsigned char)(ep->getFlag() & 0x00FF); + + // first index by flag which must exist + ptr = sFlag[flg]; + ep->setFlgNxt(ptr); + sFlag[flg] = ep; + + // next index by affix string + + // handle the special case of null affix string + if (strlen(key) == 0) { + // always inset them at head of list at element 0 + ptr = sStart[0]; + ep->setNext(ptr); + sStart[0] = ep; + return 0; + } + + // now handle the normal case + ep->setNextEQ(NULL); + ep->setNextNE(NULL); + + unsigned char sp = *((const unsigned char*)key); + ptr = sStart[sp]; + + // handle the first insert + if (!ptr) { + sStart[sp] = ep; + return 0; + } + + // otherwise use binary tree insertion so that a sorted + // list can easily be generated later + pptr = NULL; + for (;;) { + pptr = ptr; + if (strcmp(ep->getKey(), ptr->getKey()) <= 0) { + ptr = ptr->getNextEQ(); + if (!ptr) { + pptr->setNextEQ(ep); + break; + } + } else { + ptr = ptr->getNextNE(); + if (!ptr) { + pptr->setNextNE(ep); + break; + } + } + } + return 0; +} + +// convert from binary tree to sorted list +int AffixMgr::process_pfx_tree_to_list() { + for (int i = 1; i < SETSIZE; i++) { + pStart[i] = process_pfx_in_order(pStart[i], NULL); + } + return 0; +} + +PfxEntry* AffixMgr::process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr) { + if (ptr) { + nptr = process_pfx_in_order(ptr->getNextNE(), nptr); + ptr->setNext(nptr); + nptr = process_pfx_in_order(ptr->getNextEQ(), ptr); + } + return nptr; +} + +// convert from binary tree to sorted list +int AffixMgr::process_sfx_tree_to_list() { + for (int i = 1; i < SETSIZE; i++) { + sStart[i] = process_sfx_in_order(sStart[i], NULL); + } + return 0; +} + +SfxEntry* AffixMgr::process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr) { + if (ptr) { + nptr = process_sfx_in_order(ptr->getNextNE(), nptr); + ptr->setNext(nptr); + nptr = process_sfx_in_order(ptr->getNextEQ(), ptr); + } + return nptr; +} + +// reinitialize the PfxEntry links NextEQ and NextNE to speed searching +// using the idea of leading subsets this time +int AffixMgr::process_pfx_order() { + PfxEntry* ptr; + + // loop through each prefix list starting point + for (int i = 1; i < SETSIZE; i++) { + ptr = pStart[i]; + + // look through the remainder of the list + // and find next entry with affix that + // the current one is not a subset of + // mark that as destination for NextNE + // use next in list that you are a subset + // of as NextEQ + + for (; ptr != NULL; ptr = ptr->getNext()) { + PfxEntry* nptr = ptr->getNext(); + for (; nptr != NULL; nptr = nptr->getNext()) { + if (!isSubset(ptr->getKey(), nptr->getKey())) + break; + } + ptr->setNextNE(nptr); + ptr->setNextEQ(NULL); + if ((ptr->getNext()) && + isSubset(ptr->getKey(), (ptr->getNext())->getKey())) + ptr->setNextEQ(ptr->getNext()); + } + + // now clean up by adding smart search termination strings: + // if you are already a superset of the previous prefix + // but not a subset of the next, search can end here + // so set NextNE properly + + ptr = pStart[i]; + for (; ptr != NULL; ptr = ptr->getNext()) { + PfxEntry* nptr = ptr->getNext(); + PfxEntry* mptr = NULL; + for (; nptr != NULL; nptr = nptr->getNext()) { + if (!isSubset(ptr->getKey(), nptr->getKey())) + break; + mptr = nptr; + } + if (mptr) + mptr->setNextNE(NULL); + } + } + return 0; +} + +// initialize the SfxEntry links NextEQ and NextNE to speed searching +// using the idea of leading subsets this time +int AffixMgr::process_sfx_order() { + SfxEntry* ptr; + + // loop through each prefix list starting point + for (int i = 1; i < SETSIZE; i++) { + ptr = sStart[i]; + + // look through the remainder of the list + // and find next entry with affix that + // the current one is not a subset of + // mark that as destination for NextNE + // use next in list that you are a subset + // of as NextEQ + + for (; ptr != NULL; ptr = ptr->getNext()) { + SfxEntry* nptr = ptr->getNext(); + for (; nptr != NULL; nptr = nptr->getNext()) { + if (!isSubset(ptr->getKey(), nptr->getKey())) + break; + } + ptr->setNextNE(nptr); + ptr->setNextEQ(NULL); + if ((ptr->getNext()) && + isSubset(ptr->getKey(), (ptr->getNext())->getKey())) + ptr->setNextEQ(ptr->getNext()); + } + + // now clean up by adding smart search termination strings: + // if you are already a superset of the previous suffix + // but not a subset of the next, search can end here + // so set NextNE properly + + ptr = sStart[i]; + for (; ptr != NULL; ptr = ptr->getNext()) { + SfxEntry* nptr = ptr->getNext(); + SfxEntry* mptr = NULL; + for (; nptr != NULL; nptr = nptr->getNext()) { + if (!isSubset(ptr->getKey(), nptr->getKey())) + break; + mptr = nptr; + } + if (mptr) + mptr->setNextNE(NULL); + } + } + return 0; +} + +// add flags to the result for dictionary debugging +void AffixMgr::debugflag(char* result, unsigned short flag) { + char* st = encode_flag(flag); + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, MORPH_FLAG, MAXLNLEN); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } +} + +// add flags to the result for dictionary debugging +std::string& AffixMgr::debugflag(std::string& result, unsigned short flag) { + char* st = encode_flag(flag); + result.append(" "); + result.append(MORPH_FLAG); + if (st) { + result.append(st); + free(st); + } + return result; +} + +// calculate the character length of the condition +int AffixMgr::condlen(char* st) { + int l = 0; + bool group = false; + for (; *st; st++) { + if (*st == '[') { + group = true; + l++; + } else if (*st == ']') + group = false; + else if (!group && (!utf8 || (!(*st & 0x80) || ((*st & 0xc0) == 0x80)))) + l++; + } + return l; +} + +int AffixMgr::encodeit(affentry& entry, char* cs) { + if (strcmp(cs, ".") != 0) { + entry.numconds = (char)condlen(cs); + // coverity[buffer_size_warning] - deliberate use of lack of end of conds + // padded by strncpy as long condition flag + strncpy(entry.c.conds, cs, MAXCONDLEN); + if (entry.c.conds[MAXCONDLEN - 1] && cs[MAXCONDLEN]) { + entry.opts += aeLONGCOND; + entry.c.l.conds2 = mystrdup(cs + MAXCONDLEN_1); + if (!entry.c.l.conds2) + return 1; + } + } else { + entry.numconds = 0; + entry.c.conds[0] = '\0'; + } + return 0; +} + +// return 1 if s1 is a leading subset of s2 (dots are for infixes) +inline int AffixMgr::isSubset(const char* s1, const char* s2) { + while (((*s1 == *s2) || (*s1 == '.')) && (*s1 != '\0')) { + s1++; + s2++; + } + return (*s1 == '\0'); +} + +// check word for prefixes +struct hentry* AffixMgr::prefix_check(const char* word, + int len, + char in_compound, + const FLAG needflag) { + struct hentry* rv = NULL; + + pfx = NULL; + pfxappnd = NULL; + sfxappnd = NULL; + sfxextra = 0; + + // first handle the special case of 0 length prefixes + PfxEntry* pe = pStart[0]; + while (pe) { + if ( + // fogemorpheme + ((in_compound != IN_CPD_NOT) || + !(pe->getCont() && + (TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())))) && + // permit prefixes in compounds + ((in_compound != IN_CPD_END) || + (pe->getCont() && + (TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen()))))) { + // check prefix + rv = pe->checkword(word, len, in_compound, needflag); + if (rv) { + pfx = pe; // BUG: pfx not stateless + return rv; + } + } + pe = pe->getNext(); + } + + // now handle the general case + unsigned char sp = *((const unsigned char*)word); + PfxEntry* pptr = pStart[sp]; + + while (pptr) { + if (isSubset(pptr->getKey(), word)) { + if ( + // fogemorpheme + ((in_compound != IN_CPD_NOT) || + !(pptr->getCont() && + (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen())))) && + // permit prefixes in compounds + ((in_compound != IN_CPD_END) || + (pptr->getCont() && (TESTAFF(pptr->getCont(), compoundpermitflag, + pptr->getContLen()))))) { + // check prefix + rv = pptr->checkword(word, len, in_compound, needflag); + if (rv) { + pfx = pptr; // BUG: pfx not stateless + return rv; + } + } + pptr = pptr->getNextEQ(); + } else { + pptr = pptr->getNextNE(); + } + } + + return NULL; +} + +// check word for prefixes +struct hentry* AffixMgr::prefix_check_twosfx(const char* word, + int len, + char in_compound, + const FLAG needflag) { + struct hentry* rv = NULL; + + pfx = NULL; + sfxappnd = NULL; + sfxextra = 0; + + // first handle the special case of 0 length prefixes + PfxEntry* pe = pStart[0]; + + while (pe) { + rv = pe->check_twosfx(word, len, in_compound, needflag); + if (rv) + return rv; + pe = pe->getNext(); + } + + // now handle the general case + unsigned char sp = *((const unsigned char*)word); + PfxEntry* pptr = pStart[sp]; + + while (pptr) { + if (isSubset(pptr->getKey(), word)) { + rv = pptr->check_twosfx(word, len, in_compound, needflag); + if (rv) { + pfx = pptr; + return rv; + } + pptr = pptr->getNextEQ(); + } else { + pptr = pptr->getNextNE(); + } + } + + return NULL; +} + +// check word for prefixes +char* AffixMgr::prefix_check_morph(const char* word, + int len, + char in_compound, + const FLAG needflag) { + char* st; + + char result[MAXLNLEN]; + result[0] = '\0'; + + pfx = NULL; + sfxappnd = NULL; + sfxextra = 0; + + // first handle the special case of 0 length prefixes + PfxEntry* pe = pStart[0]; + while (pe) { + st = pe->check_morph(word, len, in_compound, needflag); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + // if (rv) return rv; + pe = pe->getNext(); + } + + // now handle the general case + unsigned char sp = *((const unsigned char*)word); + PfxEntry* pptr = pStart[sp]; + + while (pptr) { + if (isSubset(pptr->getKey(), word)) { + st = pptr->check_morph(word, len, in_compound, needflag); + if (st) { + // fogemorpheme + if ((in_compound != IN_CPD_NOT) || + !((pptr->getCont() && (TESTAFF(pptr->getCont(), onlyincompound, + pptr->getContLen()))))) { + mystrcat(result, st, MAXLNLEN); + pfx = pptr; + } + free(st); + } + pptr = pptr->getNextEQ(); + } else { + pptr = pptr->getNextNE(); + } + } + + if (*result) + return mystrdup(result); + return NULL; +} + +// check word for prefixes +char* AffixMgr::prefix_check_twosfx_morph(const char* word, + int len, + char in_compound, + const FLAG needflag) { + char* st; + + char result[MAXLNLEN]; + result[0] = '\0'; + + pfx = NULL; + sfxappnd = NULL; + sfxextra = 0; + + // first handle the special case of 0 length prefixes + PfxEntry* pe = pStart[0]; + while (pe) { + st = pe->check_twosfx_morph(word, len, in_compound, needflag); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + pe = pe->getNext(); + } + + // now handle the general case + unsigned char sp = *((const unsigned char*)word); + PfxEntry* pptr = pStart[sp]; + + while (pptr) { + if (isSubset(pptr->getKey(), word)) { + st = pptr->check_twosfx_morph(word, len, in_compound, needflag); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + pfx = pptr; + } + pptr = pptr->getNextEQ(); + } else { + pptr = pptr->getNextNE(); + } + } + + if (*result) + return mystrdup(result); + return NULL; +} + +// Is word a non compound with a REP substitution (see checkcompoundrep)? +int AffixMgr::cpdrep_check(const char* word, int wl) { + const char* r; + + if ((wl < 2) || !numrep) + return 0; + + for (int i = 0; i < numrep; i++) { + r = word; + int lenp = strlen(reptable[i].pattern); + // search every occurence of the pattern in the word + while ((r = strstr(r, reptable[i].pattern)) != NULL) { + std::string candidate(word); + candidate.replace(r - word, lenp, reptable[i].pattern2); + if (candidate_check(candidate.c_str(), candidate.size())) + return 1; + r++; // search for the next letter + } + } + return 0; +} + +// forbid compoundings when there are special patterns at word bound +int AffixMgr::cpdpat_check(const char* word, + int pos, + hentry* r1, + hentry* r2, + const char /*affixed*/) { + int len; + for (int i = 0; i < numcheckcpd; i++) { + if (isSubset(checkcpdtable[i].pattern2, word + pos) && + (!r1 || !checkcpdtable[i].cond || + (r1->astr && TESTAFF(r1->astr, checkcpdtable[i].cond, r1->alen))) && + (!r2 || !checkcpdtable[i].cond2 || + (r2->astr && TESTAFF(r2->astr, checkcpdtable[i].cond2, r2->alen))) && + // zero length pattern => only TESTAFF + // zero pattern (0/flag) => unmodified stem (zero affixes allowed) + (!*(checkcpdtable[i].pattern) || + ((*(checkcpdtable[i].pattern) == '0' && r1->blen <= pos && + strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) || + (*(checkcpdtable[i].pattern) != '0' && + ((len = strlen(checkcpdtable[i].pattern)) != 0) && + strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)))) { + return 1; + } + } + return 0; +} + +// forbid compounding with neighbouring upper and lower case characters at word +// bounds +int AffixMgr::cpdcase_check(const char* word, int pos) { + if (utf8) { + w_char u, w; + const char* p; + u8_u16(&u, 1, word + pos); + for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--) + ; + u8_u16(&w, 1, p); + unsigned short a = (u.h << 8) + u.l; + unsigned short b = (w.h << 8) + w.l; + if (((unicodetoupper(a, langnum) == a) || + (unicodetoupper(b, langnum) == b)) && + (a != '-') && (b != '-')) + return 1; + } else { + unsigned char a = *(word + pos - 1); + unsigned char b = *(word + pos); + if ((csconv[a].ccase || csconv[b].ccase) && (a != '-') && (b != '-')) + return 1; + } + return 0; +} + +// check compound patterns +int AffixMgr::defcpd_check(hentry*** words, + short wnum, + hentry* rv, + hentry** def, + char all) { + signed short + btpp[MAXWORDLEN]; // metacharacter (*, ?) positions for backtracking + signed short btwp[MAXWORDLEN]; // word positions for metacharacters + int btnum[MAXWORDLEN]; // number of matched characters in metacharacter + // positions + short bt = 0; + int i, j; + int ok; + int w = 0; + + if (!*words) { + w = 1; + *words = def; + } + + if (!*words) { + return 0; + } + + (*words)[wnum] = rv; + + // has the last word COMPOUNDRULE flag? + if (rv->alen == 0) { + (*words)[wnum] = NULL; + if (w) + *words = NULL; + return 0; + } + ok = 0; + for (i = 0; i < numdefcpd; i++) { + for (j = 0; j < defcpdtable[i].len; j++) { + if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' && + TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) { + ok = 1; + break; + } + } + } + if (ok == 0) { + (*words)[wnum] = NULL; + if (w) + *words = NULL; + return 0; + } + + for (i = 0; i < numdefcpd; i++) { + signed short pp = 0; // pattern position + signed short wp = 0; // "words" position + int ok2; + ok = 1; + ok2 = 1; + do { + while ((pp < defcpdtable[i].len) && (wp <= wnum)) { + if (((pp + 1) < defcpdtable[i].len) && + ((defcpdtable[i].def[pp + 1] == '*') || + (defcpdtable[i].def[pp + 1] == '?'))) { + int wend = (defcpdtable[i].def[pp + 1] == '?') ? wp : wnum; + ok2 = 1; + pp += 2; + btpp[bt] = pp; + btwp[bt] = wp; + while (wp <= wend) { + if (!(*words)[wp]->alen || + !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp - 2], + (*words)[wp]->alen)) { + ok2 = 0; + break; + } + wp++; + } + if (wp <= wnum) + ok2 = 0; + btnum[bt] = wp - btwp[bt]; + if (btnum[bt] > 0) + bt++; + if (ok2) + break; + } else { + ok2 = 1; + if (!(*words)[wp] || !(*words)[wp]->alen || + !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp], + (*words)[wp]->alen)) { + ok = 0; + break; + } + pp++; + wp++; + if ((defcpdtable[i].len == pp) && !(wp > wnum)) + ok = 0; + } + } + if (ok && ok2) { + int r = pp; + while ((defcpdtable[i].len > r) && ((r + 1) < defcpdtable[i].len) && + ((defcpdtable[i].def[r + 1] == '*') || + (defcpdtable[i].def[r + 1] == '?'))) + r += 2; + if (defcpdtable[i].len <= r) + return 1; + } + // backtrack + if (bt) + do { + ok = 1; + btnum[bt - 1]--; + pp = btpp[bt - 1]; + wp = btwp[bt - 1] + (signed short)btnum[bt - 1]; + } while ((btnum[bt - 1] < 0) && --bt); + } while (bt); + + if (ok && ok2 && (!all || (defcpdtable[i].len <= pp))) + return 1; + + // check zero ending + while (ok && ok2 && (defcpdtable[i].len > pp) && + ((pp + 1) < defcpdtable[i].len) && + ((defcpdtable[i].def[pp + 1] == '*') || + (defcpdtable[i].def[pp + 1] == '?'))) + pp += 2; + if (ok && ok2 && (defcpdtable[i].len <= pp)) + return 1; + } + (*words)[wnum] = NULL; + if (w) + *words = NULL; + return 0; +} + +inline int AffixMgr::candidate_check(const char* word, int len) { + struct hentry* rv = NULL; + + rv = lookup(word); + if (rv) + return 1; + + // rv = prefix_check(word,len,1); + // if (rv) return 1; + + rv = affix_check(word, len); + if (rv) + return 1; + return 0; +} + +// calculate number of syllable for compound-checking +short AffixMgr::get_syllable(const char* word, int wlen) { + if (cpdmaxsyllable == 0) + return 0; + + short num = 0; + + if (!utf8) { + for (int i = 0; i < wlen; i++) { + if (strchr(cpdvowels, word[i])) + num++; + } + } else if (cpdvowels_utf16) { + w_char w[MAXWORDUTF8LEN]; + int i = u8_u16(w, MAXWORDUTF8LEN, word); + for (; i > 0; i--) { + if (flag_bsearch((unsigned short*)cpdvowels_utf16, + ((unsigned short*)w)[i - 1], cpdvowels_utf16_len)) + num++; + } + } + return num; +} + +void AffixMgr::setcminmax(int* cmin, int* cmax, const char* word, int len) { + if (utf8) { + int i; + for (*cmin = 0, i = 0; (i < cpdmin) && word[*cmin]; i++) { + for ((*cmin)++; (word[*cmin] & 0xc0) == 0x80; (*cmin)++) + ; + } + for (*cmax = len, i = 0; (i < (cpdmin - 1)) && *cmax; i++) { + for ((*cmax)--; (word[*cmax] & 0xc0) == 0x80; (*cmax)--) + ; + } + } else { + *cmin = cpdmin; + *cmax = len - cpdmin + 1; + } +} + +// check if compound word is correctly spelled +// hu_mov_rule = spec. Hungarian rule (XXX) +struct hentry* AffixMgr::compound_check(const char* word, + int len, + short wordnum, + short numsyllable, + short maxwordnum, + short wnum, + hentry** words = NULL, + char hu_mov_rule = 0, + char is_sug = 0, + int* info = NULL) { + int i; + short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2; + struct hentry* rv = NULL; + struct hentry* rv_first; + struct hentry* rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking + char st[MAXWORDUTF8LEN + 4]; + char ch = '\0'; + int cmin; + int cmax; + int striple = 0; + int scpd = 0; + int soldi = 0; + int oldcmin = 0; + int oldcmax = 0; + int oldlen = 0; + int checkedstriple = 0; + int onlycpdrule; + char affixed = 0; + hentry** oldwords = words; + + int checked_prefix; + + setcminmax(&cmin, &cmax, word, len); + + strcpy(st, word); + + for (i = cmin; i < cmax; i++) { + // go to end of the UTF-8 character + if (utf8) { + for (; (st[i] & 0xc0) == 0x80; i++) + ; + if (i >= cmax) + return NULL; + } + + words = oldwords; + onlycpdrule = (words) ? 1 : 0; + + do { // onlycpdrule loop + + oldnumsyllable = numsyllable; + oldwordnum = wordnum; + checked_prefix = 0; + + do { // simplified checkcompoundpattern loop + + if (scpd > 0) { + for (; scpd <= numcheckcpd && + (!checkcpdtable[scpd - 1].pattern3 || + strncmp(word + i, checkcpdtable[scpd - 1].pattern3, + strlen(checkcpdtable[scpd - 1].pattern3)) != 0); + scpd++) + ; + + if (scpd > numcheckcpd) + break; // break simplified checkcompoundpattern loop + strcpy(st + i, checkcpdtable[scpd - 1].pattern); + soldi = i; + i += strlen(checkcpdtable[scpd - 1].pattern); + strcpy(st + i, checkcpdtable[scpd - 1].pattern2); + strcpy(st + i + strlen(checkcpdtable[scpd - 1].pattern2), + word + soldi + strlen(checkcpdtable[scpd - 1].pattern3)); + + oldlen = len; + len += strlen(checkcpdtable[scpd - 1].pattern) + + strlen(checkcpdtable[scpd - 1].pattern2) - + strlen(checkcpdtable[scpd - 1].pattern3); + oldcmin = cmin; + oldcmax = cmax; + setcminmax(&cmin, &cmax, st, len); + + cmax = len - cpdmin + 1; + } + + ch = st[i]; + st[i] = '\0'; + + sfx = NULL; + pfx = NULL; + + // FIRST WORD + + affixed = 1; + rv = lookup(st); // perhaps without prefix + + // search homonym with compound flag + while ((rv) && !hu_mov_rule && + ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || + !((compoundflag && !words && !onlycpdrule && + TESTAFF(rv->astr, compoundflag, rv->alen)) || + (compoundbegin && !wordnum && !onlycpdrule && + TESTAFF(rv->astr, compoundbegin, rv->alen)) || + (compoundmiddle && wordnum && !words && !onlycpdrule && + TESTAFF(rv->astr, compoundmiddle, rv->alen)) || + (numdefcpd && onlycpdrule && + ((!words && !wordnum && + defcpd_check(&words, wnum, rv, (hentry**)&rwords, 0)) || + (words && + defcpd_check(&words, wnum, rv, (hentry**)&rwords, 0))))) || + (scpd != 0 && checkcpdtable[scpd - 1].cond != FLAG_NULL && + !TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)))) { + rv = rv->next_homonym; + } + + if (rv) + affixed = 0; + + if (!rv) { + if (onlycpdrule) + break; + if (compoundflag && + !(rv = prefix_check(st, i, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, + compoundflag))) { + if (((rv = suffix_check( + st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundflag, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && + (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && + !hu_mov_rule && sfx->getCont() && + ((compoundforbidflag && + TESTAFF(sfx->getCont(), compoundforbidflag, + sfx->getContLen())) || + (compoundend && + TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) { + rv = NULL; + } + } + + if (rv || + (((wordnum == 0) && compoundbegin && + ((rv = suffix_check( + st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && + (rv = suffix_check_twosfx( + st, i, 0, NULL, + compoundbegin))) || // twofold suffixes + compound + (rv = prefix_check(st, i, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, + compoundbegin)))) || + ((wordnum > 0) && compoundmiddle && + ((rv = suffix_check( + st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && + (rv = suffix_check_twosfx( + st, i, 0, NULL, + compoundmiddle))) || // twofold suffixes + compound + (rv = prefix_check(st, i, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, + compoundmiddle)))))) + checked_prefix = 1; + // else check forbiddenwords and needaffix + } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, needaffix, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + (is_sug && nosuggest && + TESTAFF(rv->astr, nosuggest, rv->alen)))) { + st[i] = ch; + // continue; + break; + } + + // check non_compound flag in suffix and prefix + if ((rv) && !hu_mov_rule && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundforbidflag, + sfx->getContLen())))) { + rv = NULL; + } + + // check compoundend flag in suffix and prefix + if ((rv) && !checked_prefix && compoundend && !hu_mov_rule && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundend, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) { + rv = NULL; + } + + // check compoundmiddle flag in suffix and prefix + if ((rv) && !checked_prefix && (wordnum == 0) && compoundmiddle && + !hu_mov_rule && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundmiddle, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundmiddle, sfx->getContLen())))) { + rv = NULL; + } + + // check forbiddenwords + if ((rv) && (rv->astr) && + (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) { + return NULL; + } + + // increment word number, if the second root has a compoundroot flag + if ((rv) && compoundroot && + (TESTAFF(rv->astr, compoundroot, rv->alen))) { + wordnum++; + } + + // first word is acceptable in compound words? + if (((rv) && + (checked_prefix || (words && words[wnum]) || + (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) || + ((oldwordnum == 0) && compoundbegin && + TESTAFF(rv->astr, compoundbegin, rv->alen)) || + ((oldwordnum > 0) && compoundmiddle && + TESTAFF(rv->astr, compoundmiddle, rv->alen)) // || + // (numdefcpd && ) + + // LANG_hu section: spec. Hungarian rule + || ((langnum == LANG_hu) && hu_mov_rule && + (TESTAFF( + rv->astr, 'F', + rv->alen) || // XXX hardwired Hungarian dictionary codes + TESTAFF(rv->astr, 'G', rv->alen) || + TESTAFF(rv->astr, 'H', rv->alen))) + // END of LANG_hu section + ) && + ( + // test CHECKCOMPOUNDPATTERN conditions + scpd == 0 || checkcpdtable[scpd - 1].cond == FLAG_NULL || + TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)) && + !((checkcompoundtriple && scpd == 0 && + !words && // test triple letters + (word[i - 1] == word[i]) && + (((i > 1) && (word[i - 1] == word[i - 2])) || + ((word[i - 1] == word[i + 1])) // may be word[i+1] == '\0' + )) || + (checkcompoundcase && scpd == 0 && !words && + cpdcase_check(word, i)))) + // LANG_hu section: spec. Hungarian rule + || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && + (rv = affix_check(st, i)) && + (sfx && sfx->getCont() && + ( // XXX hardwired Hungarian dic. codes + TESTAFF(sfx->getCont(), (unsigned short)'x', + sfx->getContLen()) || + TESTAFF( + sfx->getCont(), (unsigned short)'%', + sfx->getContLen()))))) { // first word is ok condition + + // LANG_hu section: spec. Hungarian rule + if (langnum == LANG_hu) { + // calculate syllable number of the word + numsyllable += get_syllable(st, i); + // + 1 word, if syllable number of the prefix > 1 (hungarian + // convention) + if (pfx && (get_syllable(pfx->getKey(), strlen(pfx->getKey())) > 1)) + wordnum++; + } + // END of LANG_hu section + + // NEXT WORD(S) + rv_first = rv; + st[i] = ch; + + do { // striple loop + + // check simplifiedtriple + if (simplifiedtriple) { + if (striple) { + checkedstriple = 1; + i--; // check "fahrt" instead of "ahrt" in "Schiffahrt" + } else if (i > 2 && *(word + i - 1) == *(word + i - 2)) + striple = 1; + } + + rv = lookup((st + i)); // perhaps without prefix + + // search homonym with compound flag + while ((rv) && + ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || + !((compoundflag && !words && + TESTAFF(rv->astr, compoundflag, rv->alen)) || + (compoundend && !words && + TESTAFF(rv->astr, compoundend, rv->alen)) || + (numdefcpd && words && + defcpd_check(&words, wnum + 1, rv, NULL, 1))) || + (scpd != 0 && checkcpdtable[scpd - 1].cond2 != FLAG_NULL && + !TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, + rv->alen)))) { + rv = rv->next_homonym; + } + + // check FORCEUCASE + if (rv && forceucase && (rv) && + (TESTAFF(rv->astr, forceucase, rv->alen)) && + !(info && *info & SPELL_ORIGCAP)) + rv = NULL; + + if (rv && words && words[wnum + 1]) + return rv_first; + + oldnumsyllable2 = numsyllable; + oldwordnum2 = wordnum; + + // LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary + // code + if ((rv) && (langnum == LANG_hu) && + (TESTAFF(rv->astr, 'I', rv->alen)) && + !(TESTAFF(rv->astr, 'J', rv->alen))) { + numsyllable--; + } + // END of LANG_hu section + + // increment word number, if the second root has a compoundroot flag + if ((rv) && (compoundroot) && + (TESTAFF(rv->astr, compoundroot, rv->alen))) { + wordnum++; + } + + // check forbiddenwords + if ((rv) && (rv->astr) && + (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + (is_sug && nosuggest && + TESTAFF(rv->astr, nosuggest, rv->alen)))) + return NULL; + + // second word is acceptable, as a root? + // hungarian conventions: compounding is acceptable, + // when compound forms consist of 2 words, or if more, + // then the syllable number of root words must be 6, or lesser. + + if ((rv) && + ((compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) || + (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) && + (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || + ((cpdmaxsyllable != 0) && + (numsyllable + get_syllable(HENTRY_WORD(rv), rv->clen) <= + cpdmaxsyllable))) && + ( + // test CHECKCOMPOUNDPATTERN + !numcheckcpd || scpd != 0 || + !cpdpat_check(word, i, rv_first, rv, 0)) && + ((!checkcompounddup || (rv != rv_first))) + // test CHECKCOMPOUNDPATTERN conditions + && + (scpd == 0 || checkcpdtable[scpd - 1].cond2 == FLAG_NULL || + TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen))) { + // forbid compound word, if it is a non compound word with typical + // fault + if (checkcompoundrep && cpdrep_check(word, len)) + return NULL; + return rv_first; + } + + numsyllable = oldnumsyllable2; + wordnum = oldwordnum2; + + // perhaps second word has prefix or/and suffix + sfx = NULL; + sfxflag = FLAG_NULL; + rv = (compoundflag && !onlycpdrule) + ? affix_check((word + i), strlen(word + i), compoundflag, + IN_CPD_END) + : NULL; + if (!rv && compoundend && !onlycpdrule) { + sfx = NULL; + pfx = NULL; + rv = affix_check((word + i), strlen(word + i), compoundend, + IN_CPD_END); + } + + if (!rv && numdefcpd && words) { + rv = affix_check((word + i), strlen(word + i), 0, IN_CPD_END); + if (rv && defcpd_check(&words, wnum + 1, rv, NULL, 1)) + return rv_first; + rv = NULL; + } + + // test CHECKCOMPOUNDPATTERN conditions (allowed forms) + if (rv && + !(scpd == 0 || checkcpdtable[scpd - 1].cond2 == FLAG_NULL || + TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen))) + rv = NULL; + + // test CHECKCOMPOUNDPATTERN conditions (forbidden compounds) + if (rv && numcheckcpd && scpd == 0 && + cpdpat_check(word, i, rv_first, rv, affixed)) + rv = NULL; + + // check non_compound flag in suffix and prefix + if ((rv) && ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundforbidflag, + pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundforbidflag, + sfx->getContLen())))) { + rv = NULL; + } + + // check FORCEUCASE + if (rv && forceucase && (rv) && + (TESTAFF(rv->astr, forceucase, rv->alen)) && + !(info && *info & SPELL_ORIGCAP)) + rv = NULL; + + // check forbiddenwords + if ((rv) && (rv->astr) && + (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + (is_sug && nosuggest && + TESTAFF(rv->astr, nosuggest, rv->alen)))) + return NULL; + + // pfxappnd = prefix of word+i, or NULL + // calculate syllable number of prefix. + // hungarian convention: when syllable number of prefix is more, + // than 1, the prefix+word counts as two words. + + if (langnum == LANG_hu) { + // calculate syllable number of the word + numsyllable += get_syllable(word + i, strlen(word + i)); + + // - affix syllable num. + // XXX only second suffix (inflections, not derivations) + if (sfxappnd) { + char* tmp = myrevstrdup(sfxappnd); + numsyllable -= get_syllable(tmp, strlen(tmp)) + sfxextra; + free(tmp); + } + + // + 1 word, if syllable number of the prefix > 1 (hungarian + // convention) + if (pfx && + (get_syllable(pfx->getKey(), strlen(pfx->getKey())) > 1)) + wordnum++; + + // increment syllable num, if last word has a SYLLABLENUM flag + // and the suffix is beginning `s' + + if (cpdsyllablenum) { + switch (sfxflag) { + case 'c': { + numsyllable += 2; + break; + } + case 'J': { + numsyllable += 1; + break; + } + case 'I': { + if (rv && TESTAFF(rv->astr, 'J', rv->alen)) + numsyllable += 1; + break; + } + } + } + } + + // increment word number, if the second word has a compoundroot flag + if ((rv) && (compoundroot) && + (TESTAFF(rv->astr, compoundroot, rv->alen))) { + wordnum++; + } + + // second word is acceptable, as a word with prefix or/and suffix? + // hungarian conventions: compounding is acceptable, + // when compound forms consist 2 word, otherwise + // the syllable number of root words is 6, or lesser. + if ((rv) && + (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || + ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) && + ((!checkcompounddup || (rv != rv_first)))) { + // forbid compound word, if it is a non compound word with typical + // fault + if (checkcompoundrep && cpdrep_check(word, len)) + return NULL; + return rv_first; + } + + numsyllable = oldnumsyllable2; + wordnum = oldwordnum2; + + // perhaps second word is a compound word (recursive call) + if (wordnum < maxwordnum) { + rv = compound_check((st + i), strlen(st + i), wordnum + 1, + numsyllable, maxwordnum, wnum + 1, words, 0, + is_sug, info); + + if (rv && numcheckcpd && + ((scpd == 0 && + cpdpat_check(word, i, rv_first, rv, affixed)) || + (scpd != 0 && + !cpdpat_check(word, i, rv_first, rv, affixed)))) + rv = NULL; + } else { + rv = NULL; + } + if (rv) { + // forbid compound word, if it is a non compound word with typical + // fault + if (checkcompoundrep || forbiddenword) { + struct hentry* rv2 = NULL; + + if (checkcompoundrep && cpdrep_check(word, len)) + return NULL; + + // check first part + if (strncmp(rv->word, word + i, rv->blen) == 0) { + char r = *(st + i + rv->blen); + *(st + i + rv->blen) = '\0'; + + if (checkcompoundrep && cpdrep_check(st, i + rv->blen)) { + *(st + i + rv->blen) = r; + continue; + } + + if (forbiddenword) { + rv2 = lookup(word); + if (!rv2) + rv2 = affix_check(word, len); + if (rv2 && rv2->astr && + TESTAFF(rv2->astr, forbiddenword, rv2->alen) && + (strncmp(rv2->word, st, i + rv->blen) == 0)) { + return NULL; + } + } + *(st + i + rv->blen) = r; + } + } + return rv_first; + } + } while (striple && !checkedstriple); // end of striple loop + + if (checkedstriple) { + i++; + checkedstriple = 0; + striple = 0; + } + + } // first word is ok condition + + if (soldi != 0) { + i = soldi; + soldi = 0; + len = oldlen; + cmin = oldcmin; + cmax = oldcmax; + } + scpd++; + + } while (!onlycpdrule && simplifiedcpd && + scpd <= numcheckcpd); // end of simplifiedcpd loop + + scpd = 0; + wordnum = oldwordnum; + numsyllable = oldnumsyllable; + + if (soldi != 0) { + i = soldi; + strcpy(st, word); // XXX add more optim. + soldi = 0; + } else + st[i] = ch; + + } while (numdefcpd && oldwordnum == 0 && + onlycpdrule++ < 1); // end of onlycpd loop + } + + return NULL; +} + +// check if compound word is correctly spelled +// hu_mov_rule = spec. Hungarian rule (XXX) +int AffixMgr::compound_check_morph(const char* word, + int len, + short wordnum, + short numsyllable, + short maxwordnum, + short wnum, + hentry** words, + char hu_mov_rule = 0, + char** result = NULL, + char* partresult = NULL) { + int i; + short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2; + int ok = 0; + + struct hentry* rv = NULL; + struct hentry* rv_first; + struct hentry* rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking + char st[MAXWORDUTF8LEN + 4]; + char ch; + + int checked_prefix; + char presult[MAXLNLEN]; + + int cmin; + int cmax; + + int onlycpdrule; + char affixed = 0; + hentry** oldwords = words; + + setcminmax(&cmin, &cmax, word, len); + + strcpy(st, word); + + for (i = cmin; i < cmax; i++) { + // go to end of the UTF-8 character + if (utf8) { + for (; (st[i] & 0xc0) == 0x80; i++) + ; + if (i >= cmax) + return 0; + } + + words = oldwords; + onlycpdrule = (words) ? 1 : 0; + + do { // onlycpdrule loop + + oldnumsyllable = numsyllable; + oldwordnum = wordnum; + checked_prefix = 0; + + ch = st[i]; + st[i] = '\0'; + sfx = NULL; + + // FIRST WORD + + affixed = 1; + + *presult = '\0'; + if (partresult) + mystrcat(presult, partresult, MAXLNLEN); + + rv = lookup(st); // perhaps without prefix + + // search homonym with compound flag + while ((rv) && !hu_mov_rule && + ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || + !((compoundflag && !words && !onlycpdrule && + TESTAFF(rv->astr, compoundflag, rv->alen)) || + (compoundbegin && !wordnum && !onlycpdrule && + TESTAFF(rv->astr, compoundbegin, rv->alen)) || + (compoundmiddle && wordnum && !words && !onlycpdrule && + TESTAFF(rv->astr, compoundmiddle, rv->alen)) || + (numdefcpd && onlycpdrule && + ((!words && !wordnum && + defcpd_check(&words, wnum, rv, (hentry**)&rwords, 0)) || + (words && + defcpd_check(&words, wnum, rv, (hentry**)&rwords, 0))))))) { + rv = rv->next_homonym; + } + + if (rv) + affixed = 0; + + if (rv) { + sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_PART, st); + if (!HENTRY_FIND(rv, MORPH_STEM)) { + sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_STEM, + st); + } + // store the pointer of the hash entry + // sprintf(presult + strlen(presult), "%c%s%p", MSEP_FLD, + // MORPH_HENTRY, rv); + if (HENTRY_DATA(rv)) { + sprintf(presult + strlen(presult), "%c%s", MSEP_FLD, + HENTRY_DATA2(rv)); + } + } + + if (!rv) { + if (onlycpdrule && strlen(*result) > MAXLNLEN / 10) + break; + if (compoundflag && + !(rv = + prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, + compoundflag))) { + if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, + compoundflag, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && + (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && + !hu_mov_rule && sfx->getCont() && + ((compoundforbidflag && + TESTAFF(sfx->getCont(), compoundforbidflag, + sfx->getContLen())) || + (compoundend && + TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) { + rv = NULL; + } + } + + if (rv || + (((wordnum == 0) && compoundbegin && + ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, + compoundbegin, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && + (rv = suffix_check_twosfx( + st, i, 0, NULL, + compoundbegin))) || // twofold suffix+compound + (rv = prefix_check(st, i, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, + compoundbegin)))) || + ((wordnum > 0) && compoundmiddle && + ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, + compoundmiddle, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && + (rv = suffix_check_twosfx( + st, i, 0, NULL, + compoundmiddle))) || // twofold suffix+compound + (rv = prefix_check(st, i, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, + compoundmiddle)))))) { + // char * p = prefix_check_morph(st, i, 0, compound); + char* p = NULL; + if (compoundflag) + p = affix_check_morph(st, i, compoundflag); + if (!p || (*p == '\0')) { + if (p) + free(p); + p = NULL; + if ((wordnum == 0) && compoundbegin) { + p = affix_check_morph(st, i, compoundbegin); + } else if ((wordnum > 0) && compoundmiddle) { + p = affix_check_morph(st, i, compoundmiddle); + } + } + if (p && (*p != '\0')) { + sprintf(presult + strlen(presult), "%c%s%s%s", MSEP_FLD, MORPH_PART, + st, line_uniq_app(&p, MSEP_REC)); + } + if (p) + free(p); + checked_prefix = 1; + } + // else check forbiddenwords + } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + TESTAFF(rv->astr, needaffix, rv->alen))) { + st[i] = ch; + continue; + } + + // check non_compound flag in suffix and prefix + if ((rv) && !hu_mov_rule && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundforbidflag, sfx->getContLen())))) { + continue; + } + + // check compoundend flag in suffix and prefix + if ((rv) && !checked_prefix && compoundend && !hu_mov_rule && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundend, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) { + continue; + } + + // check compoundmiddle flag in suffix and prefix + if ((rv) && !checked_prefix && (wordnum == 0) && compoundmiddle && + !hu_mov_rule && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundmiddle, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundmiddle, sfx->getContLen())))) { + rv = NULL; + } + + // check forbiddenwords + if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) + continue; + + // increment word number, if the second root has a compoundroot flag + if ((rv) && (compoundroot) && + (TESTAFF(rv->astr, compoundroot, rv->alen))) { + wordnum++; + } + + // first word is acceptable in compound words? + if (((rv) && + (checked_prefix || (words && words[wnum]) || + (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) || + ((oldwordnum == 0) && compoundbegin && + TESTAFF(rv->astr, compoundbegin, rv->alen)) || + ((oldwordnum > 0) && compoundmiddle && + TESTAFF(rv->astr, compoundmiddle, rv->alen)) + // LANG_hu section: spec. Hungarian rule + || ((langnum == LANG_hu) && // hu_mov_rule + hu_mov_rule && (TESTAFF(rv->astr, 'F', rv->alen) || + TESTAFF(rv->astr, 'G', rv->alen) || + TESTAFF(rv->astr, 'H', rv->alen))) + // END of LANG_hu section + ) && + !((checkcompoundtriple && !words && // test triple letters + (word[i - 1] == word[i]) && + (((i > 1) && (word[i - 1] == word[i - 2])) || + ((word[i - 1] == word[i + 1])) // may be word[i+1] == '\0' + )) || + ( + // test CHECKCOMPOUNDPATTERN + numcheckcpd && !words && + cpdpat_check(word, i, rv, NULL, affixed)) || + (checkcompoundcase && !words && cpdcase_check(word, i)))) + // LANG_hu section: spec. Hungarian rule + || + ((!rv) && (langnum == LANG_hu) && hu_mov_rule && + (rv = affix_check(st, i)) && + (sfx && sfx->getCont() && + (TESTAFF(sfx->getCont(), (unsigned short)'x', sfx->getContLen()) || + TESTAFF(sfx->getCont(), (unsigned short)'%', sfx->getContLen())))) + // END of LANG_hu section + ) { + // LANG_hu section: spec. Hungarian rule + if (langnum == LANG_hu) { + // calculate syllable number of the word + numsyllable += get_syllable(st, i); + + // + 1 word, if syllable number of the prefix > 1 (hungarian + // convention) + if (pfx && (get_syllable(pfx->getKey(), strlen(pfx->getKey())) > 1)) + wordnum++; + } + // END of LANG_hu section + + // NEXT WORD(S) + rv_first = rv; + rv = lookup((word + i)); // perhaps without prefix + + // search homonym with compound flag + while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || + !((compoundflag && !words && + TESTAFF(rv->astr, compoundflag, rv->alen)) || + (compoundend && !words && + TESTAFF(rv->astr, compoundend, rv->alen)) || + (numdefcpd && words && + defcpd_check(&words, wnum + 1, rv, NULL, 1))))) { + rv = rv->next_homonym; + } + + if (rv && words && words[wnum + 1]) { + mystrcat(*result, presult, MAXLNLEN); + mystrcat(*result, " ", MAXLNLEN); + mystrcat(*result, MORPH_PART, MAXLNLEN); + mystrcat(*result, word + i, MAXLNLEN); + if (complexprefixes && HENTRY_DATA(rv)) + mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN); + if (!HENTRY_FIND(rv, MORPH_STEM)) { + mystrcat(*result, " ", MAXLNLEN); + mystrcat(*result, MORPH_STEM, MAXLNLEN); + mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN); + } + // store the pointer of the hash entry + // sprintf(*result + strlen(*result), " %s%p", + // MORPH_HENTRY, rv); + if (!complexprefixes && HENTRY_DATA(rv)) { + mystrcat(*result, " ", MAXLNLEN); + mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN); + } + mystrcat(*result, "\n", MAXLNLEN); + return 0; + } + + oldnumsyllable2 = numsyllable; + oldwordnum2 = wordnum; + + // LANG_hu section: spec. Hungarian rule + if ((rv) && (langnum == LANG_hu) && + (TESTAFF(rv->astr, 'I', rv->alen)) && + !(TESTAFF(rv->astr, 'J', rv->alen))) { + numsyllable--; + } + // END of LANG_hu section + // increment word number, if the second root has a compoundroot flag + if ((rv) && (compoundroot) && + (TESTAFF(rv->astr, compoundroot, rv->alen))) { + wordnum++; + } + + // check forbiddenwords + if ((rv) && (rv->astr) && + (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) { + st[i] = ch; + continue; + } + + // second word is acceptable, as a root? + // hungarian conventions: compounding is acceptable, + // when compound forms consist of 2 words, or if more, + // then the syllable number of root words must be 6, or lesser. + if ((rv) && + ((compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) || + (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) && + (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || + ((cpdmaxsyllable != 0) && + (numsyllable + get_syllable(HENTRY_WORD(rv), rv->blen) <= + cpdmaxsyllable))) && + ((!checkcompounddup || (rv != rv_first)))) { + // bad compound word + mystrcat(*result, presult, MAXLNLEN); + mystrcat(*result, " ", MAXLNLEN); + mystrcat(*result, MORPH_PART, MAXLNLEN); + mystrcat(*result, word + i, MAXLNLEN); + + if (HENTRY_DATA(rv)) { + if (complexprefixes) + mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN); + if (!HENTRY_FIND(rv, MORPH_STEM)) { + mystrcat(*result, " ", MAXLNLEN); + mystrcat(*result, MORPH_STEM, MAXLNLEN); + mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN); + } + // store the pointer of the hash entry + // sprintf(*result + strlen(*result), " + // %s%p", MORPH_HENTRY, rv); + if (!complexprefixes) { + mystrcat(*result, " ", MAXLNLEN); + mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN); + } + } + mystrcat(*result, "\n", MAXLNLEN); + ok = 1; + } + + numsyllable = oldnumsyllable2; + wordnum = oldwordnum2; + + // perhaps second word has prefix or/and suffix + sfx = NULL; + sfxflag = FLAG_NULL; + + if (compoundflag && !onlycpdrule) + rv = affix_check((word + i), strlen(word + i), compoundflag); + else + rv = NULL; + + if (!rv && compoundend && !onlycpdrule) { + sfx = NULL; + pfx = NULL; + rv = affix_check((word + i), strlen(word + i), compoundend); + } + + if (!rv && numdefcpd && words) { + rv = affix_check((word + i), strlen(word + i), 0, IN_CPD_END); + if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) { + char* m = NULL; + if (compoundflag) + m = affix_check_morph((word + i), strlen(word + i), compoundflag); + if ((!m || *m == '\0') && compoundend) { + if (m) + free(m); + m = affix_check_morph((word + i), strlen(word + i), compoundend); + } + mystrcat(*result, presult, MAXLNLEN); + if (m || (*m != '\0')) { + char m2[MAXLNLEN]; + sprintf(m2, "%c%s%s%s", MSEP_FLD, MORPH_PART, word + i, + line_uniq_app(&m, MSEP_REC)); + mystrcat(*result, m2, MAXLNLEN); + } + if (m) + free(m); + mystrcat(*result, "\n", MAXLNLEN); + ok = 1; + } + } + + // check non_compound flag in suffix and prefix + if ((rv) && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundforbidflag, + sfx->getContLen())))) { + rv = NULL; + } + + // check forbiddenwords + if ((rv) && (rv->astr) && + (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)) && + (!TESTAFF(rv->astr, needaffix, rv->alen))) { + st[i] = ch; + continue; + } + + if (langnum == LANG_hu) { + // calculate syllable number of the word + numsyllable += get_syllable(word + i, strlen(word + i)); + + // - affix syllable num. + // XXX only second suffix (inflections, not derivations) + if (sfxappnd) { + char* tmp = myrevstrdup(sfxappnd); + numsyllable -= get_syllable(tmp, strlen(tmp)) + sfxextra; + free(tmp); + } + + // + 1 word, if syllable number of the prefix > 1 (hungarian + // convention) + if (pfx && (get_syllable(pfx->getKey(), strlen(pfx->getKey())) > 1)) + wordnum++; + + // increment syllable num, if last word has a SYLLABLENUM flag + // and the suffix is beginning `s' + + if (cpdsyllablenum) { + switch (sfxflag) { + case 'c': { + numsyllable += 2; + break; + } + case 'J': { + numsyllable += 1; + break; + } + case 'I': { + if (rv && TESTAFF(rv->astr, 'J', rv->alen)) + numsyllable += 1; + break; + } + } + } + } + + // increment word number, if the second word has a compoundroot flag + if ((rv) && (compoundroot) && + (TESTAFF(rv->astr, compoundroot, rv->alen))) { + wordnum++; + } + // second word is acceptable, as a word with prefix or/and suffix? + // hungarian conventions: compounding is acceptable, + // when compound forms consist 2 word, otherwise + // the syllable number of root words is 6, or lesser. + if ((rv) && + (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || + ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) && + ((!checkcompounddup || (rv != rv_first)))) { + char* m = NULL; + if (compoundflag) + m = affix_check_morph((word + i), strlen(word + i), compoundflag); + if ((!m || *m == '\0') && compoundend) { + if (m) + free(m); + m = affix_check_morph((word + i), strlen(word + i), compoundend); + } + mystrcat(*result, presult, MAXLNLEN); + if (m && (*m != '\0')) { + char m2[MAXLNLEN]; + sprintf(m2, "%c%s%s%s", MSEP_FLD, MORPH_PART, word + i, + line_uniq_app(&m, MSEP_REC)); + mystrcat(*result, m2, MAXLNLEN); + } + if (m) + free(m); + if (strlen(*result) + 1 < MAXLNLEN) + sprintf(*result + strlen(*result), "%c", MSEP_REC); + ok = 1; + } + + numsyllable = oldnumsyllable2; + wordnum = oldwordnum2; + + // perhaps second word is a compound word (recursive call) + if ((wordnum < maxwordnum) && (ok == 0)) { + compound_check_morph((word + i), strlen(word + i), wordnum + 1, + numsyllable, maxwordnum, wnum + 1, words, 0, + result, presult); + } else { + rv = NULL; + } + } + st[i] = ch; + wordnum = oldwordnum; + numsyllable = oldnumsyllable; + + } while (numdefcpd && oldwordnum == 0 && + onlycpdrule++ < 1); // end of onlycpd loop + } + return 0; +} + +// return 1 if s1 (reversed) is a leading subset of end of s2 +/* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int + len) + { + while ((len > 0) && *s1 && (*s1 == *end_of_s2)) { + s1++; + end_of_s2--; + len--; + } + return (*s1 == '\0'); + } + */ + +inline int AffixMgr::isRevSubset(const char* s1, + const char* end_of_s2, + int len) { + while ((len > 0) && (*s1 != '\0') && ((*s1 == *end_of_s2) || (*s1 == '.'))) { + s1++; + end_of_s2--; + len--; + } + return (*s1 == '\0'); +} + +// check word for suffixes + +struct hentry* AffixMgr::suffix_check(const char* word, + int len, + int sfxopts, + PfxEntry* ppfx, + char** wlst, + int maxSug, + int* ns, + const FLAG cclass, + const FLAG needflag, + char in_compound) { + struct hentry* rv = NULL; + PfxEntry* ep = ppfx; + + // first handle the special case of 0 length suffixes + SfxEntry* se = sStart[0]; + + while (se) { + if (!cclass || se->getCont()) { + // suffixes are not allowed in beginning of compounds + if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass + // except when signed with compoundpermitflag flag + (se->getCont() && compoundpermitflag && + TESTAFF(se->getCont(), compoundpermitflag, se->getContLen()))) && + (!circumfix || + // no circumfix flag in prefix and suffix + ((!ppfx || !(ep->getCont()) || + !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (!se->getCont() || + !(TESTAFF(se->getCont(), circumfix, se->getContLen())))) || + // circumfix flag in prefix AND suffix + ((ppfx && (ep->getCont()) && + TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (se->getCont() && + (TESTAFF(se->getCont(), circumfix, se->getContLen()))))) && + // fogemorpheme + (in_compound || + !(se->getCont() && + (TESTAFF(se->getCont(), onlyincompound, se->getContLen())))) && + // needaffix on prefix or first suffix + (cclass || + !(se->getCont() && + TESTAFF(se->getCont(), needaffix, se->getContLen())) || + (ppfx && + !((ep->getCont()) && + TESTAFF(ep->getCont(), needaffix, ep->getContLen()))))) { + rv = se->checkword(word, len, sfxopts, ppfx, wlst, maxSug, ns, + (FLAG)cclass, needflag, + (in_compound ? 0 : onlyincompound)); + if (rv) { + sfx = se; // BUG: sfx not stateless + return rv; + } + } + } + se = se->getNext(); + } + + // now handle the general case + if (len == 0) + return NULL; // FULLSTRIP + unsigned char sp = *((const unsigned char*)(word + len - 1)); + SfxEntry* sptr = sStart[sp]; + + while (sptr) { + if (isRevSubset(sptr->getKey(), word + len - 1, len)) { + // suffixes are not allowed in beginning of compounds + if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass + // except when signed with compoundpermitflag flag + (sptr->getCont() && compoundpermitflag && + TESTAFF(sptr->getCont(), compoundpermitflag, + sptr->getContLen()))) && + (!circumfix || + // no circumfix flag in prefix and suffix + ((!ppfx || !(ep->getCont()) || + !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (!sptr->getCont() || + !(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())))) || + // circumfix flag in prefix AND suffix + ((ppfx && (ep->getCont()) && + TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (sptr->getCont() && + (TESTAFF(sptr->getCont(), circumfix, sptr->getContLen()))))) && + // fogemorpheme + (in_compound || + !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, + sptr->getContLen()))))) && + // needaffix on prefix or first suffix + (cclass || + !(sptr->getCont() && + TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) || + (ppfx && + !((ep->getCont()) && + TESTAFF(ep->getCont(), needaffix, ep->getContLen()))))) + if (in_compound != IN_CPD_END || ppfx || + !(sptr->getCont() && + TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))) { + rv = sptr->checkword(word, len, sfxopts, ppfx, wlst, maxSug, ns, + cclass, needflag, + (in_compound ? 0 : onlyincompound)); + if (rv) { + sfx = sptr; // BUG: sfx not stateless + sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless + if (!sptr->getCont()) + sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless + // LANG_hu section: spec. Hungarian rule + else if (langnum == LANG_hu && sptr->getKeyLen() && + sptr->getKey()[0] == 'i' && sptr->getKey()[1] != 'y' && + sptr->getKey()[1] != 't') { + sfxextra = 1; + } + // END of LANG_hu section + return rv; + } + } + sptr = sptr->getNextEQ(); + } else { + sptr = sptr->getNextNE(); + } + } + + return NULL; +} + +// check word for two-level suffixes + +struct hentry* AffixMgr::suffix_check_twosfx(const char* word, + int len, + int sfxopts, + PfxEntry* ppfx, + const FLAG needflag) { + struct hentry* rv = NULL; + + // first handle the special case of 0 length suffixes + SfxEntry* se = sStart[0]; + while (se) { + if (contclasses[se->getFlag()]) { + rv = se->check_twosfx(word, len, sfxopts, ppfx, needflag); + if (rv) + return rv; + } + se = se->getNext(); + } + + // now handle the general case + if (len == 0) + return NULL; // FULLSTRIP + unsigned char sp = *((const unsigned char*)(word + len - 1)); + SfxEntry* sptr = sStart[sp]; + + while (sptr) { + if (isRevSubset(sptr->getKey(), word + len - 1, len)) { + if (contclasses[sptr->getFlag()]) { + rv = sptr->check_twosfx(word, len, sfxopts, ppfx, needflag); + if (rv) { + sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless + if (!sptr->getCont()) + sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless + return rv; + } + } + sptr = sptr->getNextEQ(); + } else { + sptr = sptr->getNextNE(); + } + } + + return NULL; +} + +char* AffixMgr::suffix_check_twosfx_morph(const char* word, + int len, + int sfxopts, + PfxEntry* ppfx, + const FLAG needflag) { + std::string result; + std::string result2; + std::string result3; + + char* st; + + // first handle the special case of 0 length suffixes + SfxEntry* se = sStart[0]; + while (se) { + if (contclasses[se->getFlag()]) { + st = se->check_twosfx_morph(word, len, sfxopts, ppfx, needflag); + if (st) { + if (ppfx) { + if (ppfx->getMorph()) { + result.append(ppfx->getMorph()); + result.append(" "); + } else + debugflag(result, ppfx->getFlag()); + } + result.append(st); + free(st); + if (se->getMorph()) { + result.append(" "); + result.append(se->getMorph()); + } else + debugflag(result, se->getFlag()); + result.append("\n"); + } + } + se = se->getNext(); + } + + // now handle the general case + if (len == 0) + return NULL; // FULLSTRIP + unsigned char sp = *((const unsigned char*)(word + len - 1)); + SfxEntry* sptr = sStart[sp]; + + while (sptr) { + if (isRevSubset(sptr->getKey(), word + len - 1, len)) { + if (contclasses[sptr->getFlag()]) { + st = sptr->check_twosfx_morph(word, len, sfxopts, ppfx, needflag); + if (st) { + sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless + if (!sptr->getCont()) + sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless + result2.assign(st); + free(st); + + result3.clear(); + + if (sptr->getMorph()) { + result3.append(" "); + result3.append(sptr->getMorph()); + } else + debugflag(result3, sptr->getFlag()); + strlinecat(result2, result3); + result2.append("\n"); + result.append(result2); + } + } + sptr = sptr->getNextEQ(); + } else { + sptr = sptr->getNextNE(); + } + } + + if (!result.empty()) + return mystrdup(result.c_str()); + + return NULL; +} + +char* AffixMgr::suffix_check_morph(const char* word, + int len, + int sfxopts, + PfxEntry* ppfx, + const FLAG cclass, + const FLAG needflag, + char in_compound) { + char result[MAXLNLEN]; + + struct hentry* rv = NULL; + + result[0] = '\0'; + + PfxEntry* ep = ppfx; + + // first handle the special case of 0 length suffixes + SfxEntry* se = sStart[0]; + while (se) { + if (!cclass || se->getCont()) { + // suffixes are not allowed in beginning of compounds + if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass + // except when signed with compoundpermitflag flag + (se->getCont() && compoundpermitflag && + TESTAFF(se->getCont(), compoundpermitflag, se->getContLen()))) && + (!circumfix || + // no circumfix flag in prefix and suffix + ((!ppfx || !(ep->getCont()) || + !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (!se->getCont() || + !(TESTAFF(se->getCont(), circumfix, se->getContLen())))) || + // circumfix flag in prefix AND suffix + ((ppfx && (ep->getCont()) && + TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (se->getCont() && + (TESTAFF(se->getCont(), circumfix, se->getContLen()))))) && + // fogemorpheme + (in_compound || + !((se->getCont() && + (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) && + // needaffix on prefix or first suffix + (cclass || + !(se->getCont() && + TESTAFF(se->getCont(), needaffix, se->getContLen())) || + (ppfx && + !((ep->getCont()) && + TESTAFF(ep->getCont(), needaffix, ep->getContLen())))))) + rv = se->checkword(word, len, sfxopts, ppfx, NULL, 0, 0, cclass, + needflag); + while (rv) { + if (ppfx) { + if (ppfx->getMorph()) { + mystrcat(result, ppfx->getMorph(), MAXLNLEN); + mystrcat(result, " ", MAXLNLEN); + } else + debugflag(result, ppfx->getFlag()); + } + if (complexprefixes && HENTRY_DATA(rv)) + mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN); + if (!HENTRY_FIND(rv, MORPH_STEM)) { + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, MORPH_STEM, MAXLNLEN); + mystrcat(result, HENTRY_WORD(rv), MAXLNLEN); + } + // store the pointer of the hash entry + // sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, + // rv); + + if (!complexprefixes && HENTRY_DATA(rv)) { + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN); + } + if (se->getMorph()) { + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, se->getMorph(), MAXLNLEN); + } else + debugflag(result, se->getFlag()); + mystrcat(result, "\n", MAXLNLEN); + rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag); + } + } + se = se->getNext(); + } + + // now handle the general case + if (len == 0) + return NULL; // FULLSTRIP + unsigned char sp = *((const unsigned char*)(word + len - 1)); + SfxEntry* sptr = sStart[sp]; + + while (sptr) { + if (isRevSubset(sptr->getKey(), word + len - 1, len)) { + // suffixes are not allowed in beginning of compounds + if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass + // except when signed with compoundpermitflag flag + (sptr->getCont() && compoundpermitflag && + TESTAFF(sptr->getCont(), compoundpermitflag, + sptr->getContLen()))) && + (!circumfix || + // no circumfix flag in prefix and suffix + ((!ppfx || !(ep->getCont()) || + !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (!sptr->getCont() || + !(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())))) || + // circumfix flag in prefix AND suffix + ((ppfx && (ep->getCont()) && + TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (sptr->getCont() && + (TESTAFF(sptr->getCont(), circumfix, sptr->getContLen()))))) && + // fogemorpheme + (in_compound || + !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, + sptr->getContLen()))))) && + // needaffix on first suffix + (cclass || + !(sptr->getCont() && + TESTAFF(sptr->getCont(), needaffix, sptr->getContLen()))))) + rv = sptr->checkword(word, len, sfxopts, ppfx, NULL, 0, 0, cclass, + needflag); + while (rv) { + if (ppfx) { + if (ppfx->getMorph()) { + mystrcat(result, ppfx->getMorph(), MAXLNLEN); + mystrcat(result, " ", MAXLNLEN); + } else + debugflag(result, ppfx->getFlag()); + } + if (complexprefixes && HENTRY_DATA(rv)) + mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN); + if (!HENTRY_FIND(rv, MORPH_STEM)) { + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, MORPH_STEM, MAXLNLEN); + mystrcat(result, HENTRY_WORD(rv), MAXLNLEN); + } + // store the pointer of the hash entry + // sprintf(result + strlen(result), " %s%p", + // MORPH_HENTRY, rv); + + if (!complexprefixes && HENTRY_DATA(rv)) { + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN); + } + + if (sptr->getMorph()) { + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, sptr->getMorph(), MAXLNLEN); + } else + debugflag(result, sptr->getFlag()); + mystrcat(result, "\n", MAXLNLEN); + rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag); + } + sptr = sptr->getNextEQ(); + } else { + sptr = sptr->getNextNE(); + } + } + + if (*result) + return mystrdup(result); + return NULL; +} + +// check if word with affixes is correctly spelled +struct hentry* AffixMgr::affix_check(const char* word, + int len, + const FLAG needflag, + char in_compound) { + struct hentry* rv = NULL; + + // check all prefixes (also crossed with suffixes if allowed) + rv = prefix_check(word, len, in_compound, needflag); + if (rv) + return rv; + + // if still not found check all suffixes + rv = suffix_check(word, len, 0, NULL, NULL, 0, NULL, FLAG_NULL, needflag, + in_compound); + + if (havecontclass) { + sfx = NULL; + pfx = NULL; + + if (rv) + return rv; + // if still not found check all two-level suffixes + rv = suffix_check_twosfx(word, len, 0, NULL, needflag); + + if (rv) + return rv; + // if still not found check all two-level suffixes + rv = prefix_check_twosfx(word, len, IN_CPD_NOT, needflag); + } + + return rv; +} + +// check if word with affixes is correctly spelled +char* AffixMgr::affix_check_morph(const char* word, + int len, + const FLAG needflag, + char in_compound) { + char result[MAXLNLEN]; + char* st = NULL; + + *result = '\0'; + + // check all prefixes (also crossed with suffixes if allowed) + st = prefix_check_morph(word, len, in_compound); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + + // if still not found check all suffixes + st = suffix_check_morph(word, len, 0, NULL, '\0', needflag, in_compound); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + + if (havecontclass) { + sfx = NULL; + pfx = NULL; + // if still not found check all two-level suffixes + st = suffix_check_twosfx_morph(word, len, 0, NULL, needflag); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + + // if still not found check all two-level suffixes + st = prefix_check_twosfx_morph(word, len, IN_CPD_NOT, needflag); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + } + + return mystrdup(result); +} + +char* AffixMgr::morphgen(const char* ts, + int wl, + const unsigned short* ap, + unsigned short al, + const char* morph, + const char* targetmorph, + int level) { + // handle suffixes + if (!morph) + return NULL; + + // check substandard flag + if (TESTAFF(ap, substandard, al)) + return NULL; + + if (morphcmp(morph, targetmorph) == 0) + return mystrdup(ts); + + size_t stemmorphcatpos; + std::string mymorph; + + // use input suffix fields, if exist + if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) { + mymorph.assign(morph); + mymorph.append(" "); + stemmorphcatpos = mymorph.size(); + } else { + stemmorphcatpos = std::string::npos; + } + + for (int i = 0; i < al; i++) { + const unsigned char c = (unsigned char)(ap[i] & 0x00FF); + SfxEntry* sptr = sFlag[c]; + while (sptr) { + if (sptr->getFlag() == ap[i] && sptr->getMorph() && + ((sptr->getContLen() == 0) || + // don't generate forms with substandard affixes + !TESTAFF(sptr->getCont(), substandard, sptr->getContLen()))) { + const char* stemmorph; + if (stemmorphcatpos != std::string::npos) { + mymorph.replace(stemmorphcatpos, std::string::npos, sptr->getMorph()); + stemmorph = mymorph.c_str(); + } else { + stemmorph = sptr->getMorph(); + } + + int cmp = morphcmp(stemmorph, targetmorph); + + if (cmp == 0) { + char* newword = sptr->add(ts, wl); + if (newword) { + hentry* check = pHMgr->lookup(newword); // XXX extra dic + if (!check || !check->astr || + !(TESTAFF(check->astr, forbiddenword, check->alen) || + TESTAFF(check->astr, ONLYUPCASEFLAG, check->alen))) { + return newword; + } + free(newword); + } + } + + // recursive call for secondary suffixes + if ((level == 0) && (cmp == 1) && (sptr->getContLen() > 0) && + // (get_sfxcount(stemmorph) < targetcount) && + !TESTAFF(sptr->getCont(), substandard, sptr->getContLen())) { + char* newword = sptr->add(ts, wl); + if (newword) { + char* newword2 = + morphgen(newword, strlen(newword), sptr->getCont(), + sptr->getContLen(), stemmorph, targetmorph, 1); + + if (newword2) { + free(newword); + return newword2; + } + free(newword); + newword = NULL; + } + } + } + sptr = sptr->getFlgNxt(); + } + } + return NULL; +} + +int AffixMgr::expand_rootword(struct guessword* wlst, + int maxn, + const char* ts, + int wl, + const unsigned short* ap, + unsigned short al, + const char* bad, + int badl, + char* phon) { + int nh = 0; + // first add root word to list + if ((nh < maxn) && + !(al && ((needaffix && TESTAFF(ap, needaffix, al)) || + (onlyincompound && TESTAFF(ap, onlyincompound, al))))) { + wlst[nh].word = mystrdup(ts); + if (!wlst[nh].word) + return 0; + wlst[nh].allow = (1 == 0); + wlst[nh].orig = NULL; + nh++; + // add special phonetic version + if (phon && (nh < maxn)) { + wlst[nh].word = mystrdup(phon); + if (!wlst[nh].word) + return nh - 1; + wlst[nh].allow = (1 == 0); + wlst[nh].orig = mystrdup(ts); + if (!wlst[nh].orig) + return nh - 1; + nh++; + } + } + + // handle suffixes + for (int i = 0; i < al; i++) { + const unsigned char c = (unsigned char)(ap[i] & 0x00FF); + SfxEntry* sptr = sFlag[c]; + while (sptr) { + if ((sptr->getFlag() == ap[i]) && + (!sptr->getKeyLen() || + ((badl > sptr->getKeyLen()) && + (strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) && + // check needaffix flag + !(sptr->getCont() && + ((needaffix && + TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) || + (circumfix && + TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())) || + (onlyincompound && + TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) { + char* newword = sptr->add(ts, wl); + if (newword) { + if (nh < maxn) { + wlst[nh].word = newword; + wlst[nh].allow = sptr->allowCross(); + wlst[nh].orig = NULL; + nh++; + // add special phonetic version + if (phon && (nh < maxn)) { + std::string prefix(phon); + std::string key(sptr->getKey()); + reverseword(key); + prefix.append(key); + wlst[nh].word = mystrdup(prefix.c_str()); + if (!wlst[nh].word) + return nh - 1; + wlst[nh].allow = (1 == 0); + wlst[nh].orig = mystrdup(newword); + if (!wlst[nh].orig) + return nh - 1; + nh++; + } + } else { + free(newword); + } + } + } + sptr = sptr->getFlgNxt(); + } + } + + int n = nh; + + // handle cross products of prefixes and suffixes + for (int j = 1; j < n; j++) + if (wlst[j].allow) { + for (int k = 0; k < al; k++) { + const unsigned char c = (unsigned char)(ap[k] & 0x00FF); + PfxEntry* cptr = pFlag[c]; + while (cptr) { + if ((cptr->getFlag() == ap[k]) && cptr->allowCross() && + (!cptr->getKeyLen() || + ((badl > cptr->getKeyLen()) && + (strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) { + int l1 = strlen(wlst[j].word); + char* newword = cptr->add(wlst[j].word, l1); + if (newword) { + if (nh < maxn) { + wlst[nh].word = newword; + wlst[nh].allow = cptr->allowCross(); + wlst[nh].orig = NULL; + nh++; + } else { + free(newword); + } + } + } + cptr = cptr->getFlgNxt(); + } + } + } + + // now handle pure prefixes + for (int m = 0; m < al; m++) { + const unsigned char c = (unsigned char)(ap[m] & 0x00FF); + PfxEntry* ptr = pFlag[c]; + while (ptr) { + if ((ptr->getFlag() == ap[m]) && + (!ptr->getKeyLen() || + ((badl > ptr->getKeyLen()) && + (strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) && + // check needaffix flag + !(ptr->getCont() && + ((needaffix && + TESTAFF(ptr->getCont(), needaffix, ptr->getContLen())) || + (circumfix && + TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())) || + (onlyincompound && + TESTAFF(ptr->getCont(), onlyincompound, ptr->getContLen()))))) { + char* newword = ptr->add(ts, wl); + if (newword) { + if (nh < maxn) { + wlst[nh].word = newword; + wlst[nh].allow = ptr->allowCross(); + wlst[nh].orig = NULL; + nh++; + } else { + free(newword); + } + } + } + ptr = ptr->getFlgNxt(); + } + } + + return nh; +} + +// return length of replacing table +int AffixMgr::get_numrep() const { + return numrep; +} + +// return replacing table +struct replentry* AffixMgr::get_reptable() const { + if (!reptable) + return NULL; + return reptable; +} + +// return iconv table +RepList* AffixMgr::get_iconvtable() const { + if (!iconvtable) + return NULL; + return iconvtable; +} + +// return oconv table +RepList* AffixMgr::get_oconvtable() const { + if (!oconvtable) + return NULL; + return oconvtable; +} + +// return replacing table +struct phonetable* AffixMgr::get_phonetable() const { + if (!phone) + return NULL; + return phone; +} + +// return length of character map table +int AffixMgr::get_nummap() const { + return nummap; +} + +// return character map table +struct mapentry* AffixMgr::get_maptable() const { + if (!maptable) + return NULL; + return maptable; +} + +// return length of word break table +int AffixMgr::get_numbreak() const { + return numbreak; +} + +// return character map table +char** AffixMgr::get_breaktable() const { + if (!breaktable) + return NULL; + return breaktable; +} + +// return text encoding of dictionary +char* AffixMgr::get_encoding() { + if (!encoding) + encoding = mystrdup(SPELL_ENCODING); + return mystrdup(encoding); +} + +// return text encoding of dictionary +int AffixMgr::get_langnum() const { + return langnum; +} + +// return double prefix option +int AffixMgr::get_complexprefixes() const { + return complexprefixes; +} + +// return FULLSTRIP option +int AffixMgr::get_fullstrip() const { + return fullstrip; +} + +FLAG AffixMgr::get_keepcase() const { + return keepcase; +} + +FLAG AffixMgr::get_forceucase() const { + return forceucase; +} + +FLAG AffixMgr::get_warn() const { + return warn; +} + +int AffixMgr::get_forbidwarn() const { + return forbidwarn; +} + +int AffixMgr::get_checksharps() const { + return checksharps; +} + +char* AffixMgr::encode_flag(unsigned short aflag) const { + return pHMgr->encode_flag(aflag); +} + +// return the preferred ignore string for suggestions +char* AffixMgr::get_ignore() const { + if (!ignorechars) + return NULL; + return ignorechars; +} + +// return the preferred ignore string for suggestions +unsigned short* AffixMgr::get_ignore_utf16(int* len) const { + *len = ignorechars_utf16_len; + return ignorechars_utf16; +} + +// return the keyboard string for suggestions +char* AffixMgr::get_key_string() { + if (!keystring) + keystring = mystrdup(SPELL_KEYSTRING); + return mystrdup(keystring); +} + +// return the preferred try string for suggestions +char* AffixMgr::get_try_string() const { + if (!trystring) + return NULL; + return mystrdup(trystring); +} + +// return the preferred try string for suggestions +const char* AffixMgr::get_wordchars() const { + return wordchars; +} + +unsigned short* AffixMgr::get_wordchars_utf16(int* len) const { + *len = wordchars_utf16_len; + return wordchars_utf16; +} + +// is there compounding? +int AffixMgr::get_compound() const { + return compoundflag || compoundbegin || numdefcpd; +} + +// return the compound words control flag +FLAG AffixMgr::get_compoundflag() const { + return compoundflag; +} + +// return the forbidden words control flag +FLAG AffixMgr::get_forbiddenword() const { + return forbiddenword; +} + +// return the forbidden words control flag +FLAG AffixMgr::get_nosuggest() const { + return nosuggest; +} + +// return the forbidden words control flag +FLAG AffixMgr::get_nongramsuggest() const { + return nongramsuggest; +} + +// return the forbidden words flag modify flag +FLAG AffixMgr::get_needaffix() const { + return needaffix; +} + +// return the onlyincompound flag +FLAG AffixMgr::get_onlyincompound() const { + return onlyincompound; +} + +// return the compound word signal flag +FLAG AffixMgr::get_compoundroot() const { + return compoundroot; +} + +// return the compound begin signal flag +FLAG AffixMgr::get_compoundbegin() const { + return compoundbegin; +} + +// return the value of checknum +int AffixMgr::get_checknum() const { + return checknum; +} + +// return the value of prefix +const char* AffixMgr::get_prefix() const { + if (pfx) + return pfx->getKey(); + return NULL; +} + +// return the value of suffix +const char* AffixMgr::get_suffix() const { + return sfxappnd; +} + +// return the value of suffix +const char* AffixMgr::get_version() const { + return version; +} + +// return lemma_present flag +FLAG AffixMgr::get_lemma_present() const { + return lemma_present; +} + +// utility method to look up root words in hash table +struct hentry* AffixMgr::lookup(const char* word) { + int i; + struct hentry* he = NULL; + for (i = 0; i < *maxdic && !he; i++) { + he = (alldic[i])->lookup(word); + } + return he; +} + +// return the value of suffix +int AffixMgr::have_contclass() const { + return havecontclass; +} + +// return utf8 +int AffixMgr::get_utf8() const { + return utf8; +} + +int AffixMgr::get_maxngramsugs(void) const { + return maxngramsugs; +} + +int AffixMgr::get_maxcpdsugs(void) const { + return maxcpdsugs; +} + +int AffixMgr::get_maxdiff(void) const { + return maxdiff; +} + +int AffixMgr::get_onlymaxdiff(void) const { + return onlymaxdiff; +} + +// return nosplitsugs +int AffixMgr::get_nosplitsugs(void) const { + return nosplitsugs; +} + +// return sugswithdots +int AffixMgr::get_sugswithdots(void) const { + return sugswithdots; +} + +/* parse flag */ +int AffixMgr::parse_flag(char* line, unsigned short* out, FileMgr* af) { + char* s = NULL; + if (*out != FLAG_NULL && !(*out >= DEFAULTFLAGS)) { + HUNSPELL_WARNING( + stderr, + "error: line %d: multiple definitions of an affix file parameter\n", + af->getlinenum()); + return 1; + } + if (parse_string(line, &s, af->getlinenum())) + return 1; + *out = pHMgr->decode_flag(s); + free(s); + return 0; +} + +/* parse num */ +int AffixMgr::parse_num(char* line, int* out, FileMgr* af) { + char* s = NULL; + if (*out != -1) { + HUNSPELL_WARNING( + stderr, + "error: line %d: multiple definitions of an affix file parameter\n", + af->getlinenum()); + return 1; + } + if (parse_string(line, &s, af->getlinenum())) + return 1; + *out = atoi(s); + free(s); + return 0; +} + +/* parse in the max syllablecount of compound words and */ +int AffixMgr::parse_cpdsyllable(char* line, FileMgr* af) { + char* tp = line; + char* piece; + int i = 0; + int np = 0; + w_char w[MAXWORDLEN]; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + cpdmaxsyllable = atoi(piece); + np++; + break; + } + case 2: { + if (!utf8) { + cpdvowels = mystrdup(piece); + } else { + int n = u8_u16(w, MAXWORDLEN, piece); + if (n > 0) { + flag_qsort((unsigned short*)w, 0, n); + cpdvowels_utf16 = (w_char*)malloc(n * sizeof(w_char)); + if (!cpdvowels_utf16) + return 1; + memcpy(cpdvowels_utf16, w, n * sizeof(w_char)); + } + cpdvowels_utf16_len = n; + } + np++; + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (np < 2) { + HUNSPELL_WARNING(stderr, + "error: line %d: missing compoundsyllable information\n", + af->getlinenum()); + return 1; + } + if (np == 2) + cpdvowels = mystrdup("aeiouAEIOU"); + return 0; +} + +/* parse in the typical fault correcting table */ +int AffixMgr::parse_reptable(char* line, FileMgr* af) { + if (numrep != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return 1; + } + char* tp = line; + char* piece; + int i = 0; + int np = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numrep = atoi(piece); + if (numrep < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", + af->getlinenum()); + return 1; + } + reptable = (replentry*)malloc(numrep * sizeof(struct replentry)); + if (!reptable) + return 1; + np++; + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return 1; + } + + /* now parse the numrep lines to read in the remainder of the table */ + char* nl; + for (int j = 0; j < numrep; j++) { + if ((nl = af->getline()) == NULL) + return 1; + mychomp(nl); + tp = nl; + i = 0; + reptable[j].pattern = NULL; + reptable[j].pattern2 = NULL; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + if (strncmp(piece, "REP", 3) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + numrep = 0; + return 1; + } + break; + } + case 1: { + if (*piece == '^') + reptable[j].start = true; + else + reptable[j].start = false; + reptable[j].pattern = + mystrrep(mystrdup(piece + int(reptable[j].start)), "_", " "); + int lr = strlen(reptable[j].pattern) - 1; + if (reptable[j].pattern[lr] == '$') { + reptable[j].end = true; + reptable[j].pattern[lr] = '\0'; + } else + reptable[j].end = false; + break; + } + case 2: { + reptable[j].pattern2 = mystrrep(mystrdup(piece), "_", " "); + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if ((!(reptable[j].pattern)) || (!(reptable[j].pattern2))) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + numrep = 0; + return 1; + } + } + return 0; +} + +/* parse in the typical fault correcting table */ +int AffixMgr::parse_convtable(char* line, + FileMgr* af, + RepList** rl, + const char* keyword) { + if (*rl) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return 1; + } + char* tp = line; + char* piece; + int i = 0; + int np = 0; + int numrl = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numrl = atoi(piece); + if (numrl < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", + af->getlinenum()); + return 1; + } + *rl = new RepList(numrl); + if (!*rl) + return 1; + np++; + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return 1; + } + + /* now parse the num lines to read in the remainder of the table */ + char* nl; + for (int j = 0; j < numrl; j++) { + if (!(nl = af->getline())) + return 1; + mychomp(nl); + tp = nl; + i = 0; + char* pattern = NULL; + char* pattern2 = NULL; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + if (strncmp(piece, keyword, strlen(keyword)) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + delete *rl; + *rl = NULL; + return 1; + } + break; + } + case 1: { + pattern = mystrrep(mystrdup(piece), "_", " "); + break; + } + case 2: { + pattern2 = mystrrep(mystrdup(piece), "_", " "); + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (!pattern || !pattern2) { + if (pattern) + free(pattern); + if (pattern2) + free(pattern2); + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return 1; + } + (*rl)->add(pattern, pattern2); + } + return 0; +} + +/* parse in the typical fault correcting table */ +int AffixMgr::parse_phonetable(char* line, FileMgr* af) { + if (phone) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return 1; + } + char* tp = line; + char* piece; + int i = 0; + int np = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + phone = (phonetable*)malloc(sizeof(struct phonetable)); + if (!phone) + return 1; + phone->num = atoi(piece); + phone->rules = NULL; + phone->utf8 = (char)utf8; + if (phone->num < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return 1; + } + phone->rules = (char**)malloc(2 * (phone->num + 1) * sizeof(char*)); + if (!phone->rules) { + free(phone); + phone = NULL; + return 1; + } + np++; + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return 1; + } + + /* now parse the phone->num lines to read in the remainder of the table */ + char* nl; + for (int j = 0; j < phone->num; j++) { + if (!(nl = af->getline())) + return 1; + mychomp(nl); + tp = nl; + i = 0; + phone->rules[j * 2] = NULL; + phone->rules[j * 2 + 1] = NULL; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + if (strncmp(piece, "PHONE", 5) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + phone->num = 0; + return 1; + } + break; + } + case 1: { + phone->rules[j * 2] = mystrrep(mystrdup(piece), "_", ""); + break; + } + case 2: { + phone->rules[j * 2 + 1] = mystrrep(mystrdup(piece), "_", ""); + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if ((!(phone->rules[j * 2])) || (!(phone->rules[j * 2 + 1]))) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + phone->num = 0; + return 1; + } + } + phone->rules[phone->num * 2] = mystrdup(""); + phone->rules[phone->num * 2 + 1] = mystrdup(""); + init_phonet_hash(*phone); + return 0; +} + +/* parse in the checkcompoundpattern table */ +int AffixMgr::parse_checkcpdtable(char* line, FileMgr* af) { + if (numcheckcpd != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return 1; + } + char* tp = line; + char* piece; + int i = 0; + int np = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numcheckcpd = atoi(piece); + if (numcheckcpd < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return 1; + } + checkcpdtable = + (patentry*)malloc(numcheckcpd * sizeof(struct patentry)); + if (!checkcpdtable) + return 1; + np++; + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return 1; + } + + /* now parse the numcheckcpd lines to read in the remainder of the table */ + char* nl; + for (int j = 0; j < numcheckcpd; j++) { + if (!(nl = af->getline())) + return 1; + mychomp(nl); + tp = nl; + i = 0; + checkcpdtable[j].pattern = NULL; + checkcpdtable[j].pattern2 = NULL; + checkcpdtable[j].pattern3 = NULL; + checkcpdtable[j].cond = FLAG_NULL; + checkcpdtable[j].cond2 = FLAG_NULL; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + if (strncmp(piece, "CHECKCOMPOUNDPATTERN", 20) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + numcheckcpd = 0; + return 1; + } + break; + } + case 1: { + checkcpdtable[j].pattern = mystrdup(piece); + char* p = strchr(checkcpdtable[j].pattern, '/'); + if (p) { + *p = '\0'; + checkcpdtable[j].cond = pHMgr->decode_flag(p + 1); + } + break; + } + case 2: { + checkcpdtable[j].pattern2 = mystrdup(piece); + char* p = strchr(checkcpdtable[j].pattern2, '/'); + if (p) { + *p = '\0'; + checkcpdtable[j].cond2 = pHMgr->decode_flag(p + 1); + } + break; + } + case 3: { + checkcpdtable[j].pattern3 = mystrdup(piece); + simplifiedcpd = 1; + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if ((!(checkcpdtable[j].pattern)) || (!(checkcpdtable[j].pattern2))) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + numcheckcpd = 0; + return 1; + } + } + return 0; +} + +/* parse in the compound rule table */ +int AffixMgr::parse_defcpdtable(char* line, FileMgr* af) { + if (numdefcpd != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return 1; + } + char* tp = line; + char* piece; + int i = 0; + int np = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numdefcpd = atoi(piece); + if (numdefcpd < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return 1; + } + defcpdtable = (flagentry*)malloc(numdefcpd * sizeof(flagentry)); + if (!defcpdtable) + return 1; + np++; + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return 1; + } + + /* now parse the numdefcpd lines to read in the remainder of the table */ + char* nl; + for (int j = 0; j < numdefcpd; j++) { + if (!(nl = af->getline())) + return 1; + mychomp(nl); + tp = nl; + i = 0; + defcpdtable[j].def = NULL; + defcpdtable[j].len = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + if (strncmp(piece, "COMPOUNDRULE", 12) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + numdefcpd = 0; + return 1; + } + break; + } + case 1: { // handle parenthesized flags + if (strchr(piece, '(')) { + defcpdtable[j].def = (FLAG*)malloc(strlen(piece) * sizeof(FLAG)); + defcpdtable[j].len = 0; + int end = 0; + FLAG* conv; + while (!end) { + char* par = piece + 1; + while (*par != '(' && *par != ')' && *par != '\0') + par++; + if (*par == '\0') + end = 1; + else + *par = '\0'; + if (*piece == '(') + piece++; + if (*piece == '*' || *piece == '?') { + defcpdtable[j].def[defcpdtable[j].len++] = (FLAG)*piece; + } else if (*piece != '\0') { + int l = pHMgr->decode_flags(&conv, piece, af); + for (int k = 0; k < l; k++) + defcpdtable[j].def[defcpdtable[j].len++] = conv[k]; + free(conv); + } + piece = par + 1; + } + } else { + defcpdtable[j].len = + pHMgr->decode_flags(&(defcpdtable[j].def), piece, af); + } + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (!defcpdtable[j].len) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + numdefcpd = 0; + return 1; + } + } + return 0; +} + +/* parse in the character map table */ +int AffixMgr::parse_maptable(char* line, FileMgr* af) { + if (nummap != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return 1; + } + char* tp = line; + char* piece; + int i = 0; + int np = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + nummap = atoi(piece); + if (nummap < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return 1; + } + maptable = (mapentry*)malloc(nummap * sizeof(struct mapentry)); + if (!maptable) + return 1; + np++; + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return 1; + } + + /* now parse the nummap lines to read in the remainder of the table */ + char* nl; + for (int j = 0; j < nummap; j++) { + if (!(nl = af->getline())) + return 1; + mychomp(nl); + tp = nl; + i = 0; + maptable[j].set = NULL; + maptable[j].len = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + if (strncmp(piece, "MAP", 3) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + nummap = 0; + return 1; + } + break; + } + case 1: { + int setn = 0; + maptable[j].len = strlen(piece); + maptable[j].set = (char**)malloc(maptable[j].len * sizeof(char*)); + if (!maptable[j].set) + return 1; + for (int k = 0; k < maptable[j].len; k++) { + int chl = 1; + int chb = k; + if (piece[k] == '(') { + char* parpos = strchr(piece + k, ')'); + if (parpos != NULL) { + chb = k + 1; + chl = (int)(parpos - piece) - k - 1; + k = k + chl + 1; + } + } else { + if (utf8 && (piece[k] & 0xc0) == 0xc0) { + for (k++; utf8 && (piece[k] & 0xc0) == 0x80; k++) + ; + chl = k - chb; + k--; + } + } + maptable[j].set[setn] = (char*)malloc(chl + 1); + if (!maptable[j].set[setn]) + return 1; + strncpy(maptable[j].set[setn], piece + chb, chl); + maptable[j].set[setn][chl] = '\0'; + setn++; + } + maptable[j].len = setn; + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (!maptable[j].set || !maptable[j].len) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + nummap = 0; + return 1; + } + } + return 0; +} + +/* parse in the word breakpoint table */ +int AffixMgr::parse_breaktable(char* line, FileMgr* af) { + if (numbreak > -1) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return 1; + } + char* tp = line; + char* piece; + int i = 0; + int np = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numbreak = atoi(piece); + if (numbreak < 0) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return 1; + } + if (numbreak == 0) + return 0; + breaktable = (char**)malloc(numbreak * sizeof(char*)); + if (!breaktable) + return 1; + np++; + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return 1; + } + + /* now parse the numbreak lines to read in the remainder of the table */ + char* nl; + for (int j = 0; j < numbreak; j++) { + if (!(nl = af->getline())) + return 1; + mychomp(nl); + tp = nl; + i = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + if (strncmp(piece, "BREAK", 5) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + numbreak = 0; + return 1; + } + break; + } + case 1: { + breaktable[j] = mystrdup(piece); + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (!breaktable) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + numbreak = 0; + return 1; + } + } + return 0; +} + +void AffixMgr::reverse_condition(char* piece) { + int neg = 0; + for (char* k = piece + strlen(piece) - 1; k >= piece; k--) { + switch (*k) { + case '[': { + if (neg) + *(k + 1) = '['; + else + *k = ']'; + break; + } + case ']': { + *k = '['; + if (neg) + *(k + 1) = '^'; + neg = 0; + break; + } + case '^': { + if (*(k + 1) == ']') + neg = 1; + else + *(k + 1) = *k; + break; + } + default: { + if (neg) + *(k + 1) = *k; + } + } + } +} + +int AffixMgr::parse_affix(char* line, + const char at, + FileMgr* af, + char* dupflags) { + int numents = 0; // number of affentry structures to parse + + unsigned short aflag = 0; // affix char identifier + + char ff = 0; + std::vector affentries; + + char* tp = line; + char* nl = line; + char* piece; + int i = 0; + +// checking lines with bad syntax +#ifdef DEBUG + int basefieldnum = 0; +#endif + + // split affix header line into pieces + + int np = 0; + + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + // piece 1 - is type of affix + case 0: { + np++; + break; + } + + // piece 2 - is affix char + case 1: { + np++; + aflag = pHMgr->decode_flag(piece); + if (((at == 'S') && (dupflags[aflag] & dupSFX)) || + ((at == 'P') && (dupflags[aflag] & dupPFX))) { + HUNSPELL_WARNING( + stderr, + "error: line %d: multiple definitions of an affix flag\n", + af->getlinenum()); + // return 1; XXX permissive mode for bad dictionaries + } + dupflags[aflag] += (char)((at == 'S') ? dupSFX : dupPFX); + break; + } + // piece 3 - is cross product indicator + case 2: { + np++; + if (*piece == 'Y') + ff = aeXPRODUCT; + break; + } + + // piece 4 - is number of affentries + case 3: { + np++; + numents = atoi(piece); + if ((numents <= 0) || ((::std::numeric_limits::max() / + sizeof(struct affentry)) < numents)) { + char* err = pHMgr->encode_flag(aflag); + if (err) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + free(err); + } + return 1; + } + affentries.resize(numents); + affentries[0].opts = ff; + if (utf8) + affentries[0].opts += aeUTF8; + if (pHMgr->is_aliasf()) + affentries[0].opts += aeALIASF; + if (pHMgr->is_aliasm()) + affentries[0].opts += aeALIASM; + affentries[0].aflag = aflag; + } + + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + // check to make sure we parsed enough pieces + if (np != 4) { + char* err = pHMgr->encode_flag(aflag); + if (err) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + free(err); + } + return 1; + } + + // now parse numents affentries for this affix + std::vector::iterator start = affentries.begin(); + std::vector::iterator end = affentries.end(); + for (std::vector::iterator entry = start; entry != end; ++entry) { + if ((nl = af->getline()) == NULL) + return 1; + mychomp(nl); + tp = nl; + i = 0; + np = 0; + + // split line into pieces + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + // piece 1 - is type + case 0: { + np++; + if (entry != start) + entry->opts = start->opts & + (char)(aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM); + break; + } + + // piece 2 - is affix char + case 1: { + np++; + if (pHMgr->decode_flag(piece) != aflag) { + char* err = pHMgr->encode_flag(aflag); + if (err) { + HUNSPELL_WARNING(stderr, + "error: line %d: affix %s is corrupt\n", + af->getlinenum(), err); + free(err); + } + return 1; + } + + if (entry != start) + entry->aflag = start->aflag; + break; + } + + // piece 3 - is string to strip or 0 for null + case 2: { + np++; + if (complexprefixes) { + if (utf8) + reverseword_utf(piece); + else + reverseword(piece); + } + entry->strip = mystrdup(piece); + entry->stripl = (unsigned char)strlen(entry->strip); + if (strcmp(entry->strip, "0") == 0) { + free(entry->strip); + entry->strip = mystrdup(""); + entry->stripl = 0; + } + break; + } + + // piece 4 - is affix string or 0 for null + case 3: { + char* dash; + entry->morphcode = NULL; + entry->contclass = NULL; + entry->contclasslen = 0; + np++; + dash = strchr(piece, '/'); + if (dash) { + *dash = '\0'; + + if (ignorechars) { + if (utf8) { + remove_ignored_chars_utf(piece, ignorechars_utf16, + ignorechars_utf16_len); + } else { + remove_ignored_chars(piece, ignorechars); + } + } + + if (complexprefixes) { + if (utf8) + reverseword_utf(piece); + else + reverseword(piece); + } + entry->appnd = mystrdup(piece); + + if (pHMgr->is_aliasf()) { + int index = atoi(dash + 1); + entry->contclasslen = (unsigned short)pHMgr->get_aliasf( + index, &(entry->contclass), af); + if (!entry->contclasslen) + HUNSPELL_WARNING(stderr, + "error: bad affix flag alias: \"%s\"\n", + dash + 1); + } else { + entry->contclasslen = (unsigned short)pHMgr->decode_flags( + &(entry->contclass), dash + 1, af); + flag_qsort(entry->contclass, 0, entry->contclasslen); + } + *dash = '/'; + + havecontclass = 1; + for (unsigned short _i = 0; _i < entry->contclasslen; _i++) { + contclasses[(entry->contclass)[_i]] = 1; + } + } else { + if (ignorechars) { + if (utf8) { + remove_ignored_chars_utf(piece, ignorechars_utf16, + ignorechars_utf16_len); + } else { + remove_ignored_chars(piece, ignorechars); + } + } + + if (complexprefixes) { + if (utf8) + reverseword_utf(piece); + else + reverseword(piece); + } + entry->appnd = mystrdup(piece); + } + + entry->appndl = (unsigned char)strlen(entry->appnd); + if (strcmp(entry->appnd, "0") == 0) { + free(entry->appnd); + entry->appnd = mystrdup(""); + entry->appndl = 0; + } + break; + } + + // piece 5 - is the conditions descriptions + case 4: { + np++; + if (complexprefixes) { + if (utf8) + reverseword_utf(piece); + else + reverseword(piece); + reverse_condition(piece); + } + if (entry->stripl && (strcmp(piece, ".") != 0) && + redundant_condition(at, entry->strip, entry->stripl, piece, + af->getlinenum())) + strcpy(piece, "."); + if (at == 'S') { + reverseword(piece); + reverse_condition(piece); + } + if (encodeit(*entry, piece)) + return 1; + break; + } + + case 5: { + np++; + if (pHMgr->is_aliasm()) { + int index = atoi(piece); + entry->morphcode = pHMgr->get_aliasm(index); + } else { + if (complexprefixes) { // XXX - fix me for morph. gen. + if (utf8) + reverseword_utf(piece); + else + reverseword(piece); + } + // add the remaining of the line + if (*tp) { + *(tp - 1) = ' '; + tp = tp + strlen(tp); + } + entry->morphcode = mystrdup(piece); + if (!entry->morphcode) + return 1; + } + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + // check to make sure we parsed enough pieces + if (np < 4) { + char* err = pHMgr->encode_flag(aflag); + if (err) { + HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n", + af->getlinenum(), err); + free(err); + } + return 1; + } + +#ifdef DEBUG + // detect unnecessary fields, excepting comments + if (basefieldnum) { + int fieldnum = + !(entry->morphcode) ? 5 : ((*(entry->morphcode) == '#') ? 5 : 6); + if (fieldnum != basefieldnum) + HUNSPELL_WARNING(stderr, "warning: line %d: bad field number\n", + af->getlinenum()); + } else { + basefieldnum = + !(entry->morphcode) ? 5 : ((*(entry->morphcode) == '#') ? 5 : 6); + } +#endif + } + + // now create SfxEntry or PfxEntry objects and use links to + // build an ordered (sorted by affix string) list + for (std::vector::iterator entry = start; entry != end; ++entry) { + if (at == 'P') { + PfxEntry* pfxptr = new PfxEntry(this, &(*entry)); + build_pfxtree(pfxptr); + } else { + SfxEntry* sfxptr = new SfxEntry(this, &(*entry)); + build_sfxtree(sfxptr); + } + } + return 0; +} + +int AffixMgr::redundant_condition(char ft, + char* strip, + int stripl, + const char* cond, + int linenum) { + int condl = strlen(cond); + int i; + int j; + int neg; + int in; + if (ft == 'P') { // prefix + if (strncmp(strip, cond, condl) == 0) + return 1; + if (utf8) { + } else { + for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) { + if (cond[j] != '[') { + if (cond[j] != strip[i]) { + HUNSPELL_WARNING(stderr, + "warning: line %d: incompatible stripping " + "characters and condition\n", + linenum); + return 0; + } + } else { + neg = (cond[j + 1] == '^') ? 1 : 0; + in = 0; + do { + j++; + if (strip[i] == cond[j]) + in = 1; + } while ((j < (condl - 1)) && (cond[j] != ']')); + if (j == (condl - 1) && (cond[j] != ']')) { + HUNSPELL_WARNING(stderr, + "error: line %d: missing ] in condition:\n%s\n", + linenum, cond); + return 0; + } + if ((!neg && !in) || (neg && in)) { + HUNSPELL_WARNING(stderr, + "warning: line %d: incompatible stripping " + "characters and condition\n", + linenum); + return 0; + } + } + } + if (j >= condl) + return 1; + } + } else { // suffix + if ((stripl >= condl) && strcmp(strip + stripl - condl, cond) == 0) + return 1; + if (utf8) { + } else { + for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) { + if (cond[j] != ']') { + if (cond[j] != strip[i]) { + HUNSPELL_WARNING(stderr, + "warning: line %d: incompatible stripping " + "characters and condition\n", + linenum); + return 0; + } + } else { + in = 0; + do { + j--; + if (strip[i] == cond[j]) + in = 1; + } while ((j > 0) && (cond[j] != '[')); + if ((j == 0) && (cond[j] != '[')) { + HUNSPELL_WARNING(stderr, + "error: line: %d: missing ] in condition:\n%s\n", + linenum, cond); + return 0; + } + neg = (cond[j + 1] == '^') ? 1 : 0; + if ((!neg && !in) || (neg && in)) { + HUNSPELL_WARNING(stderr, + "warning: line %d: incompatible stripping " + "characters and condition\n", + linenum); + return 0; + } + } + } + if (j < 0) + return 1; + } + } + return 0; +} + +int AffixMgr::get_suffix_words(short unsigned* suff, + int len, + const char* root_word, + char** slst) { + int suff_words_cnt = 0; + short unsigned* start_ptr = suff; + for (int j = 0; j < SETSIZE; j++) { + SfxEntry* ptr = sStart[j]; + while (ptr) { + suff = start_ptr; + for (int i = 0; i < len; i++) { + if ((*suff) == ptr->getFlag()) { + std::string nw(root_word); + nw.append(ptr->getAffix()); + hentry* ht = ptr->checkword(nw.c_str(), nw.size(), 0, NULL, NULL, 0, + NULL, 0, 0, 0); + if (ht) { + slst[suff_words_cnt] = (char*)malloc(MAXWORDUTF8LEN * sizeof(char)); + if (slst[suff_words_cnt]) { + strcpy(slst[suff_words_cnt], nw.c_str()); + suff_words_cnt++; + } + } + } + suff++; + } + ptr = ptr->getNext(); + } + } + return suff_words_cnt; +} diff --git a/libs/hunspell/src/affixmgr.cxx b/libs/hunspell/src/affixmgr.cxx deleted file mode 100644 index d21ff49573..0000000000 --- a/libs/hunspell/src/affixmgr.cxx +++ /dev/null @@ -1,5128 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -/* - * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada - * And Contributors. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. All modifications to the source code must be clearly marked as - * such. Binary redistributions based on modified source code - * must be clearly marked as modified versions in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include - -#include - -#include - -#include "affixmgr.hxx" -#include "affentry.hxx" -#include "langnum.hxx" - -#include "csutil.hxx" - -AffixMgr::AffixMgr(const char* affpath, - HashMgr** ptr, - int* md, - const char* key) { - // register hash manager and load affix data from aff file - pHMgr = ptr[0]; - alldic = ptr; - maxdic = md; - keystring = NULL; - trystring = NULL; - encoding = NULL; - csconv = NULL; - utf8 = 0; - complexprefixes = 0; - maptable = NULL; - nummap = 0; - breaktable = NULL; - numbreak = -1; - reptable = NULL; - numrep = 0; - iconvtable = NULL; - oconvtable = NULL; - checkcpdtable = NULL; - // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN) - simplifiedcpd = 0; - numcheckcpd = 0; - defcpdtable = NULL; - numdefcpd = 0; - phone = NULL; - compoundflag = FLAG_NULL; // permits word in compound forms - compoundbegin = FLAG_NULL; // may be first word in compound forms - compoundmiddle = FLAG_NULL; // may be middle word in compound forms - compoundend = FLAG_NULL; // may be last word in compound forms - compoundroot = FLAG_NULL; // compound word signing flag - compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word - compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word - compoundmoresuffixes = 0; // allow more suffixes within compound words - checkcompounddup = 0; // forbid double words in compounds - checkcompoundrep = 0; // forbid bad compounds (may be non compound word with - // a REP substitution) - checkcompoundcase = - 0; // forbid upper and lowercase combinations at word bounds - checkcompoundtriple = 0; // forbid compounds with triple letters - simplifiedtriple = 0; // allow simplified triple letters in compounds - // (Schiff+fahrt -> Schiffahrt) - forbiddenword = FORBIDDENWORD; // forbidden word signing flag - nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag - nongramsuggest = FLAG_NULL; - lang = NULL; // language - langnum = 0; // language code (see http://l10n.openoffice.org/languages.html) - needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes - cpdwordmax = -1; // default: unlimited wordcount in compound words - cpdmin = -1; // undefined - cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words - cpdvowels = NULL; // vowels (for calculating of Hungarian compounding limit, - // O(n) search! XXX) - cpdvowels_utf16 = - NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search) - cpdvowels_utf16_len = 0; // vowels - pfxappnd = NULL; // previous prefix for counting syllables of the prefix BUG - sfxappnd = NULL; // previous suffix for counting syllables of the suffix BUG - sfxextra = 0; // modifier for syllable count of sfxappnd BUG - cpdsyllablenum = NULL; // syllable count incrementing flag - checknum = 0; // checking numbers, and word with numbers - wordchars = NULL; // letters + spec. word characters - wordchars_utf16 = NULL; // letters + spec. word characters - wordchars_utf16_len = 0; // letters + spec. word characters - ignorechars = NULL; // letters + spec. word characters - ignorechars_utf16 = NULL; // letters + spec. word characters - ignorechars_utf16_len = 0; // letters + spec. word characters - version = NULL; // affix and dictionary file version string - havecontclass = 0; // flags of possible continuing classes (double affix) - // LEMMA_PRESENT: not put root into the morphological output. Lemma presents - // in morhological description in dictionary file. It's often combined with - // PSEUDOROOT. - lemma_present = FLAG_NULL; - circumfix = FLAG_NULL; - onlyincompound = FLAG_NULL; - maxngramsugs = -1; // undefined - maxdiff = -1; // undefined - onlymaxdiff = 0; - maxcpdsugs = -1; // undefined - nosplitsugs = 0; - sugswithdots = 0; - keepcase = 0; - forceucase = 0; - warn = 0; - forbidwarn = 0; - checksharps = 0; - substandard = FLAG_NULL; - fullstrip = 0; - - sfx = NULL; - pfx = NULL; - - for (int i = 0; i < SETSIZE; i++) { - pStart[i] = NULL; - sStart[i] = NULL; - pFlag[i] = NULL; - sFlag[i] = NULL; - } - - for (int j = 0; j < CONTSIZE; j++) { - contclasses[j] = 0; - } - - if (parse_file(affpath, key)) { - HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n", affpath); - } - - if (cpdmin == -1) - cpdmin = MINCPDLEN; -} - -AffixMgr::~AffixMgr() { - // pass through linked prefix entries and clean up - for (int i = 0; i < SETSIZE; i++) { - pFlag[i] = NULL; - PfxEntry* ptr = pStart[i]; - PfxEntry* nptr = NULL; - while (ptr) { - nptr = ptr->getNext(); - delete (ptr); - ptr = nptr; - nptr = NULL; - } - } - - // pass through linked suffix entries and clean up - for (int j = 0; j < SETSIZE; j++) { - sFlag[j] = NULL; - SfxEntry* ptr = sStart[j]; - SfxEntry* nptr = NULL; - while (ptr) { - nptr = ptr->getNext(); - delete (ptr); - ptr = nptr; - nptr = NULL; - } - sStart[j] = NULL; - } - - if (keystring) - free(keystring); - keystring = NULL; - if (trystring) - free(trystring); - trystring = NULL; - if (encoding) - free(encoding); - encoding = NULL; - if (maptable) { - for (int j = 0; j < nummap; j++) { - for (int k = 0; k < maptable[j].len; k++) { - if (maptable[j].set[k]) - free(maptable[j].set[k]); - } - free(maptable[j].set); - maptable[j].set = NULL; - maptable[j].len = 0; - } - free(maptable); - maptable = NULL; - } - nummap = 0; - if (breaktable) { - for (int j = 0; j < numbreak; j++) { - if (breaktable[j]) - free(breaktable[j]); - breaktable[j] = NULL; - } - free(breaktable); - breaktable = NULL; - } - numbreak = 0; - if (reptable) { - for (int j = 0; j < numrep; j++) { - free(reptable[j].pattern); - free(reptable[j].pattern2); - } - free(reptable); - reptable = NULL; - } - if (iconvtable) - delete iconvtable; - if (oconvtable) - delete oconvtable; - if (phone && phone->rules) { - for (int j = 0; j < phone->num + 1; j++) { - free(phone->rules[j * 2]); - free(phone->rules[j * 2 + 1]); - } - free(phone->rules); - free(phone); - phone = NULL; - } - - if (defcpdtable) { - for (int j = 0; j < numdefcpd; j++) { - free(defcpdtable[j].def); - defcpdtable[j].def = NULL; - } - free(defcpdtable); - defcpdtable = NULL; - } - numrep = 0; - if (checkcpdtable) { - for (int j = 0; j < numcheckcpd; j++) { - free(checkcpdtable[j].pattern); - free(checkcpdtable[j].pattern2); - free(checkcpdtable[j].pattern3); - checkcpdtable[j].pattern = NULL; - checkcpdtable[j].pattern2 = NULL; - checkcpdtable[j].pattern3 = NULL; - } - free(checkcpdtable); - checkcpdtable = NULL; - } - numcheckcpd = 0; - FREE_FLAG(compoundflag); - FREE_FLAG(compoundbegin); - FREE_FLAG(compoundmiddle); - FREE_FLAG(compoundend); - FREE_FLAG(compoundpermitflag); - FREE_FLAG(compoundforbidflag); - FREE_FLAG(compoundroot); - FREE_FLAG(forbiddenword); - FREE_FLAG(nosuggest); - FREE_FLAG(nongramsuggest); - FREE_FLAG(needaffix); - FREE_FLAG(lemma_present); - FREE_FLAG(circumfix); - FREE_FLAG(onlyincompound); - - cpdwordmax = 0; - pHMgr = NULL; - cpdmin = 0; - cpdmaxsyllable = 0; - if (cpdvowels) - free(cpdvowels); - if (cpdvowels_utf16) - free(cpdvowels_utf16); - if (cpdsyllablenum) - free(cpdsyllablenum); - free_utf_tbl(); - if (lang) - free(lang); - if (wordchars) - free(wordchars); - if (wordchars_utf16) - free(wordchars_utf16); - if (ignorechars) - free(ignorechars); - if (ignorechars_utf16) - free(ignorechars_utf16); - if (version) - free(version); - checknum = 0; -#ifdef MOZILLA_CLIENT - delete[] csconv; -#endif -} - -void AffixMgr::finishFileMgr(FileMgr* afflst) { - delete afflst; - - // convert affix trees to sorted list - process_pfx_tree_to_list(); - process_sfx_tree_to_list(); -} - -// read in aff file and build up prefix and suffix entry objects -int AffixMgr::parse_file(const char* affpath, const char* key) { - char* line; // io buffers - char ft; // affix type - - // checking flag duplication - char dupflags[CONTSIZE]; - char dupflags_ini = 1; - - // first line indicator for removing byte order mark - int firstline = 1; - - // open the affix file - FileMgr* afflst = new FileMgr(affpath, key); - if (!afflst) { - HUNSPELL_WARNING( - stderr, "error: could not open affix description file %s\n", affpath); - return 1; - } - - // step one is to parse the affix file building up the internal - // affix data structures - - // read in each line ignoring any that do not - // start with a known line type indicator - while ((line = afflst->getline()) != NULL) { - mychomp(line); - - /* remove byte order mark */ - if (firstline) { - firstline = 0; - // Affix file begins with byte order mark: possible incompatibility with - // old Hunspell versions - if (strncmp(line, "\xEF\xBB\xBF", 3) == 0) { - memmove(line, line + 3, strlen(line + 3) + 1); - } - } - - /* parse in the keyboard string */ - if (strncmp(line, "KEY", 3) == 0) { - if (parse_string(line, &keystring, afflst->getlinenum())) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the try string */ - if (strncmp(line, "TRY", 3) == 0) { - if (parse_string(line, &trystring, afflst->getlinenum())) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the name of the character set used by the .dict and .aff */ - if (strncmp(line, "SET", 3) == 0) { - if (parse_string(line, &encoding, afflst->getlinenum())) { - finishFileMgr(afflst); - return 1; - } - if (strcmp(encoding, "UTF-8") == 0) { - utf8 = 1; -#ifndef OPENOFFICEORG -#ifndef MOZILLA_CLIENT - if (initialize_utf_tbl()) { - finishFileMgr(afflst); - return 1; - } -#endif -#endif - } - } - - /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left - * writing system */ - if (strncmp(line, "COMPLEXPREFIXES", 15) == 0) - complexprefixes = 1; - - /* parse in the flag used by the controlled compound words */ - if (strncmp(line, "COMPOUNDFLAG", 12) == 0) { - if (parse_flag(line, &compoundflag, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by compound words */ - if (strncmp(line, "COMPOUNDBEGIN", 13) == 0) { - if (complexprefixes) { - if (parse_flag(line, &compoundend, afflst)) { - finishFileMgr(afflst); - return 1; - } - } else { - if (parse_flag(line, &compoundbegin, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - } - - /* parse in the flag used by compound words */ - if (strncmp(line, "COMPOUNDMIDDLE", 14) == 0) { - if (parse_flag(line, &compoundmiddle, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - /* parse in the flag used by compound words */ - if (strncmp(line, "COMPOUNDEND", 11) == 0) { - if (complexprefixes) { - if (parse_flag(line, &compoundbegin, afflst)) { - finishFileMgr(afflst); - return 1; - } - } else { - if (parse_flag(line, &compoundend, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - } - - /* parse in the data used by compound_check() method */ - if (strncmp(line, "COMPOUNDWORDMAX", 15) == 0) { - if (parse_num(line, &cpdwordmax, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag sign compounds in dictionary */ - if (strncmp(line, "COMPOUNDROOT", 12) == 0) { - if (parse_flag(line, &compoundroot, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by compound_check() method */ - if (strncmp(line, "COMPOUNDPERMITFLAG", 18) == 0) { - if (parse_flag(line, &compoundpermitflag, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by compound_check() method */ - if (strncmp(line, "COMPOUNDFORBIDFLAG", 18) == 0) { - if (parse_flag(line, &compoundforbidflag, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - if (strncmp(line, "COMPOUNDMORESUFFIXES", 20) == 0) { - compoundmoresuffixes = 1; - } - - if (strncmp(line, "CHECKCOMPOUNDDUP", 16) == 0) { - checkcompounddup = 1; - } - - if (strncmp(line, "CHECKCOMPOUNDREP", 16) == 0) { - checkcompoundrep = 1; - } - - if (strncmp(line, "CHECKCOMPOUNDTRIPLE", 19) == 0) { - checkcompoundtriple = 1; - } - - if (strncmp(line, "SIMPLIFIEDTRIPLE", 16) == 0) { - simplifiedtriple = 1; - } - - if (strncmp(line, "CHECKCOMPOUNDCASE", 17) == 0) { - checkcompoundcase = 1; - } - - if (strncmp(line, "NOSUGGEST", 9) == 0) { - if (parse_flag(line, &nosuggest, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - if (strncmp(line, "NONGRAMSUGGEST", 14) == 0) { - if (parse_flag(line, &nongramsuggest, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by forbidden words */ - if (strncmp(line, "FORBIDDENWORD", 13) == 0) { - if (parse_flag(line, &forbiddenword, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by forbidden words */ - if (strncmp(line, "LEMMA_PRESENT", 13) == 0) { - if (parse_flag(line, &lemma_present, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by circumfixes */ - if (strncmp(line, "CIRCUMFIX", 9) == 0) { - if (parse_flag(line, &circumfix, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by fogemorphemes */ - if (strncmp(line, "ONLYINCOMPOUND", 14) == 0) { - if (parse_flag(line, &onlyincompound, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by `needaffixs' */ - if (strncmp(line, "PSEUDOROOT", 10) == 0) { - if (parse_flag(line, &needaffix, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by `needaffixs' */ - if (strncmp(line, "NEEDAFFIX", 9) == 0) { - if (parse_flag(line, &needaffix, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the minimal length for words in compounds */ - if (strncmp(line, "COMPOUNDMIN", 11) == 0) { - if (parse_num(line, &cpdmin, afflst)) { - finishFileMgr(afflst); - return 1; - } - if (cpdmin < 1) - cpdmin = 1; - } - - /* parse in the max. words and syllables in compounds */ - if (strncmp(line, "COMPOUNDSYLLABLE", 16) == 0) { - if (parse_cpdsyllable(line, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by compound_check() method */ - if (strncmp(line, "SYLLABLENUM", 11) == 0) { - if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by the controlled compound words */ - if (strncmp(line, "CHECKNUM", 8) == 0) { - checknum = 1; - } - - /* parse in the extra word characters */ - if (strncmp(line, "WORDCHARS", 9) == 0) { - if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, - utf8, afflst->getlinenum())) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the ignored characters (for example, Arabic optional diacretics - * charachters */ - if (strncmp(line, "IGNORE", 6) == 0) { - if (parse_array(line, &ignorechars, &ignorechars_utf16, - &ignorechars_utf16_len, utf8, afflst->getlinenum())) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the typical fault correcting table */ - if (strncmp(line, "REP", 3) == 0) { - if (parse_reptable(line, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the input conversion table */ - if (strncmp(line, "ICONV", 5) == 0) { - if (parse_convtable(line, afflst, &iconvtable, "ICONV")) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the input conversion table */ - if (strncmp(line, "OCONV", 5) == 0) { - if (parse_convtable(line, afflst, &oconvtable, "OCONV")) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the phonetic translation table */ - if (strncmp(line, "PHONE", 5) == 0) { - if (parse_phonetable(line, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the checkcompoundpattern table */ - if (strncmp(line, "CHECKCOMPOUNDPATTERN", 20) == 0) { - if (parse_checkcpdtable(line, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the defcompound table */ - if (strncmp(line, "COMPOUNDRULE", 12) == 0) { - if (parse_defcpdtable(line, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the related character map table */ - if (strncmp(line, "MAP", 3) == 0) { - if (parse_maptable(line, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the word breakpoints table */ - if (strncmp(line, "BREAK", 5) == 0) { - if (parse_breaktable(line, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the language for language specific codes */ - if (strncmp(line, "LANG", 4) == 0) { - if (parse_string(line, &lang, afflst->getlinenum())) { - finishFileMgr(afflst); - return 1; - } - langnum = get_lang_num(lang); - } - - if (strncmp(line, "VERSION", 7) == 0) { - for (line = line + 7; *line == ' ' || *line == '\t'; line++) - ; - version = mystrdup(line); - } - - if (strncmp(line, "MAXNGRAMSUGS", 12) == 0) { - if (parse_num(line, &maxngramsugs, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - if (strncmp(line, "ONLYMAXDIFF", 11) == 0) - onlymaxdiff = 1; - - if (strncmp(line, "MAXDIFF", 7) == 0) { - if (parse_num(line, &maxdiff, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - if (strncmp(line, "MAXCPDSUGS", 10) == 0) { - if (parse_num(line, &maxcpdsugs, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - if (strncmp(line, "NOSPLITSUGS", 11) == 0) { - nosplitsugs = 1; - } - - if (strncmp(line, "FULLSTRIP", 9) == 0) { - fullstrip = 1; - } - - if (strncmp(line, "SUGSWITHDOTS", 12) == 0) { - sugswithdots = 1; - } - - /* parse in the flag used by forbidden words */ - if (strncmp(line, "KEEPCASE", 8) == 0) { - if (parse_flag(line, &keepcase, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by `forceucase' */ - if (strncmp(line, "FORCEUCASE", 10) == 0) { - if (parse_flag(line, &forceucase, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - /* parse in the flag used by `warn' */ - if (strncmp(line, "WARN", 4) == 0) { - if (parse_flag(line, &warn, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - if (strncmp(line, "FORBIDWARN", 10) == 0) { - forbidwarn = 1; - } - - /* parse in the flag used by the affix generator */ - if (strncmp(line, "SUBSTANDARD", 11) == 0) { - if (parse_flag(line, &substandard, afflst)) { - finishFileMgr(afflst); - return 1; - } - } - - if (strncmp(line, "CHECKSHARPS", 11) == 0) { - checksharps = 1; - } - - /* parse this affix: P - prefix, S - suffix */ - ft = ' '; - if (strncmp(line, "PFX", 3) == 0) - ft = complexprefixes ? 'S' : 'P'; - if (strncmp(line, "SFX", 3) == 0) - ft = complexprefixes ? 'P' : 'S'; - if (ft != ' ') { - if (dupflags_ini) { - memset(dupflags, 0, sizeof(dupflags)); - dupflags_ini = 0; - } - if (parse_affix(line, ft, afflst, dupflags)) { - finishFileMgr(afflst); - return 1; - } - } - } - - finishFileMgr(afflst); - // affix trees are sorted now - - // now we can speed up performance greatly taking advantage of the - // relationship between the affixes and the idea of "subsets". - - // View each prefix as a potential leading subset of another and view - // each suffix (reversed) as a potential trailing subset of another. - - // To illustrate this relationship if we know the prefix "ab" is found in the - // word to examine, only prefixes that "ab" is a leading subset of need be - // examined. - // Furthermore is "ab" is not present then none of the prefixes that "ab" is - // is a subset need be examined. - // The same argument goes for suffix string that are reversed. - - // Then to top this off why not examine the first char of the word to quickly - // limit the set of prefixes to examine (i.e. the prefixes to examine must - // be leading supersets of the first character of the word (if they exist) - - // To take advantage of this "subset" relationship, we need to add two links - // from entry. One to take next if the current prefix is found (call it - // nexteq) - // and one to take next if the current prefix is not found (call it nextne). - - // Since we have built ordered lists, all that remains is to properly - // initialize - // the nextne and nexteq pointers that relate them - - process_pfx_order(); - process_sfx_order(); - - /* get encoding for CHECKCOMPOUNDCASE */ - if (!utf8) { - char* enc = get_encoding(); - csconv = get_current_cs(enc); - free(enc); - enc = NULL; - - std::string expw; - if (wordchars) { - expw.assign(wordchars); - free(wordchars); - } - - for (int i = 0; i <= 255; i++) { - if ((csconv[i].cupper != csconv[i].clower) && - (expw.find((char)i) == std::string::npos)) { - expw.push_back((char)i); - } - } - - wordchars = mystrdup(expw.c_str()); - } - - // default BREAK definition - if (numbreak == -1) { - breaktable = (char**)malloc(sizeof(char*) * 3); - if (!breaktable) - return 1; - breaktable[0] = mystrdup("-"); - breaktable[1] = mystrdup("^-"); - breaktable[2] = mystrdup("-$"); - if (breaktable[0] && breaktable[1] && breaktable[2]) - numbreak = 3; - } - return 0; -} - -// we want to be able to quickly access prefix information -// both by prefix flag, and sorted by prefix string itself -// so we need to set up two indexes - -int AffixMgr::build_pfxtree(PfxEntry* pfxptr) { - PfxEntry* ptr; - PfxEntry* pptr; - PfxEntry* ep = pfxptr; - - // get the right starting points - const char* key = ep->getKey(); - const unsigned char flg = (unsigned char)(ep->getFlag() & 0x00FF); - - // first index by flag which must exist - ptr = pFlag[flg]; - ep->setFlgNxt(ptr); - pFlag[flg] = ep; - - // handle the special case of null affix string - if (strlen(key) == 0) { - // always inset them at head of list at element 0 - ptr = pStart[0]; - ep->setNext(ptr); - pStart[0] = ep; - return 0; - } - - // now handle the normal case - ep->setNextEQ(NULL); - ep->setNextNE(NULL); - - unsigned char sp = *((const unsigned char*)key); - ptr = pStart[sp]; - - // handle the first insert - if (!ptr) { - pStart[sp] = ep; - return 0; - } - - // otherwise use binary tree insertion so that a sorted - // list can easily be generated later - pptr = NULL; - for (;;) { - pptr = ptr; - if (strcmp(ep->getKey(), ptr->getKey()) <= 0) { - ptr = ptr->getNextEQ(); - if (!ptr) { - pptr->setNextEQ(ep); - break; - } - } else { - ptr = ptr->getNextNE(); - if (!ptr) { - pptr->setNextNE(ep); - break; - } - } - } - return 0; -} - -// we want to be able to quickly access suffix information -// both by suffix flag, and sorted by the reverse of the -// suffix string itself; so we need to set up two indexes -int AffixMgr::build_sfxtree(SfxEntry* sfxptr) { - SfxEntry* ptr; - SfxEntry* pptr; - SfxEntry* ep = sfxptr; - - /* get the right starting point */ - const char* key = ep->getKey(); - const unsigned char flg = (unsigned char)(ep->getFlag() & 0x00FF); - - // first index by flag which must exist - ptr = sFlag[flg]; - ep->setFlgNxt(ptr); - sFlag[flg] = ep; - - // next index by affix string - - // handle the special case of null affix string - if (strlen(key) == 0) { - // always inset them at head of list at element 0 - ptr = sStart[0]; - ep->setNext(ptr); - sStart[0] = ep; - return 0; - } - - // now handle the normal case - ep->setNextEQ(NULL); - ep->setNextNE(NULL); - - unsigned char sp = *((const unsigned char*)key); - ptr = sStart[sp]; - - // handle the first insert - if (!ptr) { - sStart[sp] = ep; - return 0; - } - - // otherwise use binary tree insertion so that a sorted - // list can easily be generated later - pptr = NULL; - for (;;) { - pptr = ptr; - if (strcmp(ep->getKey(), ptr->getKey()) <= 0) { - ptr = ptr->getNextEQ(); - if (!ptr) { - pptr->setNextEQ(ep); - break; - } - } else { - ptr = ptr->getNextNE(); - if (!ptr) { - pptr->setNextNE(ep); - break; - } - } - } - return 0; -} - -// convert from binary tree to sorted list -int AffixMgr::process_pfx_tree_to_list() { - for (int i = 1; i < SETSIZE; i++) { - pStart[i] = process_pfx_in_order(pStart[i], NULL); - } - return 0; -} - -PfxEntry* AffixMgr::process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr) { - if (ptr) { - nptr = process_pfx_in_order(ptr->getNextNE(), nptr); - ptr->setNext(nptr); - nptr = process_pfx_in_order(ptr->getNextEQ(), ptr); - } - return nptr; -} - -// convert from binary tree to sorted list -int AffixMgr::process_sfx_tree_to_list() { - for (int i = 1; i < SETSIZE; i++) { - sStart[i] = process_sfx_in_order(sStart[i], NULL); - } - return 0; -} - -SfxEntry* AffixMgr::process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr) { - if (ptr) { - nptr = process_sfx_in_order(ptr->getNextNE(), nptr); - ptr->setNext(nptr); - nptr = process_sfx_in_order(ptr->getNextEQ(), ptr); - } - return nptr; -} - -// reinitialize the PfxEntry links NextEQ and NextNE to speed searching -// using the idea of leading subsets this time -int AffixMgr::process_pfx_order() { - PfxEntry* ptr; - - // loop through each prefix list starting point - for (int i = 1; i < SETSIZE; i++) { - ptr = pStart[i]; - - // look through the remainder of the list - // and find next entry with affix that - // the current one is not a subset of - // mark that as destination for NextNE - // use next in list that you are a subset - // of as NextEQ - - for (; ptr != NULL; ptr = ptr->getNext()) { - PfxEntry* nptr = ptr->getNext(); - for (; nptr != NULL; nptr = nptr->getNext()) { - if (!isSubset(ptr->getKey(), nptr->getKey())) - break; - } - ptr->setNextNE(nptr); - ptr->setNextEQ(NULL); - if ((ptr->getNext()) && - isSubset(ptr->getKey(), (ptr->getNext())->getKey())) - ptr->setNextEQ(ptr->getNext()); - } - - // now clean up by adding smart search termination strings: - // if you are already a superset of the previous prefix - // but not a subset of the next, search can end here - // so set NextNE properly - - ptr = pStart[i]; - for (; ptr != NULL; ptr = ptr->getNext()) { - PfxEntry* nptr = ptr->getNext(); - PfxEntry* mptr = NULL; - for (; nptr != NULL; nptr = nptr->getNext()) { - if (!isSubset(ptr->getKey(), nptr->getKey())) - break; - mptr = nptr; - } - if (mptr) - mptr->setNextNE(NULL); - } - } - return 0; -} - -// initialize the SfxEntry links NextEQ and NextNE to speed searching -// using the idea of leading subsets this time -int AffixMgr::process_sfx_order() { - SfxEntry* ptr; - - // loop through each prefix list starting point - for (int i = 1; i < SETSIZE; i++) { - ptr = sStart[i]; - - // look through the remainder of the list - // and find next entry with affix that - // the current one is not a subset of - // mark that as destination for NextNE - // use next in list that you are a subset - // of as NextEQ - - for (; ptr != NULL; ptr = ptr->getNext()) { - SfxEntry* nptr = ptr->getNext(); - for (; nptr != NULL; nptr = nptr->getNext()) { - if (!isSubset(ptr->getKey(), nptr->getKey())) - break; - } - ptr->setNextNE(nptr); - ptr->setNextEQ(NULL); - if ((ptr->getNext()) && - isSubset(ptr->getKey(), (ptr->getNext())->getKey())) - ptr->setNextEQ(ptr->getNext()); - } - - // now clean up by adding smart search termination strings: - // if you are already a superset of the previous suffix - // but not a subset of the next, search can end here - // so set NextNE properly - - ptr = sStart[i]; - for (; ptr != NULL; ptr = ptr->getNext()) { - SfxEntry* nptr = ptr->getNext(); - SfxEntry* mptr = NULL; - for (; nptr != NULL; nptr = nptr->getNext()) { - if (!isSubset(ptr->getKey(), nptr->getKey())) - break; - mptr = nptr; - } - if (mptr) - mptr->setNextNE(NULL); - } - } - return 0; -} - -// add flags to the result for dictionary debugging -void AffixMgr::debugflag(char* result, unsigned short flag) { - char* st = encode_flag(flag); - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, MORPH_FLAG, MAXLNLEN); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } -} - -// add flags to the result for dictionary debugging -std::string& AffixMgr::debugflag(std::string& result, unsigned short flag) { - char* st = encode_flag(flag); - result.append(" "); - result.append(MORPH_FLAG); - if (st) { - result.append(st); - free(st); - } - return result; -} - -// calculate the character length of the condition -int AffixMgr::condlen(char* st) { - int l = 0; - bool group = false; - for (; *st; st++) { - if (*st == '[') { - group = true; - l++; - } else if (*st == ']') - group = false; - else if (!group && (!utf8 || (!(*st & 0x80) || ((*st & 0xc0) == 0x80)))) - l++; - } - return l; -} - -int AffixMgr::encodeit(affentry& entry, char* cs) { - if (strcmp(cs, ".") != 0) { - entry.numconds = (char)condlen(cs); - // coverity[buffer_size_warning] - deliberate use of lack of end of conds - // padded by strncpy as long condition flag - strncpy(entry.c.conds, cs, MAXCONDLEN); - if (entry.c.conds[MAXCONDLEN - 1] && cs[MAXCONDLEN]) { - entry.opts += aeLONGCOND; - entry.c.l.conds2 = mystrdup(cs + MAXCONDLEN_1); - if (!entry.c.l.conds2) - return 1; - } - } else { - entry.numconds = 0; - entry.c.conds[0] = '\0'; - } - return 0; -} - -// return 1 if s1 is a leading subset of s2 (dots are for infixes) -inline int AffixMgr::isSubset(const char* s1, const char* s2) { - while (((*s1 == *s2) || (*s1 == '.')) && (*s1 != '\0')) { - s1++; - s2++; - } - return (*s1 == '\0'); -} - -// check word for prefixes -struct hentry* AffixMgr::prefix_check(const char* word, - int len, - char in_compound, - const FLAG needflag) { - struct hentry* rv = NULL; - - pfx = NULL; - pfxappnd = NULL; - sfxappnd = NULL; - sfxextra = 0; - - // first handle the special case of 0 length prefixes - PfxEntry* pe = pStart[0]; - while (pe) { - if ( - // fogemorpheme - ((in_compound != IN_CPD_NOT) || - !(pe->getCont() && - (TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())))) && - // permit prefixes in compounds - ((in_compound != IN_CPD_END) || - (pe->getCont() && - (TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen()))))) { - // check prefix - rv = pe->checkword(word, len, in_compound, needflag); - if (rv) { - pfx = pe; // BUG: pfx not stateless - return rv; - } - } - pe = pe->getNext(); - } - - // now handle the general case - unsigned char sp = *((const unsigned char*)word); - PfxEntry* pptr = pStart[sp]; - - while (pptr) { - if (isSubset(pptr->getKey(), word)) { - if ( - // fogemorpheme - ((in_compound != IN_CPD_NOT) || - !(pptr->getCont() && - (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen())))) && - // permit prefixes in compounds - ((in_compound != IN_CPD_END) || - (pptr->getCont() && (TESTAFF(pptr->getCont(), compoundpermitflag, - pptr->getContLen()))))) { - // check prefix - rv = pptr->checkword(word, len, in_compound, needflag); - if (rv) { - pfx = pptr; // BUG: pfx not stateless - return rv; - } - } - pptr = pptr->getNextEQ(); - } else { - pptr = pptr->getNextNE(); - } - } - - return NULL; -} - -// check word for prefixes -struct hentry* AffixMgr::prefix_check_twosfx(const char* word, - int len, - char in_compound, - const FLAG needflag) { - struct hentry* rv = NULL; - - pfx = NULL; - sfxappnd = NULL; - sfxextra = 0; - - // first handle the special case of 0 length prefixes - PfxEntry* pe = pStart[0]; - - while (pe) { - rv = pe->check_twosfx(word, len, in_compound, needflag); - if (rv) - return rv; - pe = pe->getNext(); - } - - // now handle the general case - unsigned char sp = *((const unsigned char*)word); - PfxEntry* pptr = pStart[sp]; - - while (pptr) { - if (isSubset(pptr->getKey(), word)) { - rv = pptr->check_twosfx(word, len, in_compound, needflag); - if (rv) { - pfx = pptr; - return rv; - } - pptr = pptr->getNextEQ(); - } else { - pptr = pptr->getNextNE(); - } - } - - return NULL; -} - -// check word for prefixes -char* AffixMgr::prefix_check_morph(const char* word, - int len, - char in_compound, - const FLAG needflag) { - char* st; - - char result[MAXLNLEN]; - result[0] = '\0'; - - pfx = NULL; - sfxappnd = NULL; - sfxextra = 0; - - // first handle the special case of 0 length prefixes - PfxEntry* pe = pStart[0]; - while (pe) { - st = pe->check_morph(word, len, in_compound, needflag); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - // if (rv) return rv; - pe = pe->getNext(); - } - - // now handle the general case - unsigned char sp = *((const unsigned char*)word); - PfxEntry* pptr = pStart[sp]; - - while (pptr) { - if (isSubset(pptr->getKey(), word)) { - st = pptr->check_morph(word, len, in_compound, needflag); - if (st) { - // fogemorpheme - if ((in_compound != IN_CPD_NOT) || - !((pptr->getCont() && (TESTAFF(pptr->getCont(), onlyincompound, - pptr->getContLen()))))) { - mystrcat(result, st, MAXLNLEN); - pfx = pptr; - } - free(st); - } - pptr = pptr->getNextEQ(); - } else { - pptr = pptr->getNextNE(); - } - } - - if (*result) - return mystrdup(result); - return NULL; -} - -// check word for prefixes -char* AffixMgr::prefix_check_twosfx_morph(const char* word, - int len, - char in_compound, - const FLAG needflag) { - char* st; - - char result[MAXLNLEN]; - result[0] = '\0'; - - pfx = NULL; - sfxappnd = NULL; - sfxextra = 0; - - // first handle the special case of 0 length prefixes - PfxEntry* pe = pStart[0]; - while (pe) { - st = pe->check_twosfx_morph(word, len, in_compound, needflag); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - pe = pe->getNext(); - } - - // now handle the general case - unsigned char sp = *((const unsigned char*)word); - PfxEntry* pptr = pStart[sp]; - - while (pptr) { - if (isSubset(pptr->getKey(), word)) { - st = pptr->check_twosfx_morph(word, len, in_compound, needflag); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - pfx = pptr; - } - pptr = pptr->getNextEQ(); - } else { - pptr = pptr->getNextNE(); - } - } - - if (*result) - return mystrdup(result); - return NULL; -} - -// Is word a non compound with a REP substitution (see checkcompoundrep)? -int AffixMgr::cpdrep_check(const char* word, int wl) { - const char* r; - - if ((wl < 2) || !numrep) - return 0; - - for (int i = 0; i < numrep; i++) { - r = word; - int lenp = strlen(reptable[i].pattern); - // search every occurence of the pattern in the word - while ((r = strstr(r, reptable[i].pattern)) != NULL) { - std::string candidate(word); - candidate.replace(r - word, lenp, reptable[i].pattern2); - if (candidate_check(candidate.c_str(), candidate.size())) - return 1; - r++; // search for the next letter - } - } - return 0; -} - -// forbid compoundings when there are special patterns at word bound -int AffixMgr::cpdpat_check(const char* word, - int pos, - hentry* r1, - hentry* r2, - const char /*affixed*/) { - int len; - for (int i = 0; i < numcheckcpd; i++) { - if (isSubset(checkcpdtable[i].pattern2, word + pos) && - (!r1 || !checkcpdtable[i].cond || - (r1->astr && TESTAFF(r1->astr, checkcpdtable[i].cond, r1->alen))) && - (!r2 || !checkcpdtable[i].cond2 || - (r2->astr && TESTAFF(r2->astr, checkcpdtable[i].cond2, r2->alen))) && - // zero length pattern => only TESTAFF - // zero pattern (0/flag) => unmodified stem (zero affixes allowed) - (!*(checkcpdtable[i].pattern) || - ((*(checkcpdtable[i].pattern) == '0' && r1->blen <= pos && - strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) || - (*(checkcpdtable[i].pattern) != '0' && - ((len = strlen(checkcpdtable[i].pattern)) != 0) && - strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)))) { - return 1; - } - } - return 0; -} - -// forbid compounding with neighbouring upper and lower case characters at word -// bounds -int AffixMgr::cpdcase_check(const char* word, int pos) { - if (utf8) { - w_char u, w; - const char* p; - u8_u16(&u, 1, word + pos); - for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--) - ; - u8_u16(&w, 1, p); - unsigned short a = (u.h << 8) + u.l; - unsigned short b = (w.h << 8) + w.l; - if (((unicodetoupper(a, langnum) == a) || - (unicodetoupper(b, langnum) == b)) && - (a != '-') && (b != '-')) - return 1; - } else { - unsigned char a = *(word + pos - 1); - unsigned char b = *(word + pos); - if ((csconv[a].ccase || csconv[b].ccase) && (a != '-') && (b != '-')) - return 1; - } - return 0; -} - -// check compound patterns -int AffixMgr::defcpd_check(hentry*** words, - short wnum, - hentry* rv, - hentry** def, - char all) { - signed short - btpp[MAXWORDLEN]; // metacharacter (*, ?) positions for backtracking - signed short btwp[MAXWORDLEN]; // word positions for metacharacters - int btnum[MAXWORDLEN]; // number of matched characters in metacharacter - // positions - short bt = 0; - int i, j; - int ok; - int w = 0; - - if (!*words) { - w = 1; - *words = def; - } - - if (!*words) { - return 0; - } - - (*words)[wnum] = rv; - - // has the last word COMPOUNDRULE flag? - if (rv->alen == 0) { - (*words)[wnum] = NULL; - if (w) - *words = NULL; - return 0; - } - ok = 0; - for (i = 0; i < numdefcpd; i++) { - for (j = 0; j < defcpdtable[i].len; j++) { - if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' && - TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) { - ok = 1; - break; - } - } - } - if (ok == 0) { - (*words)[wnum] = NULL; - if (w) - *words = NULL; - return 0; - } - - for (i = 0; i < numdefcpd; i++) { - signed short pp = 0; // pattern position - signed short wp = 0; // "words" position - int ok2; - ok = 1; - ok2 = 1; - do { - while ((pp < defcpdtable[i].len) && (wp <= wnum)) { - if (((pp + 1) < defcpdtable[i].len) && - ((defcpdtable[i].def[pp + 1] == '*') || - (defcpdtable[i].def[pp + 1] == '?'))) { - int wend = (defcpdtable[i].def[pp + 1] == '?') ? wp : wnum; - ok2 = 1; - pp += 2; - btpp[bt] = pp; - btwp[bt] = wp; - while (wp <= wend) { - if (!(*words)[wp]->alen || - !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp - 2], - (*words)[wp]->alen)) { - ok2 = 0; - break; - } - wp++; - } - if (wp <= wnum) - ok2 = 0; - btnum[bt] = wp - btwp[bt]; - if (btnum[bt] > 0) - bt++; - if (ok2) - break; - } else { - ok2 = 1; - if (!(*words)[wp] || !(*words)[wp]->alen || - !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp], - (*words)[wp]->alen)) { - ok = 0; - break; - } - pp++; - wp++; - if ((defcpdtable[i].len == pp) && !(wp > wnum)) - ok = 0; - } - } - if (ok && ok2) { - int r = pp; - while ((defcpdtable[i].len > r) && ((r + 1) < defcpdtable[i].len) && - ((defcpdtable[i].def[r + 1] == '*') || - (defcpdtable[i].def[r + 1] == '?'))) - r += 2; - if (defcpdtable[i].len <= r) - return 1; - } - // backtrack - if (bt) - do { - ok = 1; - btnum[bt - 1]--; - pp = btpp[bt - 1]; - wp = btwp[bt - 1] + (signed short)btnum[bt - 1]; - } while ((btnum[bt - 1] < 0) && --bt); - } while (bt); - - if (ok && ok2 && (!all || (defcpdtable[i].len <= pp))) - return 1; - - // check zero ending - while (ok && ok2 && (defcpdtable[i].len > pp) && - ((pp + 1) < defcpdtable[i].len) && - ((defcpdtable[i].def[pp + 1] == '*') || - (defcpdtable[i].def[pp + 1] == '?'))) - pp += 2; - if (ok && ok2 && (defcpdtable[i].len <= pp)) - return 1; - } - (*words)[wnum] = NULL; - if (w) - *words = NULL; - return 0; -} - -inline int AffixMgr::candidate_check(const char* word, int len) { - struct hentry* rv = NULL; - - rv = lookup(word); - if (rv) - return 1; - - // rv = prefix_check(word,len,1); - // if (rv) return 1; - - rv = affix_check(word, len); - if (rv) - return 1; - return 0; -} - -// calculate number of syllable for compound-checking -short AffixMgr::get_syllable(const char* word, int wlen) { - if (cpdmaxsyllable == 0) - return 0; - - short num = 0; - - if (!utf8) { - for (int i = 0; i < wlen; i++) { - if (strchr(cpdvowels, word[i])) - num++; - } - } else if (cpdvowels_utf16) { - w_char w[MAXWORDUTF8LEN]; - int i = u8_u16(w, MAXWORDUTF8LEN, word); - for (; i > 0; i--) { - if (flag_bsearch((unsigned short*)cpdvowels_utf16, - ((unsigned short*)w)[i - 1], cpdvowels_utf16_len)) - num++; - } - } - return num; -} - -void AffixMgr::setcminmax(int* cmin, int* cmax, const char* word, int len) { - if (utf8) { - int i; - for (*cmin = 0, i = 0; (i < cpdmin) && word[*cmin]; i++) { - for ((*cmin)++; (word[*cmin] & 0xc0) == 0x80; (*cmin)++) - ; - } - for (*cmax = len, i = 0; (i < (cpdmin - 1)) && *cmax; i++) { - for ((*cmax)--; (word[*cmax] & 0xc0) == 0x80; (*cmax)--) - ; - } - } else { - *cmin = cpdmin; - *cmax = len - cpdmin + 1; - } -} - -// check if compound word is correctly spelled -// hu_mov_rule = spec. Hungarian rule (XXX) -struct hentry* AffixMgr::compound_check(const char* word, - int len, - short wordnum, - short numsyllable, - short maxwordnum, - short wnum, - hentry** words = NULL, - char hu_mov_rule = 0, - char is_sug = 0, - int* info = NULL) { - int i; - short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2; - struct hentry* rv = NULL; - struct hentry* rv_first; - struct hentry* rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking - char st[MAXWORDUTF8LEN + 4]; - char ch = '\0'; - int cmin; - int cmax; - int striple = 0; - int scpd = 0; - int soldi = 0; - int oldcmin = 0; - int oldcmax = 0; - int oldlen = 0; - int checkedstriple = 0; - int onlycpdrule; - char affixed = 0; - hentry** oldwords = words; - - int checked_prefix; - - setcminmax(&cmin, &cmax, word, len); - - strcpy(st, word); - - for (i = cmin; i < cmax; i++) { - // go to end of the UTF-8 character - if (utf8) { - for (; (st[i] & 0xc0) == 0x80; i++) - ; - if (i >= cmax) - return NULL; - } - - words = oldwords; - onlycpdrule = (words) ? 1 : 0; - - do { // onlycpdrule loop - - oldnumsyllable = numsyllable; - oldwordnum = wordnum; - checked_prefix = 0; - - do { // simplified checkcompoundpattern loop - - if (scpd > 0) { - for (; scpd <= numcheckcpd && - (!checkcpdtable[scpd - 1].pattern3 || - strncmp(word + i, checkcpdtable[scpd - 1].pattern3, - strlen(checkcpdtable[scpd - 1].pattern3)) != 0); - scpd++) - ; - - if (scpd > numcheckcpd) - break; // break simplified checkcompoundpattern loop - strcpy(st + i, checkcpdtable[scpd - 1].pattern); - soldi = i; - i += strlen(checkcpdtable[scpd - 1].pattern); - strcpy(st + i, checkcpdtable[scpd - 1].pattern2); - strcpy(st + i + strlen(checkcpdtable[scpd - 1].pattern2), - word + soldi + strlen(checkcpdtable[scpd - 1].pattern3)); - - oldlen = len; - len += strlen(checkcpdtable[scpd - 1].pattern) + - strlen(checkcpdtable[scpd - 1].pattern2) - - strlen(checkcpdtable[scpd - 1].pattern3); - oldcmin = cmin; - oldcmax = cmax; - setcminmax(&cmin, &cmax, st, len); - - cmax = len - cpdmin + 1; - } - - ch = st[i]; - st[i] = '\0'; - - sfx = NULL; - pfx = NULL; - - // FIRST WORD - - affixed = 1; - rv = lookup(st); // perhaps without prefix - - // search homonym with compound flag - while ((rv) && !hu_mov_rule && - ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || - !((compoundflag && !words && !onlycpdrule && - TESTAFF(rv->astr, compoundflag, rv->alen)) || - (compoundbegin && !wordnum && !onlycpdrule && - TESTAFF(rv->astr, compoundbegin, rv->alen)) || - (compoundmiddle && wordnum && !words && !onlycpdrule && - TESTAFF(rv->astr, compoundmiddle, rv->alen)) || - (numdefcpd && onlycpdrule && - ((!words && !wordnum && - defcpd_check(&words, wnum, rv, (hentry**)&rwords, 0)) || - (words && - defcpd_check(&words, wnum, rv, (hentry**)&rwords, 0))))) || - (scpd != 0 && checkcpdtable[scpd - 1].cond != FLAG_NULL && - !TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)))) { - rv = rv->next_homonym; - } - - if (rv) - affixed = 0; - - if (!rv) { - if (onlycpdrule) - break; - if (compoundflag && - !(rv = prefix_check(st, i, - hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, - compoundflag))) { - if (((rv = suffix_check( - st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundflag, - hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || - (compoundmoresuffixes && - (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && - !hu_mov_rule && sfx->getCont() && - ((compoundforbidflag && - TESTAFF(sfx->getCont(), compoundforbidflag, - sfx->getContLen())) || - (compoundend && - TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) { - rv = NULL; - } - } - - if (rv || - (((wordnum == 0) && compoundbegin && - ((rv = suffix_check( - st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, - hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || - (compoundmoresuffixes && - (rv = suffix_check_twosfx( - st, i, 0, NULL, - compoundbegin))) || // twofold suffixes + compound - (rv = prefix_check(st, i, - hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, - compoundbegin)))) || - ((wordnum > 0) && compoundmiddle && - ((rv = suffix_check( - st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, - hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || - (compoundmoresuffixes && - (rv = suffix_check_twosfx( - st, i, 0, NULL, - compoundmiddle))) || // twofold suffixes + compound - (rv = prefix_check(st, i, - hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, - compoundmiddle)))))) - checked_prefix = 1; - // else check forbiddenwords and needaffix - } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) || - TESTAFF(rv->astr, needaffix, rv->alen) || - TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || - (is_sug && nosuggest && - TESTAFF(rv->astr, nosuggest, rv->alen)))) { - st[i] = ch; - // continue; - break; - } - - // check non_compound flag in suffix and prefix - if ((rv) && !hu_mov_rule && - ((pfx && pfx->getCont() && - TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) || - (sfx && sfx->getCont() && - TESTAFF(sfx->getCont(), compoundforbidflag, - sfx->getContLen())))) { - rv = NULL; - } - - // check compoundend flag in suffix and prefix - if ((rv) && !checked_prefix && compoundend && !hu_mov_rule && - ((pfx && pfx->getCont() && - TESTAFF(pfx->getCont(), compoundend, pfx->getContLen())) || - (sfx && sfx->getCont() && - TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) { - rv = NULL; - } - - // check compoundmiddle flag in suffix and prefix - if ((rv) && !checked_prefix && (wordnum == 0) && compoundmiddle && - !hu_mov_rule && - ((pfx && pfx->getCont() && - TESTAFF(pfx->getCont(), compoundmiddle, pfx->getContLen())) || - (sfx && sfx->getCont() && - TESTAFF(sfx->getCont(), compoundmiddle, sfx->getContLen())))) { - rv = NULL; - } - - // check forbiddenwords - if ((rv) && (rv->astr) && - (TESTAFF(rv->astr, forbiddenword, rv->alen) || - TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || - (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) { - return NULL; - } - - // increment word number, if the second root has a compoundroot flag - if ((rv) && compoundroot && - (TESTAFF(rv->astr, compoundroot, rv->alen))) { - wordnum++; - } - - // first word is acceptable in compound words? - if (((rv) && - (checked_prefix || (words && words[wnum]) || - (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) || - ((oldwordnum == 0) && compoundbegin && - TESTAFF(rv->astr, compoundbegin, rv->alen)) || - ((oldwordnum > 0) && compoundmiddle && - TESTAFF(rv->astr, compoundmiddle, rv->alen)) // || - // (numdefcpd && ) - - // LANG_hu section: spec. Hungarian rule - || ((langnum == LANG_hu) && hu_mov_rule && - (TESTAFF( - rv->astr, 'F', - rv->alen) || // XXX hardwired Hungarian dictionary codes - TESTAFF(rv->astr, 'G', rv->alen) || - TESTAFF(rv->astr, 'H', rv->alen))) - // END of LANG_hu section - ) && - ( - // test CHECKCOMPOUNDPATTERN conditions - scpd == 0 || checkcpdtable[scpd - 1].cond == FLAG_NULL || - TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)) && - !((checkcompoundtriple && scpd == 0 && - !words && // test triple letters - (word[i - 1] == word[i]) && - (((i > 1) && (word[i - 1] == word[i - 2])) || - ((word[i - 1] == word[i + 1])) // may be word[i+1] == '\0' - )) || - (checkcompoundcase && scpd == 0 && !words && - cpdcase_check(word, i)))) - // LANG_hu section: spec. Hungarian rule - || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && - (rv = affix_check(st, i)) && - (sfx && sfx->getCont() && - ( // XXX hardwired Hungarian dic. codes - TESTAFF(sfx->getCont(), (unsigned short)'x', - sfx->getContLen()) || - TESTAFF( - sfx->getCont(), (unsigned short)'%', - sfx->getContLen()))))) { // first word is ok condition - - // LANG_hu section: spec. Hungarian rule - if (langnum == LANG_hu) { - // calculate syllable number of the word - numsyllable += get_syllable(st, i); - // + 1 word, if syllable number of the prefix > 1 (hungarian - // convention) - if (pfx && (get_syllable(pfx->getKey(), strlen(pfx->getKey())) > 1)) - wordnum++; - } - // END of LANG_hu section - - // NEXT WORD(S) - rv_first = rv; - st[i] = ch; - - do { // striple loop - - // check simplifiedtriple - if (simplifiedtriple) { - if (striple) { - checkedstriple = 1; - i--; // check "fahrt" instead of "ahrt" in "Schiffahrt" - } else if (i > 2 && *(word + i - 1) == *(word + i - 2)) - striple = 1; - } - - rv = lookup((st + i)); // perhaps without prefix - - // search homonym with compound flag - while ((rv) && - ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || - !((compoundflag && !words && - TESTAFF(rv->astr, compoundflag, rv->alen)) || - (compoundend && !words && - TESTAFF(rv->astr, compoundend, rv->alen)) || - (numdefcpd && words && - defcpd_check(&words, wnum + 1, rv, NULL, 1))) || - (scpd != 0 && checkcpdtable[scpd - 1].cond2 != FLAG_NULL && - !TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, - rv->alen)))) { - rv = rv->next_homonym; - } - - // check FORCEUCASE - if (rv && forceucase && (rv) && - (TESTAFF(rv->astr, forceucase, rv->alen)) && - !(info && *info & SPELL_ORIGCAP)) - rv = NULL; - - if (rv && words && words[wnum + 1]) - return rv_first; - - oldnumsyllable2 = numsyllable; - oldwordnum2 = wordnum; - - // LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary - // code - if ((rv) && (langnum == LANG_hu) && - (TESTAFF(rv->astr, 'I', rv->alen)) && - !(TESTAFF(rv->astr, 'J', rv->alen))) { - numsyllable--; - } - // END of LANG_hu section - - // increment word number, if the second root has a compoundroot flag - if ((rv) && (compoundroot) && - (TESTAFF(rv->astr, compoundroot, rv->alen))) { - wordnum++; - } - - // check forbiddenwords - if ((rv) && (rv->astr) && - (TESTAFF(rv->astr, forbiddenword, rv->alen) || - TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || - (is_sug && nosuggest && - TESTAFF(rv->astr, nosuggest, rv->alen)))) - return NULL; - - // second word is acceptable, as a root? - // hungarian conventions: compounding is acceptable, - // when compound forms consist of 2 words, or if more, - // then the syllable number of root words must be 6, or lesser. - - if ((rv) && - ((compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) || - (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) && - (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || - ((cpdmaxsyllable != 0) && - (numsyllable + get_syllable(HENTRY_WORD(rv), rv->clen) <= - cpdmaxsyllable))) && - ( - // test CHECKCOMPOUNDPATTERN - !numcheckcpd || scpd != 0 || - !cpdpat_check(word, i, rv_first, rv, 0)) && - ((!checkcompounddup || (rv != rv_first))) - // test CHECKCOMPOUNDPATTERN conditions - && - (scpd == 0 || checkcpdtable[scpd - 1].cond2 == FLAG_NULL || - TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen))) { - // forbid compound word, if it is a non compound word with typical - // fault - if (checkcompoundrep && cpdrep_check(word, len)) - return NULL; - return rv_first; - } - - numsyllable = oldnumsyllable2; - wordnum = oldwordnum2; - - // perhaps second word has prefix or/and suffix - sfx = NULL; - sfxflag = FLAG_NULL; - rv = (compoundflag && !onlycpdrule) - ? affix_check((word + i), strlen(word + i), compoundflag, - IN_CPD_END) - : NULL; - if (!rv && compoundend && !onlycpdrule) { - sfx = NULL; - pfx = NULL; - rv = affix_check((word + i), strlen(word + i), compoundend, - IN_CPD_END); - } - - if (!rv && numdefcpd && words) { - rv = affix_check((word + i), strlen(word + i), 0, IN_CPD_END); - if (rv && defcpd_check(&words, wnum + 1, rv, NULL, 1)) - return rv_first; - rv = NULL; - } - - // test CHECKCOMPOUNDPATTERN conditions (allowed forms) - if (rv && - !(scpd == 0 || checkcpdtable[scpd - 1].cond2 == FLAG_NULL || - TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen))) - rv = NULL; - - // test CHECKCOMPOUNDPATTERN conditions (forbidden compounds) - if (rv && numcheckcpd && scpd == 0 && - cpdpat_check(word, i, rv_first, rv, affixed)) - rv = NULL; - - // check non_compound flag in suffix and prefix - if ((rv) && ((pfx && pfx->getCont() && - TESTAFF(pfx->getCont(), compoundforbidflag, - pfx->getContLen())) || - (sfx && sfx->getCont() && - TESTAFF(sfx->getCont(), compoundforbidflag, - sfx->getContLen())))) { - rv = NULL; - } - - // check FORCEUCASE - if (rv && forceucase && (rv) && - (TESTAFF(rv->astr, forceucase, rv->alen)) && - !(info && *info & SPELL_ORIGCAP)) - rv = NULL; - - // check forbiddenwords - if ((rv) && (rv->astr) && - (TESTAFF(rv->astr, forbiddenword, rv->alen) || - TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || - (is_sug && nosuggest && - TESTAFF(rv->astr, nosuggest, rv->alen)))) - return NULL; - - // pfxappnd = prefix of word+i, or NULL - // calculate syllable number of prefix. - // hungarian convention: when syllable number of prefix is more, - // than 1, the prefix+word counts as two words. - - if (langnum == LANG_hu) { - // calculate syllable number of the word - numsyllable += get_syllable(word + i, strlen(word + i)); - - // - affix syllable num. - // XXX only second suffix (inflections, not derivations) - if (sfxappnd) { - char* tmp = myrevstrdup(sfxappnd); - numsyllable -= get_syllable(tmp, strlen(tmp)) + sfxextra; - free(tmp); - } - - // + 1 word, if syllable number of the prefix > 1 (hungarian - // convention) - if (pfx && - (get_syllable(pfx->getKey(), strlen(pfx->getKey())) > 1)) - wordnum++; - - // increment syllable num, if last word has a SYLLABLENUM flag - // and the suffix is beginning `s' - - if (cpdsyllablenum) { - switch (sfxflag) { - case 'c': { - numsyllable += 2; - break; - } - case 'J': { - numsyllable += 1; - break; - } - case 'I': { - if (rv && TESTAFF(rv->astr, 'J', rv->alen)) - numsyllable += 1; - break; - } - } - } - } - - // increment word number, if the second word has a compoundroot flag - if ((rv) && (compoundroot) && - (TESTAFF(rv->astr, compoundroot, rv->alen))) { - wordnum++; - } - - // second word is acceptable, as a word with prefix or/and suffix? - // hungarian conventions: compounding is acceptable, - // when compound forms consist 2 word, otherwise - // the syllable number of root words is 6, or lesser. - if ((rv) && - (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || - ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) && - ((!checkcompounddup || (rv != rv_first)))) { - // forbid compound word, if it is a non compound word with typical - // fault - if (checkcompoundrep && cpdrep_check(word, len)) - return NULL; - return rv_first; - } - - numsyllable = oldnumsyllable2; - wordnum = oldwordnum2; - - // perhaps second word is a compound word (recursive call) - if (wordnum < maxwordnum) { - rv = compound_check((st + i), strlen(st + i), wordnum + 1, - numsyllable, maxwordnum, wnum + 1, words, 0, - is_sug, info); - - if (rv && numcheckcpd && - ((scpd == 0 && - cpdpat_check(word, i, rv_first, rv, affixed)) || - (scpd != 0 && - !cpdpat_check(word, i, rv_first, rv, affixed)))) - rv = NULL; - } else { - rv = NULL; - } - if (rv) { - // forbid compound word, if it is a non compound word with typical - // fault - if (checkcompoundrep || forbiddenword) { - struct hentry* rv2 = NULL; - - if (checkcompoundrep && cpdrep_check(word, len)) - return NULL; - - // check first part - if (strncmp(rv->word, word + i, rv->blen) == 0) { - char r = *(st + i + rv->blen); - *(st + i + rv->blen) = '\0'; - - if (checkcompoundrep && cpdrep_check(st, i + rv->blen)) { - *(st + i + rv->blen) = r; - continue; - } - - if (forbiddenword) { - rv2 = lookup(word); - if (!rv2) - rv2 = affix_check(word, len); - if (rv2 && rv2->astr && - TESTAFF(rv2->astr, forbiddenword, rv2->alen) && - (strncmp(rv2->word, st, i + rv->blen) == 0)) { - return NULL; - } - } - *(st + i + rv->blen) = r; - } - } - return rv_first; - } - } while (striple && !checkedstriple); // end of striple loop - - if (checkedstriple) { - i++; - checkedstriple = 0; - striple = 0; - } - - } // first word is ok condition - - if (soldi != 0) { - i = soldi; - soldi = 0; - len = oldlen; - cmin = oldcmin; - cmax = oldcmax; - } - scpd++; - - } while (!onlycpdrule && simplifiedcpd && - scpd <= numcheckcpd); // end of simplifiedcpd loop - - scpd = 0; - wordnum = oldwordnum; - numsyllable = oldnumsyllable; - - if (soldi != 0) { - i = soldi; - strcpy(st, word); // XXX add more optim. - soldi = 0; - } else - st[i] = ch; - - } while (numdefcpd && oldwordnum == 0 && - onlycpdrule++ < 1); // end of onlycpd loop - } - - return NULL; -} - -// check if compound word is correctly spelled -// hu_mov_rule = spec. Hungarian rule (XXX) -int AffixMgr::compound_check_morph(const char* word, - int len, - short wordnum, - short numsyllable, - short maxwordnum, - short wnum, - hentry** words, - char hu_mov_rule = 0, - char** result = NULL, - char* partresult = NULL) { - int i; - short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2; - int ok = 0; - - struct hentry* rv = NULL; - struct hentry* rv_first; - struct hentry* rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking - char st[MAXWORDUTF8LEN + 4]; - char ch; - - int checked_prefix; - char presult[MAXLNLEN]; - - int cmin; - int cmax; - - int onlycpdrule; - char affixed = 0; - hentry** oldwords = words; - - setcminmax(&cmin, &cmax, word, len); - - strcpy(st, word); - - for (i = cmin; i < cmax; i++) { - // go to end of the UTF-8 character - if (utf8) { - for (; (st[i] & 0xc0) == 0x80; i++) - ; - if (i >= cmax) - return 0; - } - - words = oldwords; - onlycpdrule = (words) ? 1 : 0; - - do { // onlycpdrule loop - - oldnumsyllable = numsyllable; - oldwordnum = wordnum; - checked_prefix = 0; - - ch = st[i]; - st[i] = '\0'; - sfx = NULL; - - // FIRST WORD - - affixed = 1; - - *presult = '\0'; - if (partresult) - mystrcat(presult, partresult, MAXLNLEN); - - rv = lookup(st); // perhaps without prefix - - // search homonym with compound flag - while ((rv) && !hu_mov_rule && - ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || - !((compoundflag && !words && !onlycpdrule && - TESTAFF(rv->astr, compoundflag, rv->alen)) || - (compoundbegin && !wordnum && !onlycpdrule && - TESTAFF(rv->astr, compoundbegin, rv->alen)) || - (compoundmiddle && wordnum && !words && !onlycpdrule && - TESTAFF(rv->astr, compoundmiddle, rv->alen)) || - (numdefcpd && onlycpdrule && - ((!words && !wordnum && - defcpd_check(&words, wnum, rv, (hentry**)&rwords, 0)) || - (words && - defcpd_check(&words, wnum, rv, (hentry**)&rwords, 0))))))) { - rv = rv->next_homonym; - } - - if (rv) - affixed = 0; - - if (rv) { - sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_PART, st); - if (!HENTRY_FIND(rv, MORPH_STEM)) { - sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_STEM, - st); - } - // store the pointer of the hash entry - // sprintf(presult + strlen(presult), "%c%s%p", MSEP_FLD, - // MORPH_HENTRY, rv); - if (HENTRY_DATA(rv)) { - sprintf(presult + strlen(presult), "%c%s", MSEP_FLD, - HENTRY_DATA2(rv)); - } - } - - if (!rv) { - if (onlycpdrule && strlen(*result) > MAXLNLEN / 10) - break; - if (compoundflag && - !(rv = - prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, - compoundflag))) { - if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, - compoundflag, - hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || - (compoundmoresuffixes && - (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && - !hu_mov_rule && sfx->getCont() && - ((compoundforbidflag && - TESTAFF(sfx->getCont(), compoundforbidflag, - sfx->getContLen())) || - (compoundend && - TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) { - rv = NULL; - } - } - - if (rv || - (((wordnum == 0) && compoundbegin && - ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, - compoundbegin, - hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || - (compoundmoresuffixes && - (rv = suffix_check_twosfx( - st, i, 0, NULL, - compoundbegin))) || // twofold suffix+compound - (rv = prefix_check(st, i, - hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, - compoundbegin)))) || - ((wordnum > 0) && compoundmiddle && - ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, - compoundmiddle, - hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || - (compoundmoresuffixes && - (rv = suffix_check_twosfx( - st, i, 0, NULL, - compoundmiddle))) || // twofold suffix+compound - (rv = prefix_check(st, i, - hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, - compoundmiddle)))))) { - // char * p = prefix_check_morph(st, i, 0, compound); - char* p = NULL; - if (compoundflag) - p = affix_check_morph(st, i, compoundflag); - if (!p || (*p == '\0')) { - if (p) - free(p); - p = NULL; - if ((wordnum == 0) && compoundbegin) { - p = affix_check_morph(st, i, compoundbegin); - } else if ((wordnum > 0) && compoundmiddle) { - p = affix_check_morph(st, i, compoundmiddle); - } - } - if (p && (*p != '\0')) { - sprintf(presult + strlen(presult), "%c%s%s%s", MSEP_FLD, MORPH_PART, - st, line_uniq_app(&p, MSEP_REC)); - } - if (p) - free(p); - checked_prefix = 1; - } - // else check forbiddenwords - } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) || - TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || - TESTAFF(rv->astr, needaffix, rv->alen))) { - st[i] = ch; - continue; - } - - // check non_compound flag in suffix and prefix - if ((rv) && !hu_mov_rule && - ((pfx && pfx->getCont() && - TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) || - (sfx && sfx->getCont() && - TESTAFF(sfx->getCont(), compoundforbidflag, sfx->getContLen())))) { - continue; - } - - // check compoundend flag in suffix and prefix - if ((rv) && !checked_prefix && compoundend && !hu_mov_rule && - ((pfx && pfx->getCont() && - TESTAFF(pfx->getCont(), compoundend, pfx->getContLen())) || - (sfx && sfx->getCont() && - TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) { - continue; - } - - // check compoundmiddle flag in suffix and prefix - if ((rv) && !checked_prefix && (wordnum == 0) && compoundmiddle && - !hu_mov_rule && - ((pfx && pfx->getCont() && - TESTAFF(pfx->getCont(), compoundmiddle, pfx->getContLen())) || - (sfx && sfx->getCont() && - TESTAFF(sfx->getCont(), compoundmiddle, sfx->getContLen())))) { - rv = NULL; - } - - // check forbiddenwords - if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) || - TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) - continue; - - // increment word number, if the second root has a compoundroot flag - if ((rv) && (compoundroot) && - (TESTAFF(rv->astr, compoundroot, rv->alen))) { - wordnum++; - } - - // first word is acceptable in compound words? - if (((rv) && - (checked_prefix || (words && words[wnum]) || - (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) || - ((oldwordnum == 0) && compoundbegin && - TESTAFF(rv->astr, compoundbegin, rv->alen)) || - ((oldwordnum > 0) && compoundmiddle && - TESTAFF(rv->astr, compoundmiddle, rv->alen)) - // LANG_hu section: spec. Hungarian rule - || ((langnum == LANG_hu) && // hu_mov_rule - hu_mov_rule && (TESTAFF(rv->astr, 'F', rv->alen) || - TESTAFF(rv->astr, 'G', rv->alen) || - TESTAFF(rv->astr, 'H', rv->alen))) - // END of LANG_hu section - ) && - !((checkcompoundtriple && !words && // test triple letters - (word[i - 1] == word[i]) && - (((i > 1) && (word[i - 1] == word[i - 2])) || - ((word[i - 1] == word[i + 1])) // may be word[i+1] == '\0' - )) || - ( - // test CHECKCOMPOUNDPATTERN - numcheckcpd && !words && - cpdpat_check(word, i, rv, NULL, affixed)) || - (checkcompoundcase && !words && cpdcase_check(word, i)))) - // LANG_hu section: spec. Hungarian rule - || - ((!rv) && (langnum == LANG_hu) && hu_mov_rule && - (rv = affix_check(st, i)) && - (sfx && sfx->getCont() && - (TESTAFF(sfx->getCont(), (unsigned short)'x', sfx->getContLen()) || - TESTAFF(sfx->getCont(), (unsigned short)'%', sfx->getContLen())))) - // END of LANG_hu section - ) { - // LANG_hu section: spec. Hungarian rule - if (langnum == LANG_hu) { - // calculate syllable number of the word - numsyllable += get_syllable(st, i); - - // + 1 word, if syllable number of the prefix > 1 (hungarian - // convention) - if (pfx && (get_syllable(pfx->getKey(), strlen(pfx->getKey())) > 1)) - wordnum++; - } - // END of LANG_hu section - - // NEXT WORD(S) - rv_first = rv; - rv = lookup((word + i)); // perhaps without prefix - - // search homonym with compound flag - while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || - !((compoundflag && !words && - TESTAFF(rv->astr, compoundflag, rv->alen)) || - (compoundend && !words && - TESTAFF(rv->astr, compoundend, rv->alen)) || - (numdefcpd && words && - defcpd_check(&words, wnum + 1, rv, NULL, 1))))) { - rv = rv->next_homonym; - } - - if (rv && words && words[wnum + 1]) { - mystrcat(*result, presult, MAXLNLEN); - mystrcat(*result, " ", MAXLNLEN); - mystrcat(*result, MORPH_PART, MAXLNLEN); - mystrcat(*result, word + i, MAXLNLEN); - if (complexprefixes && HENTRY_DATA(rv)) - mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN); - if (!HENTRY_FIND(rv, MORPH_STEM)) { - mystrcat(*result, " ", MAXLNLEN); - mystrcat(*result, MORPH_STEM, MAXLNLEN); - mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN); - } - // store the pointer of the hash entry - // sprintf(*result + strlen(*result), " %s%p", - // MORPH_HENTRY, rv); - if (!complexprefixes && HENTRY_DATA(rv)) { - mystrcat(*result, " ", MAXLNLEN); - mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN); - } - mystrcat(*result, "\n", MAXLNLEN); - return 0; - } - - oldnumsyllable2 = numsyllable; - oldwordnum2 = wordnum; - - // LANG_hu section: spec. Hungarian rule - if ((rv) && (langnum == LANG_hu) && - (TESTAFF(rv->astr, 'I', rv->alen)) && - !(TESTAFF(rv->astr, 'J', rv->alen))) { - numsyllable--; - } - // END of LANG_hu section - // increment word number, if the second root has a compoundroot flag - if ((rv) && (compoundroot) && - (TESTAFF(rv->astr, compoundroot, rv->alen))) { - wordnum++; - } - - // check forbiddenwords - if ((rv) && (rv->astr) && - (TESTAFF(rv->astr, forbiddenword, rv->alen) || - TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) { - st[i] = ch; - continue; - } - - // second word is acceptable, as a root? - // hungarian conventions: compounding is acceptable, - // when compound forms consist of 2 words, or if more, - // then the syllable number of root words must be 6, or lesser. - if ((rv) && - ((compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) || - (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) && - (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || - ((cpdmaxsyllable != 0) && - (numsyllable + get_syllable(HENTRY_WORD(rv), rv->blen) <= - cpdmaxsyllable))) && - ((!checkcompounddup || (rv != rv_first)))) { - // bad compound word - mystrcat(*result, presult, MAXLNLEN); - mystrcat(*result, " ", MAXLNLEN); - mystrcat(*result, MORPH_PART, MAXLNLEN); - mystrcat(*result, word + i, MAXLNLEN); - - if (HENTRY_DATA(rv)) { - if (complexprefixes) - mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN); - if (!HENTRY_FIND(rv, MORPH_STEM)) { - mystrcat(*result, " ", MAXLNLEN); - mystrcat(*result, MORPH_STEM, MAXLNLEN); - mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN); - } - // store the pointer of the hash entry - // sprintf(*result + strlen(*result), " - // %s%p", MORPH_HENTRY, rv); - if (!complexprefixes) { - mystrcat(*result, " ", MAXLNLEN); - mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN); - } - } - mystrcat(*result, "\n", MAXLNLEN); - ok = 1; - } - - numsyllable = oldnumsyllable2; - wordnum = oldwordnum2; - - // perhaps second word has prefix or/and suffix - sfx = NULL; - sfxflag = FLAG_NULL; - - if (compoundflag && !onlycpdrule) - rv = affix_check((word + i), strlen(word + i), compoundflag); - else - rv = NULL; - - if (!rv && compoundend && !onlycpdrule) { - sfx = NULL; - pfx = NULL; - rv = affix_check((word + i), strlen(word + i), compoundend); - } - - if (!rv && numdefcpd && words) { - rv = affix_check((word + i), strlen(word + i), 0, IN_CPD_END); - if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) { - char* m = NULL; - if (compoundflag) - m = affix_check_morph((word + i), strlen(word + i), compoundflag); - if ((!m || *m == '\0') && compoundend) { - if (m) - free(m); - m = affix_check_morph((word + i), strlen(word + i), compoundend); - } - mystrcat(*result, presult, MAXLNLEN); - if (m || (*m != '\0')) { - char m2[MAXLNLEN]; - sprintf(m2, "%c%s%s%s", MSEP_FLD, MORPH_PART, word + i, - line_uniq_app(&m, MSEP_REC)); - mystrcat(*result, m2, MAXLNLEN); - } - if (m) - free(m); - mystrcat(*result, "\n", MAXLNLEN); - ok = 1; - } - } - - // check non_compound flag in suffix and prefix - if ((rv) && - ((pfx && pfx->getCont() && - TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) || - (sfx && sfx->getCont() && - TESTAFF(sfx->getCont(), compoundforbidflag, - sfx->getContLen())))) { - rv = NULL; - } - - // check forbiddenwords - if ((rv) && (rv->astr) && - (TESTAFF(rv->astr, forbiddenword, rv->alen) || - TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)) && - (!TESTAFF(rv->astr, needaffix, rv->alen))) { - st[i] = ch; - continue; - } - - if (langnum == LANG_hu) { - // calculate syllable number of the word - numsyllable += get_syllable(word + i, strlen(word + i)); - - // - affix syllable num. - // XXX only second suffix (inflections, not derivations) - if (sfxappnd) { - char* tmp = myrevstrdup(sfxappnd); - numsyllable -= get_syllable(tmp, strlen(tmp)) + sfxextra; - free(tmp); - } - - // + 1 word, if syllable number of the prefix > 1 (hungarian - // convention) - if (pfx && (get_syllable(pfx->getKey(), strlen(pfx->getKey())) > 1)) - wordnum++; - - // increment syllable num, if last word has a SYLLABLENUM flag - // and the suffix is beginning `s' - - if (cpdsyllablenum) { - switch (sfxflag) { - case 'c': { - numsyllable += 2; - break; - } - case 'J': { - numsyllable += 1; - break; - } - case 'I': { - if (rv && TESTAFF(rv->astr, 'J', rv->alen)) - numsyllable += 1; - break; - } - } - } - } - - // increment word number, if the second word has a compoundroot flag - if ((rv) && (compoundroot) && - (TESTAFF(rv->astr, compoundroot, rv->alen))) { - wordnum++; - } - // second word is acceptable, as a word with prefix or/and suffix? - // hungarian conventions: compounding is acceptable, - // when compound forms consist 2 word, otherwise - // the syllable number of root words is 6, or lesser. - if ((rv) && - (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || - ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) && - ((!checkcompounddup || (rv != rv_first)))) { - char* m = NULL; - if (compoundflag) - m = affix_check_morph((word + i), strlen(word + i), compoundflag); - if ((!m || *m == '\0') && compoundend) { - if (m) - free(m); - m = affix_check_morph((word + i), strlen(word + i), compoundend); - } - mystrcat(*result, presult, MAXLNLEN); - if (m && (*m != '\0')) { - char m2[MAXLNLEN]; - sprintf(m2, "%c%s%s%s", MSEP_FLD, MORPH_PART, word + i, - line_uniq_app(&m, MSEP_REC)); - mystrcat(*result, m2, MAXLNLEN); - } - if (m) - free(m); - if (strlen(*result) + 1 < MAXLNLEN) - sprintf(*result + strlen(*result), "%c", MSEP_REC); - ok = 1; - } - - numsyllable = oldnumsyllable2; - wordnum = oldwordnum2; - - // perhaps second word is a compound word (recursive call) - if ((wordnum < maxwordnum) && (ok == 0)) { - compound_check_morph((word + i), strlen(word + i), wordnum + 1, - numsyllable, maxwordnum, wnum + 1, words, 0, - result, presult); - } else { - rv = NULL; - } - } - st[i] = ch; - wordnum = oldwordnum; - numsyllable = oldnumsyllable; - - } while (numdefcpd && oldwordnum == 0 && - onlycpdrule++ < 1); // end of onlycpd loop - } - return 0; -} - -// return 1 if s1 (reversed) is a leading subset of end of s2 -/* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int - len) - { - while ((len > 0) && *s1 && (*s1 == *end_of_s2)) { - s1++; - end_of_s2--; - len--; - } - return (*s1 == '\0'); - } - */ - -inline int AffixMgr::isRevSubset(const char* s1, - const char* end_of_s2, - int len) { - while ((len > 0) && (*s1 != '\0') && ((*s1 == *end_of_s2) || (*s1 == '.'))) { - s1++; - end_of_s2--; - len--; - } - return (*s1 == '\0'); -} - -// check word for suffixes - -struct hentry* AffixMgr::suffix_check(const char* word, - int len, - int sfxopts, - PfxEntry* ppfx, - char** wlst, - int maxSug, - int* ns, - const FLAG cclass, - const FLAG needflag, - char in_compound) { - struct hentry* rv = NULL; - PfxEntry* ep = ppfx; - - // first handle the special case of 0 length suffixes - SfxEntry* se = sStart[0]; - - while (se) { - if (!cclass || se->getCont()) { - // suffixes are not allowed in beginning of compounds - if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass - // except when signed with compoundpermitflag flag - (se->getCont() && compoundpermitflag && - TESTAFF(se->getCont(), compoundpermitflag, se->getContLen()))) && - (!circumfix || - // no circumfix flag in prefix and suffix - ((!ppfx || !(ep->getCont()) || - !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && - (!se->getCont() || - !(TESTAFF(se->getCont(), circumfix, se->getContLen())))) || - // circumfix flag in prefix AND suffix - ((ppfx && (ep->getCont()) && - TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && - (se->getCont() && - (TESTAFF(se->getCont(), circumfix, se->getContLen()))))) && - // fogemorpheme - (in_compound || - !(se->getCont() && - (TESTAFF(se->getCont(), onlyincompound, se->getContLen())))) && - // needaffix on prefix or first suffix - (cclass || - !(se->getCont() && - TESTAFF(se->getCont(), needaffix, se->getContLen())) || - (ppfx && - !((ep->getCont()) && - TESTAFF(ep->getCont(), needaffix, ep->getContLen()))))) { - rv = se->checkword(word, len, sfxopts, ppfx, wlst, maxSug, ns, - (FLAG)cclass, needflag, - (in_compound ? 0 : onlyincompound)); - if (rv) { - sfx = se; // BUG: sfx not stateless - return rv; - } - } - } - se = se->getNext(); - } - - // now handle the general case - if (len == 0) - return NULL; // FULLSTRIP - unsigned char sp = *((const unsigned char*)(word + len - 1)); - SfxEntry* sptr = sStart[sp]; - - while (sptr) { - if (isRevSubset(sptr->getKey(), word + len - 1, len)) { - // suffixes are not allowed in beginning of compounds - if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass - // except when signed with compoundpermitflag flag - (sptr->getCont() && compoundpermitflag && - TESTAFF(sptr->getCont(), compoundpermitflag, - sptr->getContLen()))) && - (!circumfix || - // no circumfix flag in prefix and suffix - ((!ppfx || !(ep->getCont()) || - !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && - (!sptr->getCont() || - !(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())))) || - // circumfix flag in prefix AND suffix - ((ppfx && (ep->getCont()) && - TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && - (sptr->getCont() && - (TESTAFF(sptr->getCont(), circumfix, sptr->getContLen()))))) && - // fogemorpheme - (in_compound || - !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, - sptr->getContLen()))))) && - // needaffix on prefix or first suffix - (cclass || - !(sptr->getCont() && - TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) || - (ppfx && - !((ep->getCont()) && - TESTAFF(ep->getCont(), needaffix, ep->getContLen()))))) - if (in_compound != IN_CPD_END || ppfx || - !(sptr->getCont() && - TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))) { - rv = sptr->checkword(word, len, sfxopts, ppfx, wlst, maxSug, ns, - cclass, needflag, - (in_compound ? 0 : onlyincompound)); - if (rv) { - sfx = sptr; // BUG: sfx not stateless - sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless - if (!sptr->getCont()) - sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless - // LANG_hu section: spec. Hungarian rule - else if (langnum == LANG_hu && sptr->getKeyLen() && - sptr->getKey()[0] == 'i' && sptr->getKey()[1] != 'y' && - sptr->getKey()[1] != 't') { - sfxextra = 1; - } - // END of LANG_hu section - return rv; - } - } - sptr = sptr->getNextEQ(); - } else { - sptr = sptr->getNextNE(); - } - } - - return NULL; -} - -// check word for two-level suffixes - -struct hentry* AffixMgr::suffix_check_twosfx(const char* word, - int len, - int sfxopts, - PfxEntry* ppfx, - const FLAG needflag) { - struct hentry* rv = NULL; - - // first handle the special case of 0 length suffixes - SfxEntry* se = sStart[0]; - while (se) { - if (contclasses[se->getFlag()]) { - rv = se->check_twosfx(word, len, sfxopts, ppfx, needflag); - if (rv) - return rv; - } - se = se->getNext(); - } - - // now handle the general case - if (len == 0) - return NULL; // FULLSTRIP - unsigned char sp = *((const unsigned char*)(word + len - 1)); - SfxEntry* sptr = sStart[sp]; - - while (sptr) { - if (isRevSubset(sptr->getKey(), word + len - 1, len)) { - if (contclasses[sptr->getFlag()]) { - rv = sptr->check_twosfx(word, len, sfxopts, ppfx, needflag); - if (rv) { - sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless - if (!sptr->getCont()) - sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless - return rv; - } - } - sptr = sptr->getNextEQ(); - } else { - sptr = sptr->getNextNE(); - } - } - - return NULL; -} - -char* AffixMgr::suffix_check_twosfx_morph(const char* word, - int len, - int sfxopts, - PfxEntry* ppfx, - const FLAG needflag) { - std::string result; - std::string result2; - std::string result3; - - char* st; - - // first handle the special case of 0 length suffixes - SfxEntry* se = sStart[0]; - while (se) { - if (contclasses[se->getFlag()]) { - st = se->check_twosfx_morph(word, len, sfxopts, ppfx, needflag); - if (st) { - if (ppfx) { - if (ppfx->getMorph()) { - result.append(ppfx->getMorph()); - result.append(" "); - } else - debugflag(result, ppfx->getFlag()); - } - result.append(st); - free(st); - if (se->getMorph()) { - result.append(" "); - result.append(se->getMorph()); - } else - debugflag(result, se->getFlag()); - result.append("\n"); - } - } - se = se->getNext(); - } - - // now handle the general case - if (len == 0) - return NULL; // FULLSTRIP - unsigned char sp = *((const unsigned char*)(word + len - 1)); - SfxEntry* sptr = sStart[sp]; - - while (sptr) { - if (isRevSubset(sptr->getKey(), word + len - 1, len)) { - if (contclasses[sptr->getFlag()]) { - st = sptr->check_twosfx_morph(word, len, sfxopts, ppfx, needflag); - if (st) { - sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless - if (!sptr->getCont()) - sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless - result2.assign(st); - free(st); - - result3.clear(); - - if (sptr->getMorph()) { - result3.append(" "); - result3.append(sptr->getMorph()); - } else - debugflag(result3, sptr->getFlag()); - strlinecat(result2, result3); - result2.append("\n"); - result.append(result2); - } - } - sptr = sptr->getNextEQ(); - } else { - sptr = sptr->getNextNE(); - } - } - - if (!result.empty()) - return mystrdup(result.c_str()); - - return NULL; -} - -char* AffixMgr::suffix_check_morph(const char* word, - int len, - int sfxopts, - PfxEntry* ppfx, - const FLAG cclass, - const FLAG needflag, - char in_compound) { - char result[MAXLNLEN]; - - struct hentry* rv = NULL; - - result[0] = '\0'; - - PfxEntry* ep = ppfx; - - // first handle the special case of 0 length suffixes - SfxEntry* se = sStart[0]; - while (se) { - if (!cclass || se->getCont()) { - // suffixes are not allowed in beginning of compounds - if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass - // except when signed with compoundpermitflag flag - (se->getCont() && compoundpermitflag && - TESTAFF(se->getCont(), compoundpermitflag, se->getContLen()))) && - (!circumfix || - // no circumfix flag in prefix and suffix - ((!ppfx || !(ep->getCont()) || - !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && - (!se->getCont() || - !(TESTAFF(se->getCont(), circumfix, se->getContLen())))) || - // circumfix flag in prefix AND suffix - ((ppfx && (ep->getCont()) && - TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && - (se->getCont() && - (TESTAFF(se->getCont(), circumfix, se->getContLen()))))) && - // fogemorpheme - (in_compound || - !((se->getCont() && - (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) && - // needaffix on prefix or first suffix - (cclass || - !(se->getCont() && - TESTAFF(se->getCont(), needaffix, se->getContLen())) || - (ppfx && - !((ep->getCont()) && - TESTAFF(ep->getCont(), needaffix, ep->getContLen())))))) - rv = se->checkword(word, len, sfxopts, ppfx, NULL, 0, 0, cclass, - needflag); - while (rv) { - if (ppfx) { - if (ppfx->getMorph()) { - mystrcat(result, ppfx->getMorph(), MAXLNLEN); - mystrcat(result, " ", MAXLNLEN); - } else - debugflag(result, ppfx->getFlag()); - } - if (complexprefixes && HENTRY_DATA(rv)) - mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN); - if (!HENTRY_FIND(rv, MORPH_STEM)) { - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, MORPH_STEM, MAXLNLEN); - mystrcat(result, HENTRY_WORD(rv), MAXLNLEN); - } - // store the pointer of the hash entry - // sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, - // rv); - - if (!complexprefixes && HENTRY_DATA(rv)) { - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN); - } - if (se->getMorph()) { - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, se->getMorph(), MAXLNLEN); - } else - debugflag(result, se->getFlag()); - mystrcat(result, "\n", MAXLNLEN); - rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag); - } - } - se = se->getNext(); - } - - // now handle the general case - if (len == 0) - return NULL; // FULLSTRIP - unsigned char sp = *((const unsigned char*)(word + len - 1)); - SfxEntry* sptr = sStart[sp]; - - while (sptr) { - if (isRevSubset(sptr->getKey(), word + len - 1, len)) { - // suffixes are not allowed in beginning of compounds - if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass - // except when signed with compoundpermitflag flag - (sptr->getCont() && compoundpermitflag && - TESTAFF(sptr->getCont(), compoundpermitflag, - sptr->getContLen()))) && - (!circumfix || - // no circumfix flag in prefix and suffix - ((!ppfx || !(ep->getCont()) || - !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && - (!sptr->getCont() || - !(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())))) || - // circumfix flag in prefix AND suffix - ((ppfx && (ep->getCont()) && - TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && - (sptr->getCont() && - (TESTAFF(sptr->getCont(), circumfix, sptr->getContLen()))))) && - // fogemorpheme - (in_compound || - !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, - sptr->getContLen()))))) && - // needaffix on first suffix - (cclass || - !(sptr->getCont() && - TESTAFF(sptr->getCont(), needaffix, sptr->getContLen()))))) - rv = sptr->checkword(word, len, sfxopts, ppfx, NULL, 0, 0, cclass, - needflag); - while (rv) { - if (ppfx) { - if (ppfx->getMorph()) { - mystrcat(result, ppfx->getMorph(), MAXLNLEN); - mystrcat(result, " ", MAXLNLEN); - } else - debugflag(result, ppfx->getFlag()); - } - if (complexprefixes && HENTRY_DATA(rv)) - mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN); - if (!HENTRY_FIND(rv, MORPH_STEM)) { - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, MORPH_STEM, MAXLNLEN); - mystrcat(result, HENTRY_WORD(rv), MAXLNLEN); - } - // store the pointer of the hash entry - // sprintf(result + strlen(result), " %s%p", - // MORPH_HENTRY, rv); - - if (!complexprefixes && HENTRY_DATA(rv)) { - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN); - } - - if (sptr->getMorph()) { - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, sptr->getMorph(), MAXLNLEN); - } else - debugflag(result, sptr->getFlag()); - mystrcat(result, "\n", MAXLNLEN); - rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag); - } - sptr = sptr->getNextEQ(); - } else { - sptr = sptr->getNextNE(); - } - } - - if (*result) - return mystrdup(result); - return NULL; -} - -// check if word with affixes is correctly spelled -struct hentry* AffixMgr::affix_check(const char* word, - int len, - const FLAG needflag, - char in_compound) { - struct hentry* rv = NULL; - - // check all prefixes (also crossed with suffixes if allowed) - rv = prefix_check(word, len, in_compound, needflag); - if (rv) - return rv; - - // if still not found check all suffixes - rv = suffix_check(word, len, 0, NULL, NULL, 0, NULL, FLAG_NULL, needflag, - in_compound); - - if (havecontclass) { - sfx = NULL; - pfx = NULL; - - if (rv) - return rv; - // if still not found check all two-level suffixes - rv = suffix_check_twosfx(word, len, 0, NULL, needflag); - - if (rv) - return rv; - // if still not found check all two-level suffixes - rv = prefix_check_twosfx(word, len, IN_CPD_NOT, needflag); - } - - return rv; -} - -// check if word with affixes is correctly spelled -char* AffixMgr::affix_check_morph(const char* word, - int len, - const FLAG needflag, - char in_compound) { - char result[MAXLNLEN]; - char* st = NULL; - - *result = '\0'; - - // check all prefixes (also crossed with suffixes if allowed) - st = prefix_check_morph(word, len, in_compound); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - - // if still not found check all suffixes - st = suffix_check_morph(word, len, 0, NULL, '\0', needflag, in_compound); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - - if (havecontclass) { - sfx = NULL; - pfx = NULL; - // if still not found check all two-level suffixes - st = suffix_check_twosfx_morph(word, len, 0, NULL, needflag); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - - // if still not found check all two-level suffixes - st = prefix_check_twosfx_morph(word, len, IN_CPD_NOT, needflag); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - } - - return mystrdup(result); -} - -char* AffixMgr::morphgen(const char* ts, - int wl, - const unsigned short* ap, - unsigned short al, - const char* morph, - const char* targetmorph, - int level) { - // handle suffixes - if (!morph) - return NULL; - - // check substandard flag - if (TESTAFF(ap, substandard, al)) - return NULL; - - if (morphcmp(morph, targetmorph) == 0) - return mystrdup(ts); - - size_t stemmorphcatpos; - std::string mymorph; - - // use input suffix fields, if exist - if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) { - mymorph.assign(morph); - mymorph.append(" "); - stemmorphcatpos = mymorph.size(); - } else { - stemmorphcatpos = std::string::npos; - } - - for (int i = 0; i < al; i++) { - const unsigned char c = (unsigned char)(ap[i] & 0x00FF); - SfxEntry* sptr = sFlag[c]; - while (sptr) { - if (sptr->getFlag() == ap[i] && sptr->getMorph() && - ((sptr->getContLen() == 0) || - // don't generate forms with substandard affixes - !TESTAFF(sptr->getCont(), substandard, sptr->getContLen()))) { - const char* stemmorph; - if (stemmorphcatpos != std::string::npos) { - mymorph.replace(stemmorphcatpos, std::string::npos, sptr->getMorph()); - stemmorph = mymorph.c_str(); - } else { - stemmorph = sptr->getMorph(); - } - - int cmp = morphcmp(stemmorph, targetmorph); - - if (cmp == 0) { - char* newword = sptr->add(ts, wl); - if (newword) { - hentry* check = pHMgr->lookup(newword); // XXX extra dic - if (!check || !check->astr || - !(TESTAFF(check->astr, forbiddenword, check->alen) || - TESTAFF(check->astr, ONLYUPCASEFLAG, check->alen))) { - return newword; - } - free(newword); - } - } - - // recursive call for secondary suffixes - if ((level == 0) && (cmp == 1) && (sptr->getContLen() > 0) && - // (get_sfxcount(stemmorph) < targetcount) && - !TESTAFF(sptr->getCont(), substandard, sptr->getContLen())) { - char* newword = sptr->add(ts, wl); - if (newword) { - char* newword2 = - morphgen(newword, strlen(newword), sptr->getCont(), - sptr->getContLen(), stemmorph, targetmorph, 1); - - if (newword2) { - free(newword); - return newword2; - } - free(newword); - newword = NULL; - } - } - } - sptr = sptr->getFlgNxt(); - } - } - return NULL; -} - -int AffixMgr::expand_rootword(struct guessword* wlst, - int maxn, - const char* ts, - int wl, - const unsigned short* ap, - unsigned short al, - const char* bad, - int badl, - char* phon) { - int nh = 0; - // first add root word to list - if ((nh < maxn) && - !(al && ((needaffix && TESTAFF(ap, needaffix, al)) || - (onlyincompound && TESTAFF(ap, onlyincompound, al))))) { - wlst[nh].word = mystrdup(ts); - if (!wlst[nh].word) - return 0; - wlst[nh].allow = (1 == 0); - wlst[nh].orig = NULL; - nh++; - // add special phonetic version - if (phon && (nh < maxn)) { - wlst[nh].word = mystrdup(phon); - if (!wlst[nh].word) - return nh - 1; - wlst[nh].allow = (1 == 0); - wlst[nh].orig = mystrdup(ts); - if (!wlst[nh].orig) - return nh - 1; - nh++; - } - } - - // handle suffixes - for (int i = 0; i < al; i++) { - const unsigned char c = (unsigned char)(ap[i] & 0x00FF); - SfxEntry* sptr = sFlag[c]; - while (sptr) { - if ((sptr->getFlag() == ap[i]) && - (!sptr->getKeyLen() || - ((badl > sptr->getKeyLen()) && - (strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) && - // check needaffix flag - !(sptr->getCont() && - ((needaffix && - TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) || - (circumfix && - TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())) || - (onlyincompound && - TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) { - char* newword = sptr->add(ts, wl); - if (newword) { - if (nh < maxn) { - wlst[nh].word = newword; - wlst[nh].allow = sptr->allowCross(); - wlst[nh].orig = NULL; - nh++; - // add special phonetic version - if (phon && (nh < maxn)) { - std::string prefix(phon); - std::string key(sptr->getKey()); - reverseword(key); - prefix.append(key); - wlst[nh].word = mystrdup(prefix.c_str()); - if (!wlst[nh].word) - return nh - 1; - wlst[nh].allow = (1 == 0); - wlst[nh].orig = mystrdup(newword); - if (!wlst[nh].orig) - return nh - 1; - nh++; - } - } else { - free(newword); - } - } - } - sptr = sptr->getFlgNxt(); - } - } - - int n = nh; - - // handle cross products of prefixes and suffixes - for (int j = 1; j < n; j++) - if (wlst[j].allow) { - for (int k = 0; k < al; k++) { - const unsigned char c = (unsigned char)(ap[k] & 0x00FF); - PfxEntry* cptr = pFlag[c]; - while (cptr) { - if ((cptr->getFlag() == ap[k]) && cptr->allowCross() && - (!cptr->getKeyLen() || - ((badl > cptr->getKeyLen()) && - (strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) { - int l1 = strlen(wlst[j].word); - char* newword = cptr->add(wlst[j].word, l1); - if (newword) { - if (nh < maxn) { - wlst[nh].word = newword; - wlst[nh].allow = cptr->allowCross(); - wlst[nh].orig = NULL; - nh++; - } else { - free(newword); - } - } - } - cptr = cptr->getFlgNxt(); - } - } - } - - // now handle pure prefixes - for (int m = 0; m < al; m++) { - const unsigned char c = (unsigned char)(ap[m] & 0x00FF); - PfxEntry* ptr = pFlag[c]; - while (ptr) { - if ((ptr->getFlag() == ap[m]) && - (!ptr->getKeyLen() || - ((badl > ptr->getKeyLen()) && - (strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) && - // check needaffix flag - !(ptr->getCont() && - ((needaffix && - TESTAFF(ptr->getCont(), needaffix, ptr->getContLen())) || - (circumfix && - TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())) || - (onlyincompound && - TESTAFF(ptr->getCont(), onlyincompound, ptr->getContLen()))))) { - char* newword = ptr->add(ts, wl); - if (newword) { - if (nh < maxn) { - wlst[nh].word = newword; - wlst[nh].allow = ptr->allowCross(); - wlst[nh].orig = NULL; - nh++; - } else { - free(newword); - } - } - } - ptr = ptr->getFlgNxt(); - } - } - - return nh; -} - -// return length of replacing table -int AffixMgr::get_numrep() const { - return numrep; -} - -// return replacing table -struct replentry* AffixMgr::get_reptable() const { - if (!reptable) - return NULL; - return reptable; -} - -// return iconv table -RepList* AffixMgr::get_iconvtable() const { - if (!iconvtable) - return NULL; - return iconvtable; -} - -// return oconv table -RepList* AffixMgr::get_oconvtable() const { - if (!oconvtable) - return NULL; - return oconvtable; -} - -// return replacing table -struct phonetable* AffixMgr::get_phonetable() const { - if (!phone) - return NULL; - return phone; -} - -// return length of character map table -int AffixMgr::get_nummap() const { - return nummap; -} - -// return character map table -struct mapentry* AffixMgr::get_maptable() const { - if (!maptable) - return NULL; - return maptable; -} - -// return length of word break table -int AffixMgr::get_numbreak() const { - return numbreak; -} - -// return character map table -char** AffixMgr::get_breaktable() const { - if (!breaktable) - return NULL; - return breaktable; -} - -// return text encoding of dictionary -char* AffixMgr::get_encoding() { - if (!encoding) - encoding = mystrdup(SPELL_ENCODING); - return mystrdup(encoding); -} - -// return text encoding of dictionary -int AffixMgr::get_langnum() const { - return langnum; -} - -// return double prefix option -int AffixMgr::get_complexprefixes() const { - return complexprefixes; -} - -// return FULLSTRIP option -int AffixMgr::get_fullstrip() const { - return fullstrip; -} - -FLAG AffixMgr::get_keepcase() const { - return keepcase; -} - -FLAG AffixMgr::get_forceucase() const { - return forceucase; -} - -FLAG AffixMgr::get_warn() const { - return warn; -} - -int AffixMgr::get_forbidwarn() const { - return forbidwarn; -} - -int AffixMgr::get_checksharps() const { - return checksharps; -} - -char* AffixMgr::encode_flag(unsigned short aflag) const { - return pHMgr->encode_flag(aflag); -} - -// return the preferred ignore string for suggestions -char* AffixMgr::get_ignore() const { - if (!ignorechars) - return NULL; - return ignorechars; -} - -// return the preferred ignore string for suggestions -unsigned short* AffixMgr::get_ignore_utf16(int* len) const { - *len = ignorechars_utf16_len; - return ignorechars_utf16; -} - -// return the keyboard string for suggestions -char* AffixMgr::get_key_string() { - if (!keystring) - keystring = mystrdup(SPELL_KEYSTRING); - return mystrdup(keystring); -} - -// return the preferred try string for suggestions -char* AffixMgr::get_try_string() const { - if (!trystring) - return NULL; - return mystrdup(trystring); -} - -// return the preferred try string for suggestions -const char* AffixMgr::get_wordchars() const { - return wordchars; -} - -unsigned short* AffixMgr::get_wordchars_utf16(int* len) const { - *len = wordchars_utf16_len; - return wordchars_utf16; -} - -// is there compounding? -int AffixMgr::get_compound() const { - return compoundflag || compoundbegin || numdefcpd; -} - -// return the compound words control flag -FLAG AffixMgr::get_compoundflag() const { - return compoundflag; -} - -// return the forbidden words control flag -FLAG AffixMgr::get_forbiddenword() const { - return forbiddenword; -} - -// return the forbidden words control flag -FLAG AffixMgr::get_nosuggest() const { - return nosuggest; -} - -// return the forbidden words control flag -FLAG AffixMgr::get_nongramsuggest() const { - return nongramsuggest; -} - -// return the forbidden words flag modify flag -FLAG AffixMgr::get_needaffix() const { - return needaffix; -} - -// return the onlyincompound flag -FLAG AffixMgr::get_onlyincompound() const { - return onlyincompound; -} - -// return the compound word signal flag -FLAG AffixMgr::get_compoundroot() const { - return compoundroot; -} - -// return the compound begin signal flag -FLAG AffixMgr::get_compoundbegin() const { - return compoundbegin; -} - -// return the value of checknum -int AffixMgr::get_checknum() const { - return checknum; -} - -// return the value of prefix -const char* AffixMgr::get_prefix() const { - if (pfx) - return pfx->getKey(); - return NULL; -} - -// return the value of suffix -const char* AffixMgr::get_suffix() const { - return sfxappnd; -} - -// return the value of suffix -const char* AffixMgr::get_version() const { - return version; -} - -// return lemma_present flag -FLAG AffixMgr::get_lemma_present() const { - return lemma_present; -} - -// utility method to look up root words in hash table -struct hentry* AffixMgr::lookup(const char* word) { - int i; - struct hentry* he = NULL; - for (i = 0; i < *maxdic && !he; i++) { - he = (alldic[i])->lookup(word); - } - return he; -} - -// return the value of suffix -int AffixMgr::have_contclass() const { - return havecontclass; -} - -// return utf8 -int AffixMgr::get_utf8() const { - return utf8; -} - -int AffixMgr::get_maxngramsugs(void) const { - return maxngramsugs; -} - -int AffixMgr::get_maxcpdsugs(void) const { - return maxcpdsugs; -} - -int AffixMgr::get_maxdiff(void) const { - return maxdiff; -} - -int AffixMgr::get_onlymaxdiff(void) const { - return onlymaxdiff; -} - -// return nosplitsugs -int AffixMgr::get_nosplitsugs(void) const { - return nosplitsugs; -} - -// return sugswithdots -int AffixMgr::get_sugswithdots(void) const { - return sugswithdots; -} - -/* parse flag */ -int AffixMgr::parse_flag(char* line, unsigned short* out, FileMgr* af) { - char* s = NULL; - if (*out != FLAG_NULL && !(*out >= DEFAULTFLAGS)) { - HUNSPELL_WARNING( - stderr, - "error: line %d: multiple definitions of an affix file parameter\n", - af->getlinenum()); - return 1; - } - if (parse_string(line, &s, af->getlinenum())) - return 1; - *out = pHMgr->decode_flag(s); - free(s); - return 0; -} - -/* parse num */ -int AffixMgr::parse_num(char* line, int* out, FileMgr* af) { - char* s = NULL; - if (*out != -1) { - HUNSPELL_WARNING( - stderr, - "error: line %d: multiple definitions of an affix file parameter\n", - af->getlinenum()); - return 1; - } - if (parse_string(line, &s, af->getlinenum())) - return 1; - *out = atoi(s); - free(s); - return 0; -} - -/* parse in the max syllablecount of compound words and */ -int AffixMgr::parse_cpdsyllable(char* line, FileMgr* af) { - char* tp = line; - char* piece; - int i = 0; - int np = 0; - w_char w[MAXWORDLEN]; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - np++; - break; - } - case 1: { - cpdmaxsyllable = atoi(piece); - np++; - break; - } - case 2: { - if (!utf8) { - cpdvowels = mystrdup(piece); - } else { - int n = u8_u16(w, MAXWORDLEN, piece); - if (n > 0) { - flag_qsort((unsigned short*)w, 0, n); - cpdvowels_utf16 = (w_char*)malloc(n * sizeof(w_char)); - if (!cpdvowels_utf16) - return 1; - memcpy(cpdvowels_utf16, w, n * sizeof(w_char)); - } - cpdvowels_utf16_len = n; - } - np++; - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (np < 2) { - HUNSPELL_WARNING(stderr, - "error: line %d: missing compoundsyllable information\n", - af->getlinenum()); - return 1; - } - if (np == 2) - cpdvowels = mystrdup("aeiouAEIOU"); - return 0; -} - -/* parse in the typical fault correcting table */ -int AffixMgr::parse_reptable(char* line, FileMgr* af) { - if (numrep != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", - af->getlinenum()); - return 1; - } - char* tp = line; - char* piece; - int i = 0; - int np = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - np++; - break; - } - case 1: { - numrep = atoi(piece); - if (numrep < 1) { - HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", - af->getlinenum()); - return 1; - } - reptable = (replentry*)malloc(numrep * sizeof(struct replentry)); - if (!reptable) - return 1; - np++; - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (np != 2) { - HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", - af->getlinenum()); - return 1; - } - - /* now parse the numrep lines to read in the remainder of the table */ - char* nl; - for (int j = 0; j < numrep; j++) { - if ((nl = af->getline()) == NULL) - return 1; - mychomp(nl); - tp = nl; - i = 0; - reptable[j].pattern = NULL; - reptable[j].pattern2 = NULL; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - if (strncmp(piece, "REP", 3) != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - numrep = 0; - return 1; - } - break; - } - case 1: { - if (*piece == '^') - reptable[j].start = true; - else - reptable[j].start = false; - reptable[j].pattern = - mystrrep(mystrdup(piece + int(reptable[j].start)), "_", " "); - int lr = strlen(reptable[j].pattern) - 1; - if (reptable[j].pattern[lr] == '$') { - reptable[j].end = true; - reptable[j].pattern[lr] = '\0'; - } else - reptable[j].end = false; - break; - } - case 2: { - reptable[j].pattern2 = mystrrep(mystrdup(piece), "_", " "); - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if ((!(reptable[j].pattern)) || (!(reptable[j].pattern2))) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - numrep = 0; - return 1; - } - } - return 0; -} - -/* parse in the typical fault correcting table */ -int AffixMgr::parse_convtable(char* line, - FileMgr* af, - RepList** rl, - const char* keyword) { - if (*rl) { - HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", - af->getlinenum()); - return 1; - } - char* tp = line; - char* piece; - int i = 0; - int np = 0; - int numrl = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - np++; - break; - } - case 1: { - numrl = atoi(piece); - if (numrl < 1) { - HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", - af->getlinenum()); - return 1; - } - *rl = new RepList(numrl); - if (!*rl) - return 1; - np++; - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (np != 2) { - HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", - af->getlinenum()); - return 1; - } - - /* now parse the num lines to read in the remainder of the table */ - char* nl; - for (int j = 0; j < numrl; j++) { - if (!(nl = af->getline())) - return 1; - mychomp(nl); - tp = nl; - i = 0; - char* pattern = NULL; - char* pattern2 = NULL; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - if (strncmp(piece, keyword, strlen(keyword)) != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - delete *rl; - *rl = NULL; - return 1; - } - break; - } - case 1: { - pattern = mystrrep(mystrdup(piece), "_", " "); - break; - } - case 2: { - pattern2 = mystrrep(mystrdup(piece), "_", " "); - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (!pattern || !pattern2) { - if (pattern) - free(pattern); - if (pattern2) - free(pattern2); - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - return 1; - } - (*rl)->add(pattern, pattern2); - } - return 0; -} - -/* parse in the typical fault correcting table */ -int AffixMgr::parse_phonetable(char* line, FileMgr* af) { - if (phone) { - HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", - af->getlinenum()); - return 1; - } - char* tp = line; - char* piece; - int i = 0; - int np = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - np++; - break; - } - case 1: { - phone = (phonetable*)malloc(sizeof(struct phonetable)); - if (!phone) - return 1; - phone->num = atoi(piece); - phone->rules = NULL; - phone->utf8 = (char)utf8; - if (phone->num < 1) { - HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", - af->getlinenum()); - return 1; - } - phone->rules = (char**)malloc(2 * (phone->num + 1) * sizeof(char*)); - if (!phone->rules) { - free(phone); - phone = NULL; - return 1; - } - np++; - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (np != 2) { - HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", - af->getlinenum()); - return 1; - } - - /* now parse the phone->num lines to read in the remainder of the table */ - char* nl; - for (int j = 0; j < phone->num; j++) { - if (!(nl = af->getline())) - return 1; - mychomp(nl); - tp = nl; - i = 0; - phone->rules[j * 2] = NULL; - phone->rules[j * 2 + 1] = NULL; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - if (strncmp(piece, "PHONE", 5) != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - phone->num = 0; - return 1; - } - break; - } - case 1: { - phone->rules[j * 2] = mystrrep(mystrdup(piece), "_", ""); - break; - } - case 2: { - phone->rules[j * 2 + 1] = mystrrep(mystrdup(piece), "_", ""); - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if ((!(phone->rules[j * 2])) || (!(phone->rules[j * 2 + 1]))) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - phone->num = 0; - return 1; - } - } - phone->rules[phone->num * 2] = mystrdup(""); - phone->rules[phone->num * 2 + 1] = mystrdup(""); - init_phonet_hash(*phone); - return 0; -} - -/* parse in the checkcompoundpattern table */ -int AffixMgr::parse_checkcpdtable(char* line, FileMgr* af) { - if (numcheckcpd != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", - af->getlinenum()); - return 1; - } - char* tp = line; - char* piece; - int i = 0; - int np = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - np++; - break; - } - case 1: { - numcheckcpd = atoi(piece); - if (numcheckcpd < 1) { - HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", - af->getlinenum()); - return 1; - } - checkcpdtable = - (patentry*)malloc(numcheckcpd * sizeof(struct patentry)); - if (!checkcpdtable) - return 1; - np++; - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (np != 2) { - HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", - af->getlinenum()); - return 1; - } - - /* now parse the numcheckcpd lines to read in the remainder of the table */ - char* nl; - for (int j = 0; j < numcheckcpd; j++) { - if (!(nl = af->getline())) - return 1; - mychomp(nl); - tp = nl; - i = 0; - checkcpdtable[j].pattern = NULL; - checkcpdtable[j].pattern2 = NULL; - checkcpdtable[j].pattern3 = NULL; - checkcpdtable[j].cond = FLAG_NULL; - checkcpdtable[j].cond2 = FLAG_NULL; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - if (strncmp(piece, "CHECKCOMPOUNDPATTERN", 20) != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - numcheckcpd = 0; - return 1; - } - break; - } - case 1: { - checkcpdtable[j].pattern = mystrdup(piece); - char* p = strchr(checkcpdtable[j].pattern, '/'); - if (p) { - *p = '\0'; - checkcpdtable[j].cond = pHMgr->decode_flag(p + 1); - } - break; - } - case 2: { - checkcpdtable[j].pattern2 = mystrdup(piece); - char* p = strchr(checkcpdtable[j].pattern2, '/'); - if (p) { - *p = '\0'; - checkcpdtable[j].cond2 = pHMgr->decode_flag(p + 1); - } - break; - } - case 3: { - checkcpdtable[j].pattern3 = mystrdup(piece); - simplifiedcpd = 1; - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if ((!(checkcpdtable[j].pattern)) || (!(checkcpdtable[j].pattern2))) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - numcheckcpd = 0; - return 1; - } - } - return 0; -} - -/* parse in the compound rule table */ -int AffixMgr::parse_defcpdtable(char* line, FileMgr* af) { - if (numdefcpd != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", - af->getlinenum()); - return 1; - } - char* tp = line; - char* piece; - int i = 0; - int np = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - np++; - break; - } - case 1: { - numdefcpd = atoi(piece); - if (numdefcpd < 1) { - HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", - af->getlinenum()); - return 1; - } - defcpdtable = (flagentry*)malloc(numdefcpd * sizeof(flagentry)); - if (!defcpdtable) - return 1; - np++; - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (np != 2) { - HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", - af->getlinenum()); - return 1; - } - - /* now parse the numdefcpd lines to read in the remainder of the table */ - char* nl; - for (int j = 0; j < numdefcpd; j++) { - if (!(nl = af->getline())) - return 1; - mychomp(nl); - tp = nl; - i = 0; - defcpdtable[j].def = NULL; - defcpdtable[j].len = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - if (strncmp(piece, "COMPOUNDRULE", 12) != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - numdefcpd = 0; - return 1; - } - break; - } - case 1: { // handle parenthesized flags - if (strchr(piece, '(')) { - defcpdtable[j].def = (FLAG*)malloc(strlen(piece) * sizeof(FLAG)); - defcpdtable[j].len = 0; - int end = 0; - FLAG* conv; - while (!end) { - char* par = piece + 1; - while (*par != '(' && *par != ')' && *par != '\0') - par++; - if (*par == '\0') - end = 1; - else - *par = '\0'; - if (*piece == '(') - piece++; - if (*piece == '*' || *piece == '?') { - defcpdtable[j].def[defcpdtable[j].len++] = (FLAG)*piece; - } else if (*piece != '\0') { - int l = pHMgr->decode_flags(&conv, piece, af); - for (int k = 0; k < l; k++) - defcpdtable[j].def[defcpdtable[j].len++] = conv[k]; - free(conv); - } - piece = par + 1; - } - } else { - defcpdtable[j].len = - pHMgr->decode_flags(&(defcpdtable[j].def), piece, af); - } - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (!defcpdtable[j].len) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - numdefcpd = 0; - return 1; - } - } - return 0; -} - -/* parse in the character map table */ -int AffixMgr::parse_maptable(char* line, FileMgr* af) { - if (nummap != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", - af->getlinenum()); - return 1; - } - char* tp = line; - char* piece; - int i = 0; - int np = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - np++; - break; - } - case 1: { - nummap = atoi(piece); - if (nummap < 1) { - HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", - af->getlinenum()); - return 1; - } - maptable = (mapentry*)malloc(nummap * sizeof(struct mapentry)); - if (!maptable) - return 1; - np++; - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (np != 2) { - HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", - af->getlinenum()); - return 1; - } - - /* now parse the nummap lines to read in the remainder of the table */ - char* nl; - for (int j = 0; j < nummap; j++) { - if (!(nl = af->getline())) - return 1; - mychomp(nl); - tp = nl; - i = 0; - maptable[j].set = NULL; - maptable[j].len = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - if (strncmp(piece, "MAP", 3) != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - nummap = 0; - return 1; - } - break; - } - case 1: { - int setn = 0; - maptable[j].len = strlen(piece); - maptable[j].set = (char**)malloc(maptable[j].len * sizeof(char*)); - if (!maptable[j].set) - return 1; - for (int k = 0; k < maptable[j].len; k++) { - int chl = 1; - int chb = k; - if (piece[k] == '(') { - char* parpos = strchr(piece + k, ')'); - if (parpos != NULL) { - chb = k + 1; - chl = (int)(parpos - piece) - k - 1; - k = k + chl + 1; - } - } else { - if (utf8 && (piece[k] & 0xc0) == 0xc0) { - for (k++; utf8 && (piece[k] & 0xc0) == 0x80; k++) - ; - chl = k - chb; - k--; - } - } - maptable[j].set[setn] = (char*)malloc(chl + 1); - if (!maptable[j].set[setn]) - return 1; - strncpy(maptable[j].set[setn], piece + chb, chl); - maptable[j].set[setn][chl] = '\0'; - setn++; - } - maptable[j].len = setn; - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (!maptable[j].set || !maptable[j].len) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - nummap = 0; - return 1; - } - } - return 0; -} - -/* parse in the word breakpoint table */ -int AffixMgr::parse_breaktable(char* line, FileMgr* af) { - if (numbreak > -1) { - HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", - af->getlinenum()); - return 1; - } - char* tp = line; - char* piece; - int i = 0; - int np = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - np++; - break; - } - case 1: { - numbreak = atoi(piece); - if (numbreak < 0) { - HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", - af->getlinenum()); - return 1; - } - if (numbreak == 0) - return 0; - breaktable = (char**)malloc(numbreak * sizeof(char*)); - if (!breaktable) - return 1; - np++; - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (np != 2) { - HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", - af->getlinenum()); - return 1; - } - - /* now parse the numbreak lines to read in the remainder of the table */ - char* nl; - for (int j = 0; j < numbreak; j++) { - if (!(nl = af->getline())) - return 1; - mychomp(nl); - tp = nl; - i = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - if (strncmp(piece, "BREAK", 5) != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - numbreak = 0; - return 1; - } - break; - } - case 1: { - breaktable[j] = mystrdup(piece); - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (!breaktable) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - numbreak = 0; - return 1; - } - } - return 0; -} - -void AffixMgr::reverse_condition(char* piece) { - int neg = 0; - for (char* k = piece + strlen(piece) - 1; k >= piece; k--) { - switch (*k) { - case '[': { - if (neg) - *(k + 1) = '['; - else - *k = ']'; - break; - } - case ']': { - *k = '['; - if (neg) - *(k + 1) = '^'; - neg = 0; - break; - } - case '^': { - if (*(k + 1) == ']') - neg = 1; - else - *(k + 1) = *k; - break; - } - default: { - if (neg) - *(k + 1) = *k; - } - } - } -} - -int AffixMgr::parse_affix(char* line, - const char at, - FileMgr* af, - char* dupflags) { - int numents = 0; // number of affentry structures to parse - - unsigned short aflag = 0; // affix char identifier - - char ff = 0; - std::vector affentries; - - char* tp = line; - char* nl = line; - char* piece; - int i = 0; - -// checking lines with bad syntax -#ifdef DEBUG - int basefieldnum = 0; -#endif - - // split affix header line into pieces - - int np = 0; - - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - // piece 1 - is type of affix - case 0: { - np++; - break; - } - - // piece 2 - is affix char - case 1: { - np++; - aflag = pHMgr->decode_flag(piece); - if (((at == 'S') && (dupflags[aflag] & dupSFX)) || - ((at == 'P') && (dupflags[aflag] & dupPFX))) { - HUNSPELL_WARNING( - stderr, - "error: line %d: multiple definitions of an affix flag\n", - af->getlinenum()); - // return 1; XXX permissive mode for bad dictionaries - } - dupflags[aflag] += (char)((at == 'S') ? dupSFX : dupPFX); - break; - } - // piece 3 - is cross product indicator - case 2: { - np++; - if (*piece == 'Y') - ff = aeXPRODUCT; - break; - } - - // piece 4 - is number of affentries - case 3: { - np++; - numents = atoi(piece); - if ((numents <= 0) || ((::std::numeric_limits::max() / - sizeof(struct affentry)) < numents)) { - char* err = pHMgr->encode_flag(aflag); - if (err) { - HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", - af->getlinenum()); - free(err); - } - return 1; - } - affentries.resize(numents); - affentries[0].opts = ff; - if (utf8) - affentries[0].opts += aeUTF8; - if (pHMgr->is_aliasf()) - affentries[0].opts += aeALIASF; - if (pHMgr->is_aliasm()) - affentries[0].opts += aeALIASM; - affentries[0].aflag = aflag; - } - - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - // check to make sure we parsed enough pieces - if (np != 4) { - char* err = pHMgr->encode_flag(aflag); - if (err) { - HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", - af->getlinenum()); - free(err); - } - return 1; - } - - // now parse numents affentries for this affix - std::vector::iterator start = affentries.begin(); - std::vector::iterator end = affentries.end(); - for (std::vector::iterator entry = start; entry != end; ++entry) { - if ((nl = af->getline()) == NULL) - return 1; - mychomp(nl); - tp = nl; - i = 0; - np = 0; - - // split line into pieces - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - // piece 1 - is type - case 0: { - np++; - if (entry != start) - entry->opts = start->opts & - (char)(aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM); - break; - } - - // piece 2 - is affix char - case 1: { - np++; - if (pHMgr->decode_flag(piece) != aflag) { - char* err = pHMgr->encode_flag(aflag); - if (err) { - HUNSPELL_WARNING(stderr, - "error: line %d: affix %s is corrupt\n", - af->getlinenum(), err); - free(err); - } - return 1; - } - - if (entry != start) - entry->aflag = start->aflag; - break; - } - - // piece 3 - is string to strip or 0 for null - case 2: { - np++; - if (complexprefixes) { - if (utf8) - reverseword_utf(piece); - else - reverseword(piece); - } - entry->strip = mystrdup(piece); - entry->stripl = (unsigned char)strlen(entry->strip); - if (strcmp(entry->strip, "0") == 0) { - free(entry->strip); - entry->strip = mystrdup(""); - entry->stripl = 0; - } - break; - } - - // piece 4 - is affix string or 0 for null - case 3: { - char* dash; - entry->morphcode = NULL; - entry->contclass = NULL; - entry->contclasslen = 0; - np++; - dash = strchr(piece, '/'); - if (dash) { - *dash = '\0'; - - if (ignorechars) { - if (utf8) { - remove_ignored_chars_utf(piece, ignorechars_utf16, - ignorechars_utf16_len); - } else { - remove_ignored_chars(piece, ignorechars); - } - } - - if (complexprefixes) { - if (utf8) - reverseword_utf(piece); - else - reverseword(piece); - } - entry->appnd = mystrdup(piece); - - if (pHMgr->is_aliasf()) { - int index = atoi(dash + 1); - entry->contclasslen = (unsigned short)pHMgr->get_aliasf( - index, &(entry->contclass), af); - if (!entry->contclasslen) - HUNSPELL_WARNING(stderr, - "error: bad affix flag alias: \"%s\"\n", - dash + 1); - } else { - entry->contclasslen = (unsigned short)pHMgr->decode_flags( - &(entry->contclass), dash + 1, af); - flag_qsort(entry->contclass, 0, entry->contclasslen); - } - *dash = '/'; - - havecontclass = 1; - for (unsigned short _i = 0; _i < entry->contclasslen; _i++) { - contclasses[(entry->contclass)[_i]] = 1; - } - } else { - if (ignorechars) { - if (utf8) { - remove_ignored_chars_utf(piece, ignorechars_utf16, - ignorechars_utf16_len); - } else { - remove_ignored_chars(piece, ignorechars); - } - } - - if (complexprefixes) { - if (utf8) - reverseword_utf(piece); - else - reverseword(piece); - } - entry->appnd = mystrdup(piece); - } - - entry->appndl = (unsigned char)strlen(entry->appnd); - if (strcmp(entry->appnd, "0") == 0) { - free(entry->appnd); - entry->appnd = mystrdup(""); - entry->appndl = 0; - } - break; - } - - // piece 5 - is the conditions descriptions - case 4: { - np++; - if (complexprefixes) { - if (utf8) - reverseword_utf(piece); - else - reverseword(piece); - reverse_condition(piece); - } - if (entry->stripl && (strcmp(piece, ".") != 0) && - redundant_condition(at, entry->strip, entry->stripl, piece, - af->getlinenum())) - strcpy(piece, "."); - if (at == 'S') { - reverseword(piece); - reverse_condition(piece); - } - if (encodeit(*entry, piece)) - return 1; - break; - } - - case 5: { - np++; - if (pHMgr->is_aliasm()) { - int index = atoi(piece); - entry->morphcode = pHMgr->get_aliasm(index); - } else { - if (complexprefixes) { // XXX - fix me for morph. gen. - if (utf8) - reverseword_utf(piece); - else - reverseword(piece); - } - // add the remaining of the line - if (*tp) { - *(tp - 1) = ' '; - tp = tp + strlen(tp); - } - entry->morphcode = mystrdup(piece); - if (!entry->morphcode) - return 1; - } - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - // check to make sure we parsed enough pieces - if (np < 4) { - char* err = pHMgr->encode_flag(aflag); - if (err) { - HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n", - af->getlinenum(), err); - free(err); - } - return 1; - } - -#ifdef DEBUG - // detect unnecessary fields, excepting comments - if (basefieldnum) { - int fieldnum = - !(entry->morphcode) ? 5 : ((*(entry->morphcode) == '#') ? 5 : 6); - if (fieldnum != basefieldnum) - HUNSPELL_WARNING(stderr, "warning: line %d: bad field number\n", - af->getlinenum()); - } else { - basefieldnum = - !(entry->morphcode) ? 5 : ((*(entry->morphcode) == '#') ? 5 : 6); - } -#endif - } - - // now create SfxEntry or PfxEntry objects and use links to - // build an ordered (sorted by affix string) list - for (std::vector::iterator entry = start; entry != end; ++entry) { - if (at == 'P') { - PfxEntry* pfxptr = new PfxEntry(this, &(*entry)); - build_pfxtree(pfxptr); - } else { - SfxEntry* sfxptr = new SfxEntry(this, &(*entry)); - build_sfxtree(sfxptr); - } - } - return 0; -} - -int AffixMgr::redundant_condition(char ft, - char* strip, - int stripl, - const char* cond, - int linenum) { - int condl = strlen(cond); - int i; - int j; - int neg; - int in; - if (ft == 'P') { // prefix - if (strncmp(strip, cond, condl) == 0) - return 1; - if (utf8) { - } else { - for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) { - if (cond[j] != '[') { - if (cond[j] != strip[i]) { - HUNSPELL_WARNING(stderr, - "warning: line %d: incompatible stripping " - "characters and condition\n", - linenum); - return 0; - } - } else { - neg = (cond[j + 1] == '^') ? 1 : 0; - in = 0; - do { - j++; - if (strip[i] == cond[j]) - in = 1; - } while ((j < (condl - 1)) && (cond[j] != ']')); - if (j == (condl - 1) && (cond[j] != ']')) { - HUNSPELL_WARNING(stderr, - "error: line %d: missing ] in condition:\n%s\n", - linenum, cond); - return 0; - } - if ((!neg && !in) || (neg && in)) { - HUNSPELL_WARNING(stderr, - "warning: line %d: incompatible stripping " - "characters and condition\n", - linenum); - return 0; - } - } - } - if (j >= condl) - return 1; - } - } else { // suffix - if ((stripl >= condl) && strcmp(strip + stripl - condl, cond) == 0) - return 1; - if (utf8) { - } else { - for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) { - if (cond[j] != ']') { - if (cond[j] != strip[i]) { - HUNSPELL_WARNING(stderr, - "warning: line %d: incompatible stripping " - "characters and condition\n", - linenum); - return 0; - } - } else { - in = 0; - do { - j--; - if (strip[i] == cond[j]) - in = 1; - } while ((j > 0) && (cond[j] != '[')); - if ((j == 0) && (cond[j] != '[')) { - HUNSPELL_WARNING(stderr, - "error: line: %d: missing ] in condition:\n%s\n", - linenum, cond); - return 0; - } - neg = (cond[j + 1] == '^') ? 1 : 0; - if ((!neg && !in) || (neg && in)) { - HUNSPELL_WARNING(stderr, - "warning: line %d: incompatible stripping " - "characters and condition\n", - linenum); - return 0; - } - } - } - if (j < 0) - return 1; - } - } - return 0; -} - -int AffixMgr::get_suffix_words(short unsigned* suff, - int len, - const char* root_word, - char** slst) { - int suff_words_cnt = 0; - short unsigned* start_ptr = suff; - for (int j = 0; j < SETSIZE; j++) { - SfxEntry* ptr = sStart[j]; - while (ptr) { - suff = start_ptr; - for (int i = 0; i < len; i++) { - if ((*suff) == ptr->getFlag()) { - std::string nw(root_word); - nw.append(ptr->getAffix()); - hentry* ht = ptr->checkword(nw.c_str(), nw.size(), 0, NULL, NULL, 0, - NULL, 0, 0, 0); - if (ht) { - slst[suff_words_cnt] = (char*)malloc(MAXWORDUTF8LEN * sizeof(char)); - if (slst[suff_words_cnt]) { - strcpy(slst[suff_words_cnt], nw.c_str()); - suff_words_cnt++; - } - } - } - suff++; - } - ptr = ptr->getNext(); - } - } - return suff_words_cnt; -} diff --git a/libs/hunspell/src/csutil.c++ b/libs/hunspell/src/csutil.c++ new file mode 100644 index 0000000000..d7411bb216 --- /dev/null +++ b/libs/hunspell/src/csutil.c++ @@ -0,0 +1,3194 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Hunspell, based on MySpell. + * + * The Initial Developers of the Original Code are + * Kevin Hendricks (MySpell) and Németh László (Hunspell). + * Portions created by the Initial Developers are Copyright (C) 2002-2005 + * the Initial Developers. All Rights Reserved. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include "csutil.hxx" +#include "atypes.hxx" +#include "langnum.hxx" + +// Unicode character encoding information +struct unicode_info { + unsigned short c; + unsigned short cupper; + unsigned short clower; +}; + +#ifdef _WIN32 +#include +#include +#endif + +#ifdef OPENOFFICEORG +#include +#else +#ifndef MOZILLA_CLIENT +#include "utf_info.cxx" +#define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info))) +#endif +#endif + +#ifdef MOZILLA_CLIENT +#include "nsCOMPtr.h" +#include "nsIUnicodeEncoder.h" +#include "nsIUnicodeDecoder.h" +#include "nsUnicharUtils.h" +#include "mozilla/dom/EncodingUtils.h" + +using mozilla::dom::EncodingUtils; +#endif + +struct unicode_info2 { + char cletter; + unsigned short cupper; + unsigned short clower; +}; + +static struct unicode_info2* utf_tbl = NULL; +static int utf_tbl_count = + 0; // utf_tbl can be used by multiple Hunspell instances + +FILE* myfopen(const char* path, const char* mode) { +#ifdef _WIN32 +#define WIN32_LONG_PATH_PREFIX "\\\\?\\" + if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) { + int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0); + wchar_t* buff = (wchar_t*)malloc(len * sizeof(wchar_t)); + wchar_t* buff2 = (wchar_t*)malloc(len * sizeof(wchar_t)); + FILE* f = NULL; + if (buff && buff2) { + MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len); + if (_wfullpath(buff2, buff, len) != NULL) { + f = _wfopen(buff2, (strcmp(mode, "r") == 0) ? L"r" : L"rb"); + } + free(buff); + free(buff2); + } + return f; + } +#endif + return fopen(path, mode); +} + +/* only UTF-16 (BMP) implementation */ +char* u16_u8(char* dest, int size, const w_char* src, int srclen) { + signed char* u8 = (signed char*)dest; + signed char* u8_max = (signed char*)(u8 + size); + const w_char* u2 = src; + const w_char* u2_max = src + srclen; + while ((u2 < u2_max) && (u8 < u8_max)) { + if (u2->h) { // > 0xFF + // XXX 4-byte haven't implemented yet. + if (u2->h >= 0x08) { // >= 0x800 (3-byte UTF-8 character) + *u8 = 0xe0 + (u2->h >> 4); + u8++; + if (u8 < u8_max) { + *u8 = 0x80 + ((u2->h & 0xf) << 2) + (u2->l >> 6); + u8++; + if (u8 < u8_max) { + *u8 = 0x80 + (u2->l & 0x3f); + u8++; + } + } + } else { // < 0x800 (2-byte UTF-8 character) + *u8 = 0xc0 + (u2->h << 2) + (u2->l >> 6); + u8++; + if (u8 < u8_max) { + *u8 = 0x80 + (u2->l & 0x3f); + u8++; + } + } + } else { // <= 0xFF + if (u2->l & 0x80) { // >0x80 (2-byte UTF-8 character) + *u8 = 0xc0 + (u2->l >> 6); + u8++; + if (u8 < u8_max) { + *u8 = 0x80 + (u2->l & 0x3f); + u8++; + } + } else { // < 0x80 (1-byte UTF-8 character) + *u8 = u2->l; + u8++; + } + } + u2++; + } + *u8 = '\0'; + return dest; +} + +std::string& u16_u8(std::string& dest, const std::vector& src) { + dest.clear(); + std::vector::const_iterator u2 = src.begin(); + std::vector::const_iterator u2_max = src.end(); + while (u2 < u2_max) { + signed char u8; + if (u2->h) { // > 0xFF + // XXX 4-byte haven't implemented yet. + if (u2->h >= 0x08) { // >= 0x800 (3-byte UTF-8 character) + u8 = 0xe0 + (u2->h >> 4); + dest.push_back(u8); + u8 = 0x80 + ((u2->h & 0xf) << 2) + (u2->l >> 6); + dest.push_back(u8); + u8 = 0x80 + (u2->l & 0x3f); + dest.push_back(u8); + } else { // < 0x800 (2-byte UTF-8 character) + u8 = 0xc0 + (u2->h << 2) + (u2->l >> 6); + dest.push_back(u8); + u8 = 0x80 + (u2->l & 0x3f); + dest.push_back(u8); + } + } else { // <= 0xFF + if (u2->l & 0x80) { // >0x80 (2-byte UTF-8 character) + u8 = 0xc0 + (u2->l >> 6); + dest.push_back(u8); + u8 = 0x80 + (u2->l & 0x3f); + dest.push_back(u8); + } else { // < 0x80 (1-byte UTF-8 character) + u8 = u2->l; + dest.push_back(u8); + } + } + ++u2; + } + return dest; +} + +/* only UTF-16 (BMP) implementation */ +int u8_u16(w_char* dest, int size, const char* src) { + const signed char* u8 = (const signed char*)src; + w_char* u2 = dest; + w_char* u2_max = u2 + size; + + while ((u2 < u2_max) && *u8) { + switch ((*u8) & 0xf0) { + case 0x00: + case 0x10: + case 0x20: + case 0x30: + case 0x40: + case 0x50: + case 0x60: + case 0x70: { + u2->h = 0; + u2->l = *u8; + break; + } + case 0x80: + case 0x90: + case 0xa0: + case 0xb0: { + HUNSPELL_WARNING(stderr, + "UTF-8 encoding error. Unexpected continuation bytes " + "in %ld. character position\n%s\n", + static_cast(u8 - (signed char*)src), src); + u2->h = 0xff; + u2->l = 0xfd; + break; + } + case 0xc0: + case 0xd0: { // 2-byte UTF-8 codes + if ((*(u8 + 1) & 0xc0) == 0x80) { + u2->h = (*u8 & 0x1f) >> 2; + u2->l = (*u8 << 6) + (*(u8 + 1) & 0x3f); + u8++; + } else { + HUNSPELL_WARNING(stderr, + "UTF-8 encoding error. Missing continuation byte in " + "%ld. character position:\n%s\n", + static_cast(u8 - (signed char*)src), src); + u2->h = 0xff; + u2->l = 0xfd; + } + break; + } + case 0xe0: { // 3-byte UTF-8 codes + if ((*(u8 + 1) & 0xc0) == 0x80) { + u2->h = ((*u8 & 0x0f) << 4) + ((*(u8 + 1) & 0x3f) >> 2); + u8++; + if ((*(u8 + 1) & 0xc0) == 0x80) { + u2->l = (*u8 << 6) + (*(u8 + 1) & 0x3f); + u8++; + } else { + HUNSPELL_WARNING(stderr, + "UTF-8 encoding error. Missing continuation byte " + "in %ld. character position:\n%s\n", + static_cast(u8 - (signed char*)src), src); + u2->h = 0xff; + u2->l = 0xfd; + } + } else { + HUNSPELL_WARNING(stderr, + "UTF-8 encoding error. Missing continuation byte in " + "%ld. character position:\n%s\n", + static_cast(u8 - (signed char*)src), src); + u2->h = 0xff; + u2->l = 0xfd; + } + break; + } + case 0xf0: { // 4 or more byte UTF-8 codes + HUNSPELL_WARNING( + stderr, "This UTF-8 encoding can't convert to UTF-16:\n%s\n", src); + u2->h = 0xff; + u2->l = 0xfd; + return -1; + } + } + u8++; + u2++; + } + return (int)(u2 - dest); +} + +int u8_u16(std::vector& dest, const std::string& src) { + dest.clear(); + std::string::const_iterator u8 = src.begin(); + std::string::const_iterator u8_max = src.end(); + + while (u8 < u8_max) { + w_char u2; + switch ((*u8) & 0xf0) { + case 0x00: + case 0x10: + case 0x20: + case 0x30: + case 0x40: + case 0x50: + case 0x60: + case 0x70: { + u2.h = 0; + u2.l = *u8; + break; + } + case 0x80: + case 0x90: + case 0xa0: + case 0xb0: { + HUNSPELL_WARNING(stderr, + "UTF-8 encoding error. Unexpected continuation bytes " + "in %ld. character position\n%s\n", + static_cast(std::distance(src.begin(), u8)), + src.c_str()); + u2.h = 0xff; + u2.l = 0xfd; + break; + } + case 0xc0: + case 0xd0: { // 2-byte UTF-8 codes + if ((*(u8 + 1) & 0xc0) == 0x80) { + u2.h = (*u8 & 0x1f) >> 2; + u2.l = (*u8 << 6) + (*(u8 + 1) & 0x3f); + ++u8; + } else { + HUNSPELL_WARNING(stderr, + "UTF-8 encoding error. Missing continuation byte in " + "%ld. character position:\n%s\n", + static_cast(std::distance(src.begin(), u8)), + src.c_str()); + u2.h = 0xff; + u2.l = 0xfd; + } + break; + } + case 0xe0: { // 3-byte UTF-8 codes + if ((*(u8 + 1) & 0xc0) == 0x80) { + u2.h = ((*u8 & 0x0f) << 4) + ((*(u8 + 1) & 0x3f) >> 2); + ++u8; + if ((*(u8 + 1) & 0xc0) == 0x80) { + u2.l = (*u8 << 6) + (*(u8 + 1) & 0x3f); + ++u8; + } else { + HUNSPELL_WARNING(stderr, + "UTF-8 encoding error. Missing continuation byte " + "in %ld. character position:\n%s\n", + static_cast(std::distance(src.begin(), u8)), + src.c_str()); + u2.h = 0xff; + u2.l = 0xfd; + } + } else { + HUNSPELL_WARNING(stderr, + "UTF-8 encoding error. Missing continuation byte in " + "%ld. character position:\n%s\n", + static_cast(std::distance(src.begin(), u8)), + src.c_str()); + u2.h = 0xff; + u2.l = 0xfd; + } + break; + } + case 0xf0: { // 4 or more byte UTF-8 codes + HUNSPELL_WARNING(stderr, + "This UTF-8 encoding can't convert to UTF-16:\n%s\n", + src.c_str()); + u2.h = 0xff; + u2.l = 0xfd; + dest.push_back(u2); + return -1; + } + } + dest.push_back(u2); + ++u8; + } + + return dest.size(); +} + +void flag_qsort(unsigned short flags[], int begin, int end) { + unsigned short reg; + if (end > begin) { + unsigned short pivot = flags[begin]; + int l = begin + 1; + int r = end; + while (l < r) { + if (flags[l] <= pivot) { + l++; + } else { + r--; + reg = flags[l]; + flags[l] = flags[r]; + flags[r] = reg; + } + } + l--; + reg = flags[begin]; + flags[begin] = flags[l]; + flags[l] = reg; + + flag_qsort(flags, begin, l); + flag_qsort(flags, r, end); + } +} + +int flag_bsearch(unsigned short flags[], unsigned short flag, int length) { + int mid; + int left = 0; + int right = length - 1; + while (left <= right) { + mid = (left + right) / 2; + if (flags[mid] == flag) + return 1; + if (flag < flags[mid]) + right = mid - 1; + else + left = mid + 1; + } + return 0; +} + +// strip strings into token based on single char delimiter +// acts like strsep() but only uses a delim char and not +// a delim string +// default delimiter: white space characters + +char* mystrsep(char** stringp, const char delim) { + char* mp = *stringp; + if (*mp != '\0') { + char* dp; + if (delim) { + dp = strchr(mp, delim); + } else { + // don't use isspace() here, the string can be in some random charset + // that's way different than the locale's + for (dp = mp; (*dp && *dp != ' ' && *dp != '\t'); dp++) + ; + if (!*dp) + dp = NULL; + } + if (dp) { + *stringp = dp + 1; + *dp = '\0'; + } else { + *stringp = mp + strlen(mp); + } + return mp; + } + return NULL; +} + +// replaces strdup with ansi version +char* mystrdup(const char* s) { + char* d = NULL; + if (s) { + size_t sl = strlen(s) + 1; + d = (char*)malloc(sl); + if (d) { + memcpy(d, s, sl); + } else { + HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); + } + } + return d; +} + +// strcat for limited length destination string +char* mystrcat(char* dest, const char* st, int max) { + int len; + int len2; + if (dest == NULL || st == NULL) + return dest; + len = strlen(dest); + len2 = strlen(st); + if (len + len2 + 1 > max) + return dest; + strcpy(dest + len, st); + return dest; +} + +// remove cross-platform text line end characters +void mychomp(char* s) { + size_t k = strlen(s); + if ((k > 0) && ((*(s + k - 1) == '\r') || (*(s + k - 1) == '\n'))) + *(s + k - 1) = '\0'; + if ((k > 1) && (*(s + k - 2) == '\r')) + *(s + k - 2) = '\0'; +} + +// does an ansi strdup of the reverse of a string +char* myrevstrdup(const char* s) { + char* d = NULL; + if (s) { + size_t sl = strlen(s); + d = (char*)malloc(sl + 1); + if (d) { + const char* p = s + sl - 1; + char* q = d; + while (p >= s) + *q++ = *p--; + *q = '\0'; + } else { + HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); + } + } + return d; +} + +// break text to lines +// return number of lines +int line_tok(const char* text, char*** lines, char breakchar) { + int linenum = 0; + if (!text) { + return linenum; + } + char* dup = mystrdup(text); + char* p = strchr(dup, breakchar); + while (p) { + linenum++; + *p = '\0'; + p++; + p = strchr(p, breakchar); + } + linenum++; + *lines = (char**)malloc(linenum * sizeof(char*)); + if (!(*lines)) { + free(dup); + return 0; + } + + p = dup; + int l = 0; + for (int i = 0; i < linenum; i++) { + if (*p != '\0') { + (*lines)[l] = mystrdup(p); + if (!(*lines)[l]) { + for (i = 0; i < l; i++) + free((*lines)[i]); + free(dup); + return 0; + } + l++; + } + p += strlen(p) + 1; + } + free(dup); + if (!l) { + free(*lines); + *lines = NULL; + } + return l; +} + +// uniq line in place +char* line_uniq(char* text, char breakchar) { + char** lines; + int linenum = line_tok(text, &lines, breakchar); + int i; + strcpy(text, lines[0]); + for (i = 1; i < linenum; i++) { + int dup = 0; + for (int j = 0; j < i; j++) { + if (strcmp(lines[i], lines[j]) == 0) { + dup = 1; + break; + } + } + if (!dup) { + if ((i > 1) || (*(lines[0]) != '\0')) { + sprintf(text + strlen(text), "%c", breakchar); + } + strcat(text, lines[i]); + } + } + for (i = 0; i < linenum; i++) { + free(lines[i]); + } + free(lines); + return text; +} + +// uniq and boundary for compound analysis: "1\n\2\n\1" -> " ( \1 | \2 ) " +char* line_uniq_app(char** text, char breakchar) { + if (!strchr(*text, breakchar)) { + return *text; + } + + char** lines; + int i; + int linenum = line_tok(*text, &lines, breakchar); + int dup = 0; + for (i = 0; i < linenum; i++) { + for (int j = 0; j < (i - 1); j++) { + if (strcmp(lines[i], lines[j]) == 0) { + *(lines[i]) = '\0'; + dup++; + break; + } + } + } + if ((linenum - dup) == 1) { + strcpy(*text, lines[0]); + freelist(&lines, linenum); + return *text; + } + char* newtext = (char*)malloc(strlen(*text) + 2 * linenum + 3 + 1); + if (newtext) { + free(*text); + *text = newtext; + } else { + freelist(&lines, linenum); + return *text; + } + strcpy(*text, " ( "); + for (i = 0; i < linenum; i++) + if (*(lines[i])) { + sprintf(*text + strlen(*text), "%s%s", lines[i], " | "); + } + (*text)[strlen(*text) - 2] = ')'; // " ) " + freelist(&lines, linenum); + return *text; +} + +// append s to ends of every lines in text +void strlinecat(char* dest, const char* s) { + char* dup = mystrdup(dest); + char* source = dup; + int len = strlen(s); + if (dup) { + while (*source) { + if (*source == '\n') { + strncpy(dest, s, len); + dest += len; + } + *dest = *source; + source++; + dest++; + } + strcpy(dest, s); + free(dup); + } +} + +// append s to ends of every lines in text +std::string& strlinecat(std::string& str, const std::string& apd) { + size_t pos = 0; + while ((pos = str.find('\n', pos)) != std::string::npos) { + str.insert(pos, apd); + pos += apd.length() + 1; + } + str.append(apd); + return str; +} + +// change \n to char c +char* tr(char* text, char oldc, char newc) { + char* p; + for (p = text; *p; p++) + if (*p == oldc) + *p = newc; + return text; +} + +// morphcmp(): compare MORPH_DERI_SFX, MORPH_INFL_SFX and MORPH_TERM_SFX fields +// in the first line of the inputs +// return 0, if inputs equal +// return 1, if inputs may equal with a secondary suffix +// otherwise return -1 +int morphcmp(const char* s, const char* t) { + int se = 0; + int te = 0; + const char* sl; + const char* tl; + const char* olds; + const char* oldt; + if (!s || !t) + return 1; + olds = s; + sl = strchr(s, '\n'); + s = strstr(s, MORPH_DERI_SFX); + if (!s || (sl && sl < s)) + s = strstr(olds, MORPH_INFL_SFX); + if (!s || (sl && sl < s)) { + s = strstr(olds, MORPH_TERM_SFX); + olds = NULL; + } + oldt = t; + tl = strchr(t, '\n'); + t = strstr(t, MORPH_DERI_SFX); + if (!t || (tl && tl < t)) + t = strstr(oldt, MORPH_INFL_SFX); + if (!t || (tl && tl < t)) { + t = strstr(oldt, MORPH_TERM_SFX); + oldt = NULL; + } + while (s && t && (!sl || sl > s) && (!tl || tl > t)) { + s += MORPH_TAG_LEN; + t += MORPH_TAG_LEN; + se = 0; + te = 0; + while ((*s == *t) && !se && !te) { + s++; + t++; + switch (*s) { + case ' ': + case '\n': + case '\t': + case '\0': + se = 1; + } + switch (*t) { + case ' ': + case '\n': + case '\t': + case '\0': + te = 1; + } + } + if (!se || !te) { + // not terminal suffix difference + if (olds) + return -1; + return 1; + } + olds = s; + s = strstr(s, MORPH_DERI_SFX); + if (!s || (sl && sl < s)) + s = strstr(olds, MORPH_INFL_SFX); + if (!s || (sl && sl < s)) { + s = strstr(olds, MORPH_TERM_SFX); + olds = NULL; + } + oldt = t; + t = strstr(t, MORPH_DERI_SFX); + if (!t || (tl && tl < t)) + t = strstr(oldt, MORPH_INFL_SFX); + if (!t || (tl && tl < t)) { + t = strstr(oldt, MORPH_TERM_SFX); + oldt = NULL; + } + } + if (!s && !t && se && te) + return 0; + return 1; +} + +int get_sfxcount(const char* morph) { + if (!morph || !*morph) + return 0; + int n = 0; + const char* old = morph; + morph = strstr(morph, MORPH_DERI_SFX); + if (!morph) + morph = strstr(old, MORPH_INFL_SFX); + if (!morph) + morph = strstr(old, MORPH_TERM_SFX); + while (morph) { + n++; + old = morph; + morph = strstr(morph + 1, MORPH_DERI_SFX); + if (!morph) + morph = strstr(old + 1, MORPH_INFL_SFX); + if (!morph) + morph = strstr(old + 1, MORPH_TERM_SFX); + } + return n; +} + +int fieldlen(const char* r) { + int n = 0; + while (r && *r != ' ' && *r != '\t' && *r != '\0' && *r != '\n') { + r++; + n++; + } + return n; +} + +char* copy_field(char* dest, const char* morph, const char* var) { + if (!morph) + return NULL; + const char* beg = strstr(morph, var); + if (beg) { + char* d = dest; + for (beg += MORPH_TAG_LEN; + *beg != ' ' && *beg != '\t' && *beg != '\n' && *beg != '\0'; + d++, beg++) { + *d = *beg; + } + *d = '\0'; + return dest; + } + return NULL; +} + +bool copy_field(std::string& dest, + const std::string& morph, + const std::string& var) { + if (morph.empty()) + return false; + size_t pos = morph.find(var); + if (pos == std::string::npos) + return false; + dest.clear(); + std::string beg(morph.substr(pos + MORPH_TAG_LEN, std::string::npos)); + + for (size_t i = 0; i < beg.size(); ++i) { + const char c(beg[i]); + if (c == ' ' || c == '\t' || c == '\n') + break; + dest.push_back(c); + } + + return true; +} + +std::string& mystrrep(std::string& str, + const std::string& search, + const std::string& replace) { + size_t pos = 0; + while ((pos = str.find(search, pos)) != std::string::npos) { + str.replace(pos, search.length(), replace); + pos += replace.length(); + } + return str; +} + +char* mystrrep(char* word, const char* pat, const char* rep) { + char* pos = strstr(word, pat); + if (pos) { + int replen = strlen(rep); + int patlen = strlen(pat); + while (pos) { + if (replen < patlen) { + char* end = word + strlen(word); + char* next = pos + replen; + char* prev = pos + strlen(pat); + for (; prev < end;* next = *prev, prev++, next++) + ; + *next = '\0'; + } else if (replen > patlen) { + char* end = pos + patlen; + char* next = word + strlen(word) + replen - patlen; + char* prev = next - replen + patlen; + for (; prev >= end;* next = *prev, prev--, next--) + ; + } + strncpy(pos, rep, replen); + pos = strstr(word, pat); + } + } + return word; +} + +// reverse word +int reverseword(char* word) { + char r; + for (char *dest = word + strlen(word) - 1; word < dest; word++, dest--) { + r = *word; + *word = *dest; + *dest = r; + } + return 0; +} + +// reverse word +std::string& reverseword(std::string& word) { + std::reverse(word.begin(), word.end()); + return word; +} + +// reverse word (error: 1) +int reverseword_utf(char* word) { + w_char w[MAXWORDLEN]; + w_char* p; + w_char r; + int l = u8_u16(w, MAXWORDLEN, word); + if (l == -1) + return 1; + p = w; + for (w_char *dest = w + l - 1; p < dest; p++, dest--) { + r = *p; + *p = *dest; + *dest = r; + } + u16_u8(word, MAXWORDUTF8LEN, w, l); + return 0; +} + +// reverse word +std::string& reverseword_utf(std::string& word) { + std::vector w; + u8_u16(w, word); + std::reverse(w.begin(), w.end()); + u16_u8(word, w); + return word; +} + +int uniqlist(char** list, int n) { + int i; + if (n < 2) + return n; + for (i = 0; i < n; i++) { + for (int j = 0; j < i; j++) { + if (list[j] && list[i] && (strcmp(list[j], list[i]) == 0)) { + free(list[i]); + list[i] = NULL; + break; + } + } + } + int m = 1; + for (i = 1; i < n; i++) + if (list[i]) { + list[m] = list[i]; + m++; + } + return m; +} + +void freelist(char*** list, int n) { + if (list && *list) { + for (int i = 0; i < n; i++) + free((*list)[i]); + free(*list); + *list = NULL; + } +} + +namespace { +unsigned char cupper(const struct cs_info* csconv, int nIndex) { + if (nIndex < 0 || nIndex > 255) + return nIndex; + return csconv[nIndex].cupper; +} + +unsigned char clower(const struct cs_info* csconv, int nIndex) { + if (nIndex < 0 || nIndex > 255) + return nIndex; + return csconv[nIndex].clower; +} + +unsigned char ccase(const struct cs_info* csconv, int nIndex) { + if (nIndex < 0 || nIndex > 255) + return nIndex; + return csconv[nIndex].ccase; +} +} + +// convert null terminated string to all caps +void mkallcap(char* p, const struct cs_info* csconv) { + while (*p != '\0') { + *p = cupper(csconv, static_cast(*p)); + p++; + } +} + +// convert std::string to all caps +std::string& mkallcap(std::string& s, const struct cs_info* csconv) { + for (std::string::iterator aI = s.begin(), aEnd = s.end(); aI != aEnd; ++aI) { + *aI = cupper(csconv, static_cast(*aI)); + } + return s; +} + +// convert null terminated string to all little +void mkallsmall(char* p, const struct cs_info* csconv) { + while (*p != '\0') { + *p = clower(csconv, static_cast(*p)); + p++; + } +} + +// convert std::string to all little +std::string& mkallsmall(std::string& s, const struct cs_info* csconv) { + for (std::string::iterator aI = s.begin(), aEnd = s.end(); aI != aEnd; ++aI) { + *aI = clower(csconv, static_cast(*aI)); + } + return s; +} + +void mkallsmall_utf(w_char* u, int nc, int langnum) { + for (int i = 0; i < nc; i++) { + unsigned short idx = (u[i].h << 8) + u[i].l; + if (idx != unicodetolower(idx, langnum)) { + u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); + u[i].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); + } + } +} + +std::vector& mkallsmall_utf(std::vector& u, + int nc, + int langnum) { + for (int i = 0; i < nc; i++) { + unsigned short idx = (u[i].h << 8) + u[i].l; + if (idx != unicodetolower(idx, langnum)) { + u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); + u[i].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); + } + } + return u; +} + +void mkallcap_utf(w_char* u, int nc, int langnum) { + for (int i = 0; i < nc; i++) { + unsigned short idx = (u[i].h << 8) + u[i].l; + if (idx != unicodetoupper(idx, langnum)) { + u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); + u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); + } + } +} + +std::vector& mkallcap_utf(std::vector& u, int nc, int langnum) { + for (int i = 0; i < nc; i++) { + unsigned short idx = (u[i].h << 8) + u[i].l; + if (idx != unicodetoupper(idx, langnum)) { + u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); + u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); + } + } + return u; +} + +// convert null terminated string to have initial capital +void mkinitcap(char* p, const struct cs_info* csconv) { + if (*p != '\0') + *p = cupper(csconv, static_cast(*p)); +} + +// conversion function for protected memory +void store_pointer(char* dest, char* source) { + memcpy(dest, &source, sizeof(char*)); +} + +// conversion function for protected memory +char* get_stored_pointer(const char* s) { + char* p; + memcpy(&p, s, sizeof(char*)); + return p; +} + +#ifndef MOZILLA_CLIENT +// convert null terminated string to all caps using encoding +void enmkallcap(char* d, const char* p, const char* encoding) + +{ + struct cs_info* csconv = get_current_cs(encoding); + while (*p != '\0') { + *d++ = cupper(csconv, static_cast(*p)); + p++; + } + *d = '\0'; +} + +// convert null terminated string to all little using encoding +void enmkallsmall(char* d, const char* p, const char* encoding) { + struct cs_info* csconv = get_current_cs(encoding); + while (*p != '\0') { + *d++ = clower(csconv, static_cast(*p)); + p++; + } + *d = '\0'; +} + +// convert null terminated string to have initial capital using encoding +void enmkinitcap(char* d, const char* p, const char* encoding) { + struct cs_info* csconv = get_current_cs(encoding); + memcpy(d, p, (strlen(p) + 1)); + if (*p != '\0') + *d = cupper(csconv, static_cast(*p)); +} + +// these are simple character mappings for the +// encodings supported +// supplying isupper, tolower, and toupper + +static struct cs_info iso1_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso2_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x01, 0xb3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x01, 0xb5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x01, 0xb9, 0xa9}, {0x01, 0xba, 0xaa}, + {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad}, + {0x01, 0xbe, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xa3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xa5}, {0x00, 0xb6, 0xa6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xa9}, + {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xae}, {0x00, 0xbf, 0xaf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso3_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x01, 0x69, 0xa9}, {0x01, 0xba, 0xaa}, + {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xa6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0x49}, + {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xaf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x00, 0xc3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x00, 0xd0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xe3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso4_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x01, 0xb3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x01, 0xb5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x01, 0xb9, 0xa9}, {0x01, 0xba, 0xaa}, + {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad}, + {0x01, 0xbe, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xa3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xa5}, {0x00, 0xb6, 0xa6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xa9}, + {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xae}, {0x00, 0xbf, 0xbf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso5_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xf1, 0xa1}, + {0x01, 0xf2, 0xa2}, {0x01, 0xf3, 0xa3}, {0x01, 0xf4, 0xa4}, + {0x01, 0xf5, 0xa5}, {0x01, 0xf6, 0xa6}, {0x01, 0xf7, 0xa7}, + {0x01, 0xf8, 0xa8}, {0x01, 0xf9, 0xa9}, {0x01, 0xfa, 0xaa}, + {0x01, 0xfb, 0xab}, {0x01, 0xfc, 0xac}, {0x00, 0xad, 0xad}, + {0x01, 0xfe, 0xae}, {0x01, 0xff, 0xaf}, {0x01, 0xd0, 0xb0}, + {0x01, 0xd1, 0xb1}, {0x01, 0xd2, 0xb2}, {0x01, 0xd3, 0xb3}, + {0x01, 0xd4, 0xb4}, {0x01, 0xd5, 0xb5}, {0x01, 0xd6, 0xb6}, + {0x01, 0xd7, 0xb7}, {0x01, 0xd8, 0xb8}, {0x01, 0xd9, 0xb9}, + {0x01, 0xda, 0xba}, {0x01, 0xdb, 0xbb}, {0x01, 0xdc, 0xbc}, + {0x01, 0xdd, 0xbd}, {0x01, 0xde, 0xbe}, {0x01, 0xdf, 0xbf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x00, 0xd0, 0xb0}, {0x00, 0xd1, 0xb1}, + {0x00, 0xd2, 0xb2}, {0x00, 0xd3, 0xb3}, {0x00, 0xd4, 0xb4}, + {0x00, 0xd5, 0xb5}, {0x00, 0xd6, 0xb6}, {0x00, 0xd7, 0xb7}, + {0x00, 0xd8, 0xb8}, {0x00, 0xd9, 0xb9}, {0x00, 0xda, 0xba}, + {0x00, 0xdb, 0xbb}, {0x00, 0xdc, 0xbc}, {0x00, 0xdd, 0xbd}, + {0x00, 0xde, 0xbe}, {0x00, 0xdf, 0xbf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xa1}, {0x00, 0xf2, 0xa2}, + {0x00, 0xf3, 0xa3}, {0x00, 0xf4, 0xa4}, {0x00, 0xf5, 0xa5}, + {0x00, 0xf6, 0xa6}, {0x00, 0xf7, 0xa7}, {0x00, 0xf8, 0xa8}, + {0x00, 0xf9, 0xa9}, {0x00, 0xfa, 0xaa}, {0x00, 0xfb, 0xab}, + {0x00, 0xfc, 0xac}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xae}, + {0x00, 0xff, 0xaf}}; + +static struct cs_info iso6_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, + {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, + {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, + {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, + {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, + {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, + {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, + {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, + {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, + {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, + {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, + {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, + {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, + {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, + {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, + {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, + {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, + {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, + {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso7_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x01, 0xdc, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x01, 0xdd, 0xb8}, {0x01, 0xde, 0xb9}, + {0x01, 0xdf, 0xba}, {0x00, 0xbb, 0xbb}, {0x01, 0xfc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x01, 0xfd, 0xbe}, {0x01, 0xfe, 0xbf}, + {0x00, 0xc0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x00, 0xd2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x00, 0xdc, 0xb6}, {0x00, 0xdd, 0xb8}, + {0x00, 0xde, 0xb9}, {0x00, 0xdf, 0xba}, {0x00, 0xe0, 0xe0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd3}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xbc}, {0x00, 0xfd, 0xbe}, {0x00, 0xfe, 0xbf}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso8_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, + {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, + {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, + {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, + {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, + {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, + {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, + {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, + {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, + {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, + {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, + {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, + {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, + {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, + {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, + {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, + {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, + {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, + {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso9_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0xfd, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0xdd}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0x69, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0x49}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso10_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, + {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, + {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, + {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, + {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, + {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, + {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, + {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, + {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, + {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, + {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, + {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, + {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, + {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, + {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, + {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, + {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, + {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, + {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, + {0x00, 0xff, 0xff}}; + +static struct cs_info koi8r_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xb3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x01, 0xa3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x00, 0xc0, 0xe0}, {0x00, 0xc1, 0xe1}, {0x00, 0xc2, 0xe2}, + {0x00, 0xc3, 0xe3}, {0x00, 0xc4, 0xe4}, {0x00, 0xc5, 0xe5}, + {0x00, 0xc6, 0xe6}, {0x00, 0xc7, 0xe7}, {0x00, 0xc8, 0xe8}, + {0x00, 0xc9, 0xe9}, {0x00, 0xca, 0xea}, {0x00, 0xcb, 0xeb}, + {0x00, 0xcc, 0xec}, {0x00, 0xcd, 0xed}, {0x00, 0xce, 0xee}, + {0x00, 0xcf, 0xef}, {0x00, 0xd0, 0xf0}, {0x00, 0xd1, 0xf1}, + {0x00, 0xd2, 0xf2}, {0x00, 0xd3, 0xf3}, {0x00, 0xd4, 0xf4}, + {0x00, 0xd5, 0xf5}, {0x00, 0xd6, 0xf6}, {0x00, 0xd7, 0xf7}, + {0x00, 0xd8, 0xf8}, {0x00, 0xd9, 0xf9}, {0x00, 0xda, 0xfa}, + {0x00, 0xdb, 0xfb}, {0x00, 0xdc, 0xfc}, {0x00, 0xdd, 0xfd}, + {0x00, 0xde, 0xfe}, {0x00, 0xdf, 0xff}, {0x01, 0xc0, 0xe0}, + {0x01, 0xc1, 0xe1}, {0x01, 0xc2, 0xe2}, {0x01, 0xc3, 0xe3}, + {0x01, 0xc4, 0xe4}, {0x01, 0xc5, 0xe5}, {0x01, 0xc6, 0xe6}, + {0x01, 0xc7, 0xe7}, {0x01, 0xc8, 0xe8}, {0x01, 0xc9, 0xe9}, + {0x01, 0xca, 0xea}, {0x01, 0xcb, 0xeb}, {0x01, 0xcc, 0xec}, + {0x01, 0xcd, 0xed}, {0x01, 0xce, 0xee}, {0x01, 0xcf, 0xef}, + {0x01, 0xd0, 0xf0}, {0x01, 0xd1, 0xf1}, {0x01, 0xd2, 0xf2}, + {0x01, 0xd3, 0xf3}, {0x01, 0xd4, 0xf4}, {0x01, 0xd5, 0xf5}, + {0x01, 0xd6, 0xf6}, {0x01, 0xd7, 0xf7}, {0x01, 0xd8, 0xf8}, + {0x01, 0xd9, 0xf9}, {0x01, 0xda, 0xfa}, {0x01, 0xdb, 0xfb}, + {0x01, 0xdc, 0xfc}, {0x01, 0xdd, 0xfd}, {0x01, 0xde, 0xfe}, + {0x01, 0xdf, 0xff}}; + +static struct cs_info koi8u_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xb3}, {0x00, 0xa4, 0xb4}, /* ie */ + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xb6}, /* i */ + {0x00, 0xa7, 0xb7}, /* ii */ + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xbd}, /* g'' */ + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x01, 0xa3, 0xb3}, + {0x00, 0xb4, 0xb4}, /* IE */ + {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, /* I */ + {0x00, 0xb7, 0xb7}, /* II */ + {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, {0x00, 0xba, 0xba}, + {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, {0x00, 0xbd, 0xbd}, + {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, {0x00, 0xc0, 0xe0}, + {0x00, 0xc1, 0xe1}, {0x00, 0xc2, 0xe2}, {0x00, 0xc3, 0xe3}, + {0x00, 0xc4, 0xe4}, {0x00, 0xc5, 0xe5}, {0x00, 0xc6, 0xe6}, + {0x00, 0xc7, 0xe7}, {0x00, 0xc8, 0xe8}, {0x00, 0xc9, 0xe9}, + {0x00, 0xca, 0xea}, {0x00, 0xcb, 0xeb}, {0x00, 0xcc, 0xec}, + {0x00, 0xcd, 0xed}, {0x00, 0xce, 0xee}, {0x00, 0xcf, 0xef}, + {0x00, 0xd0, 0xf0}, {0x00, 0xd1, 0xf1}, {0x00, 0xd2, 0xf2}, + {0x00, 0xd3, 0xf3}, {0x00, 0xd4, 0xf4}, {0x00, 0xd5, 0xf5}, + {0x00, 0xd6, 0xf6}, {0x00, 0xd7, 0xf7}, {0x00, 0xd8, 0xf8}, + {0x00, 0xd9, 0xf9}, {0x00, 0xda, 0xfa}, {0x00, 0xdb, 0xfb}, + {0x00, 0xdc, 0xfc}, {0x00, 0xdd, 0xfd}, {0x00, 0xde, 0xfe}, + {0x00, 0xdf, 0xff}, {0x01, 0xc0, 0xe0}, {0x01, 0xc1, 0xe1}, + {0x01, 0xc2, 0xe2}, {0x01, 0xc3, 0xe3}, {0x01, 0xc4, 0xe4}, + {0x01, 0xc5, 0xe5}, {0x01, 0xc6, 0xe6}, {0x01, 0xc7, 0xe7}, + {0x01, 0xc8, 0xe8}, {0x01, 0xc9, 0xe9}, {0x01, 0xca, 0xea}, + {0x01, 0xcb, 0xeb}, {0x01, 0xcc, 0xec}, {0x01, 0xcd, 0xed}, + {0x01, 0xce, 0xee}, {0x01, 0xcf, 0xef}, {0x01, 0xd0, 0xf0}, + {0x01, 0xd1, 0xf1}, {0x01, 0xd2, 0xf2}, {0x01, 0xd3, 0xf3}, + {0x01, 0xd4, 0xf4}, {0x01, 0xd5, 0xf5}, {0x01, 0xd6, 0xf6}, + {0x01, 0xd7, 0xf7}, {0x01, 0xd8, 0xf8}, {0x01, 0xd9, 0xf9}, + {0x01, 0xda, 0xfa}, {0x01, 0xdb, 0xfb}, {0x01, 0xdc, 0xfc}, + {0x01, 0xdd, 0xfd}, {0x01, 0xde, 0xfe}, {0x01, 0xdf, 0xff}}; + +static struct cs_info cp1251_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x01, 0x90, 0x80}, + {0x01, 0x83, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x81}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x01, 0x9a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x01, 0x9c, 0x8c}, + {0x01, 0x9d, 0x8d}, {0x01, 0x9e, 0x8e}, {0x01, 0x9f, 0x8f}, + {0x00, 0x90, 0x80}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x8a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x8c}, {0x00, 0x9d, 0x8d}, {0x00, 0x9e, 0x8e}, + {0x00, 0x9f, 0x8f}, {0x00, 0xa0, 0xa0}, {0x01, 0xa2, 0xa1}, + {0x00, 0xa2, 0xa1}, {0x01, 0xbc, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x01, 0xb4, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x01, 0xb8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x01, 0xba, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x01, 0xb3, 0xb2}, {0x00, 0xb3, 0xb2}, + {0x00, 0xb4, 0xa5}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xa8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xa3}, + {0x01, 0xbe, 0xbd}, {0x00, 0xbe, 0xbd}, {0x00, 0xbf, 0xaf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x01, 0xff, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xdf}}; + +static struct cs_info iso13_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0A, 0x0A}, {0x00, 0x0B, 0x0B}, + {0x00, 0x0C, 0x0C}, {0x00, 0x0D, 0x0D}, {0x00, 0x0E, 0x0E}, + {0x00, 0x0F, 0x0F}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1A, 0x1A}, + {0x00, 0x1B, 0x1B}, {0x00, 0x1C, 0x1C}, {0x00, 0x1D, 0x1D}, + {0x00, 0x1E, 0x1E}, {0x00, 0x1F, 0x1F}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2A, 0x2A}, {0x00, 0x2B, 0x2B}, {0x00, 0x2C, 0x2C}, + {0x00, 0x2D, 0x2D}, {0x00, 0x2E, 0x2E}, {0x00, 0x2F, 0x2F}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3A, 0x3A}, {0x00, 0x3B, 0x3B}, + {0x00, 0x3C, 0x3C}, {0x00, 0x3D, 0x3D}, {0x00, 0x3E, 0x3E}, + {0x00, 0x3F, 0x3F}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6A, 0x4A}, + {0x01, 0x6B, 0x4B}, {0x01, 0x6C, 0x4C}, {0x01, 0x6D, 0x4D}, + {0x01, 0x6E, 0x4E}, {0x01, 0x6F, 0x4F}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7A, 0x5A}, {0x00, 0x5B, 0x5B}, {0x00, 0x5C, 0x5C}, + {0x00, 0x5D, 0x5D}, {0x00, 0x5E, 0x5E}, {0x00, 0x5F, 0x5F}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6A, 0x4A}, {0x00, 0x6B, 0x4B}, + {0x00, 0x6C, 0x4C}, {0x00, 0x6D, 0x4D}, {0x00, 0x6E, 0x4E}, + {0x00, 0x6F, 0x4F}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7A, 0x5A}, + {0x00, 0x7B, 0x7B}, {0x00, 0x7C, 0x7C}, {0x00, 0x7D, 0x7D}, + {0x00, 0x7E, 0x7E}, {0x00, 0x7F, 0x7F}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8A, 0x8A}, {0x00, 0x8B, 0x8B}, {0x00, 0x8C, 0x8C}, + {0x00, 0x8D, 0x8D}, {0x00, 0x8E, 0x8E}, {0x00, 0x8F, 0x8F}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9A, 0x9A}, {0x00, 0x9B, 0x9B}, + {0x00, 0x9C, 0x9C}, {0x00, 0x9D, 0x9D}, {0x00, 0x9E, 0x9E}, + {0x00, 0x9F, 0x9F}, {0x00, 0xA0, 0xA0}, {0x00, 0xA1, 0xA1}, + {0x00, 0xA2, 0xA2}, {0x00, 0xA3, 0xA3}, {0x00, 0xA4, 0xA4}, + {0x00, 0xA5, 0xA5}, {0x00, 0xA6, 0xA6}, {0x00, 0xA7, 0xA7}, + {0x01, 0xB8, 0xA8}, {0x00, 0xA9, 0xA9}, {0x01, 0xBA, 0xAA}, + {0x00, 0xAB, 0xAB}, {0x00, 0xAC, 0xAC}, {0x00, 0xAD, 0xAD}, + {0x00, 0xAE, 0xAE}, {0x01, 0xBF, 0xAF}, {0x00, 0xB0, 0xB0}, + {0x00, 0xB1, 0xB1}, {0x00, 0xB2, 0xB2}, {0x00, 0xB3, 0xB3}, + {0x00, 0xB4, 0xB4}, {0x00, 0xB5, 0xB5}, {0x00, 0xB6, 0xB6}, + {0x00, 0xB7, 0xB7}, {0x00, 0xB8, 0xA8}, {0x00, 0xB9, 0xB9}, + {0x00, 0xBA, 0xAA}, {0x00, 0xBB, 0xBB}, {0x00, 0xBC, 0xBC}, + {0x00, 0xBD, 0xBD}, {0x00, 0xBE, 0xBE}, {0x00, 0xBF, 0xAF}, + {0x01, 0xE0, 0xC0}, {0x01, 0xE1, 0xC1}, {0x01, 0xE2, 0xC2}, + {0x01, 0xE3, 0xC3}, {0x01, 0xE4, 0xC4}, {0x01, 0xE5, 0xC5}, + {0x01, 0xE6, 0xC6}, {0x01, 0xE7, 0xC7}, {0x01, 0xE8, 0xC8}, + {0x01, 0xE9, 0xC9}, {0x01, 0xEA, 0xCA}, {0x01, 0xEB, 0xCB}, + {0x01, 0xEC, 0xCC}, {0x01, 0xED, 0xCD}, {0x01, 0xEE, 0xCE}, + {0x01, 0xEF, 0xCF}, {0x01, 0xF0, 0xD0}, {0x01, 0xF1, 0xD1}, + {0x01, 0xF2, 0xD2}, {0x01, 0xF3, 0xD3}, {0x01, 0xF4, 0xD4}, + {0x01, 0xF5, 0xD5}, {0x01, 0xF6, 0xD6}, {0x00, 0xD7, 0xD7}, + {0x01, 0xF8, 0xD8}, {0x01, 0xF9, 0xD9}, {0x01, 0xFA, 0xDA}, + {0x01, 0xFB, 0xDB}, {0x01, 0xFC, 0xDC}, {0x01, 0xFD, 0xDD}, + {0x01, 0xFE, 0xDE}, {0x00, 0xDF, 0xDF}, {0x00, 0xE0, 0xC0}, + {0x00, 0xE1, 0xC1}, {0x00, 0xE2, 0xC2}, {0x00, 0xE3, 0xC3}, + {0x00, 0xE4, 0xC4}, {0x00, 0xE5, 0xC5}, {0x00, 0xE6, 0xC6}, + {0x00, 0xE7, 0xC7}, {0x00, 0xE8, 0xC8}, {0x00, 0xE9, 0xC9}, + {0x00, 0xEA, 0xCA}, {0x00, 0xEB, 0xCB}, {0x00, 0xEC, 0xCC}, + {0x00, 0xED, 0xCD}, {0x00, 0xEE, 0xCE}, {0x00, 0xEF, 0xCF}, + {0x00, 0xF0, 0xD0}, {0x00, 0xF1, 0xD1}, {0x00, 0xF2, 0xD2}, + {0x00, 0xF3, 0xD3}, {0x00, 0xF4, 0xD4}, {0x00, 0xF5, 0xD5}, + {0x00, 0xF6, 0xD6}, {0x00, 0xF7, 0xF7}, {0x00, 0xF8, 0xD8}, + {0x00, 0xF9, 0xD9}, {0x00, 0xFA, 0xDA}, {0x00, 0xFB, 0xDB}, + {0x00, 0xFC, 0xDC}, {0x00, 0xFD, 0xDD}, {0x00, 0xFE, 0xDE}, + {0x00, 0xFF, 0xFF}}; + +static struct cs_info iso14_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xa2, 0xa1}, + {0x00, 0xa2, 0xa1}, {0x00, 0xa3, 0xa3}, {0x01, 0xa5, 0xa4}, + {0x00, 0xa5, 0xa4}, {0x01, 0xa6, 0xab}, {0x00, 0xa7, 0xa7}, + {0x01, 0xb8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x01, 0xba, 0xaa}, + {0x00, 0xab, 0xa6}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x01, 0xff, 0xaf}, {0x01, 0xb1, 0xb0}, + {0x00, 0xb1, 0xb0}, {0x01, 0xb3, 0xb2}, {0x00, 0xb3, 0xb2}, + {0x01, 0xb5, 0xb4}, {0x00, 0xb5, 0xb4}, {0x00, 0xb6, 0xb6}, + {0x01, 0xb9, 0xb7}, {0x00, 0xb8, 0xa8}, {0x00, 0xb9, 0xb6}, + {0x00, 0xba, 0xaa}, {0x01, 0xbf, 0xbb}, {0x00, 0xbc, 0xac}, + {0x01, 0xbe, 0xbd}, {0x00, 0xbe, 0xbd}, {0x00, 0xbf, 0xbb}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso15_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x01, 0xa8, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa6}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x01, 0xb8, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb4}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x01, 0xbd, 0xbc}, + {0x00, 0xbd, 0xbc}, {0x01, 0xff, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xbe}}; + +static struct cs_info iscii_devanagari_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, + {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, + {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, + {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, + {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, + {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, + {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, + {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, + {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, + {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, + {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, + {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, + {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, + {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, + {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, + {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, + {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, + {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, + {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, + {0x00, 0xff, 0xff}}; + +static struct cs_info tis620_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, + {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, + {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, + {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, + {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, + {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, + {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, + {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, + {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, + {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, + {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, + {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, + {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, + {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, + {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, + {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, + {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, + {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, + {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, + {0x00, 0xff, 0xff}}; + +struct enc_entry { + const char* enc_name; + struct cs_info* cs_table; +}; + +static struct enc_entry encds[] = { + {"iso88591", iso1_tbl}, // ISO-8859-1 + {"iso88592", iso2_tbl}, // ISO-8859-2 + {"iso88593", iso3_tbl}, // ISO-8859-3 + {"iso88594", iso4_tbl}, // ISO-8859-4 + {"iso88595", iso5_tbl}, // ISO-8859-5 + {"iso88596", iso6_tbl}, // ISO-8859-6 + {"iso88597", iso7_tbl}, // ISO-8859-7 + {"iso88598", iso8_tbl}, // ISO-8859-8 + {"iso88599", iso9_tbl}, // ISO-8859-9 + {"iso885910", iso10_tbl}, // ISO-8859-10 + {"tis620", tis620_tbl}, // TIS-620/ISO-8859-11 + {"tis6202533", tis620_tbl}, // TIS-620/ISO-8859-11 + {"iso885911", tis620_tbl}, // TIS-620/ISO-8859-11 + {"iso885913", iso13_tbl}, // ISO-8859-13 + {"iso885914", iso14_tbl}, // ISO-8859-14 + {"iso885915", iso15_tbl}, // ISO-8859-15 + {"koi8r", koi8r_tbl}, // KOI8-R + {"koi8u", koi8u_tbl}, // KOI8-U + {"cp1251", cp1251_tbl}, // CP-1251 + {"microsoftcp1251", cp1251_tbl}, // microsoft-cp1251 + {"xisciias", iscii_devanagari_tbl}, // x-iscii-as + {"isciidevanagari", iscii_devanagari_tbl} // ISCII-DEVANAGARI +}; + +/* map to lower case and remove non alphanumeric chars */ +static void toAsciiLowerAndRemoveNonAlphanumeric(const char* pName, + char* pBuf) { + while (*pName) { + /* A-Z */ + if ((*pName >= 0x41) && (*pName <= 0x5A)) { + *pBuf = (*pName) + 0x20; /* toAsciiLower */ + pBuf++; + } + /* a-z, 0-9 */ + else if (((*pName >= 0x61) && (*pName <= 0x7A)) || + ((*pName >= 0x30) && (*pName <= 0x39))) { + *pBuf = *pName; + pBuf++; + } + + pName++; + } + + *pBuf = '\0'; +} + +struct cs_info* get_current_cs(const char* es) { + char* normalized_encoding = new char[strlen(es) + 1]; + toAsciiLowerAndRemoveNonAlphanumeric(es, normalized_encoding); + + struct cs_info* ccs = NULL; + int n = sizeof(encds) / sizeof(encds[0]); + for (int i = 0; i < n; i++) { + if (strcmp(normalized_encoding, encds[i].enc_name) == 0) { + ccs = encds[i].cs_table; + break; + } + } + + delete[] normalized_encoding; + + if (!ccs) { + HUNSPELL_WARNING(stderr, + "error: unknown encoding %s: using %s as fallback\n", es, + encds[0].enc_name); + ccs = encds[0].cs_table; + } + + return ccs; +} +#else +// XXX This function was rewritten for mozilla. Instead of storing the +// conversion tables static in this file, create them when needed +// with help the mozilla backend. +struct cs_info* get_current_cs(const char* es) { + struct cs_info* ccs = new cs_info[256]; + // Initialze the array with dummy data so that we wouldn't need + // to return null in case of failures. + for (int i = 0; i <= 0xff; ++i) { + ccs[i].ccase = false; + ccs[i].clower = i; + ccs[i].cupper = i; + } + + nsCOMPtr encoder; + nsCOMPtr decoder; + + nsresult rv; + + nsAutoCString label(es); + nsAutoCString encoding; + if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) { + return ccs; + } + encoder = EncodingUtils::EncoderForEncoding(encoding); + decoder = EncodingUtils::DecoderForEncoding(encoding); + encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nullptr, '?'); + decoder->SetInputErrorBehavior(decoder->kOnError_Signal); + + for (unsigned int i = 0; i <= 0xff; ++i) { + bool success = false; + // We want to find the upper/lowercase equivalents of each byte + // in this 1-byte character encoding. Call our encoding/decoding + // APIs separately for each byte since they may reject some of the + // bytes, and we want to handle errors separately for each byte. + char lower, upper; + do { + if (i == 0) + break; + const char source = char(i); + char16_t uni, uniCased; + int32_t charLength = 1, uniLength = 1; + + rv = decoder->Convert(&source, &charLength, &uni, &uniLength); + // Explicitly check NS_OK because we don't want to allow + // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT. + if (rv != NS_OK || charLength != 1 || uniLength != 1) + break; + uniCased = ToLowerCase(uni); + rv = encoder->Convert(&uniCased, &uniLength, &lower, &charLength); + // Explicitly check NS_OK because we don't want to allow + // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT. + if (rv != NS_OK || charLength != 1 || uniLength != 1) + break; + + uniCased = ToUpperCase(uni); + rv = encoder->Convert(&uniCased, &uniLength, &upper, &charLength); + // Explicitly check NS_OK because we don't want to allow + // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT. + if (rv != NS_OK || charLength != 1 || uniLength != 1) + break; + + success = true; + } while (0); + + if (success) { + ccs[i].cupper = upper; + ccs[i].clower = lower; + } else { + ccs[i].cupper = i; + ccs[i].clower = i; + } + + if (ccs[i].clower != (unsigned char)i) + ccs[i].ccase = true; + else + ccs[i].ccase = false; + } + + return ccs; +} +#endif + +// primitive isalpha() replacement for tokenization +char* get_casechars(const char* enc) { + struct cs_info* csconv = get_current_cs(enc); + char expw[MAXLNLEN]; + char* p = expw; + for (int i = 0; i <= 255; i++) { + if (cupper(csconv, i) != clower(csconv, i)) { + *p = static_cast(i); + p++; + } + } + *p = '\0'; +#ifdef MOZILLA_CLIENT + delete[] csconv; +#endif + return mystrdup(expw); +} + +// language to encoding default map + +struct lang_map { + const char* lang; + int num; +}; + +static struct lang_map lang2enc[] = + {{"ar", LANG_ar}, {"az", LANG_az}, + {"az_AZ", LANG_az}, // for back-compatibility + {"bg", LANG_bg}, {"ca", LANG_ca}, + {"cs", LANG_cs}, {"da", LANG_da}, + {"de", LANG_de}, {"el", LANG_el}, + {"en", LANG_en}, {"es", LANG_es}, + {"eu", LANG_eu}, {"gl", LANG_gl}, + {"fr", LANG_fr}, {"hr", LANG_hr}, + {"hu", LANG_hu}, {"hu_HU", LANG_hu}, // for back-compatibility + {"it", LANG_it}, {"la", LANG_la}, + {"lv", LANG_lv}, {"nl", LANG_nl}, + {"pl", LANG_pl}, {"pt", LANG_pt}, + {"sv", LANG_sv}, {"tr", LANG_tr}, + {"tr_TR", LANG_tr}, // for back-compatibility + {"ru", LANG_ru}, {"uk", LANG_uk}}; + +int get_lang_num(const char* lang) { + int n = sizeof(lang2enc) / sizeof(lang2enc[0]); + for (int i = 0; i < n; i++) { + if (strcmp(lang, lang2enc[i].lang) == 0) { + return lang2enc[i].num; + } + } + return LANG_xx; +} + +#ifndef OPENOFFICEORG +#ifndef MOZILLA_CLIENT +int initialize_utf_tbl() { + utf_tbl_count++; + if (utf_tbl) + return 0; + utf_tbl = (unicode_info2*)malloc(CONTSIZE * sizeof(unicode_info2)); + if (utf_tbl) { + size_t j; + for (j = 0; j < CONTSIZE; j++) { + utf_tbl[j].cletter = 0; + utf_tbl[j].clower = (unsigned short)j; + utf_tbl[j].cupper = (unsigned short)j; + } + for (j = 0; j < UTF_LST_LEN; j++) { + utf_tbl[utf_lst[j].c].cletter = 1; + utf_tbl[utf_lst[j].c].clower = utf_lst[j].clower; + utf_tbl[utf_lst[j].c].cupper = utf_lst[j].cupper; + } + } else + return 1; + return 0; +} +#endif +#endif + +void free_utf_tbl() { + if (utf_tbl_count > 0) + utf_tbl_count--; + if (utf_tbl && (utf_tbl_count == 0)) { + free(utf_tbl); + utf_tbl = NULL; + } +} + +unsigned short unicodetoupper(unsigned short c, int langnum) { + // In Azeri and Turkish, I and i dictinct letters: + // There are a dotless lower case i pair of upper `I', + // and an upper I with dot pair of lower `i'. + if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr))) + return 0x0130; +#ifdef OPENOFFICEORG + return static_cast(u_toupper(c)); +#else +#ifdef MOZILLA_CLIENT + return ToUpperCase((char16_t)c); +#else + return (utf_tbl) ? utf_tbl[c].cupper : c; +#endif +#endif +} + +unsigned short unicodetolower(unsigned short c, int langnum) { + // In Azeri and Turkish, I and i dictinct letters: + // There are a dotless lower case i pair of upper `I', + // and an upper I with dot pair of lower `i'. + if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr))) + return 0x0131; +#ifdef OPENOFFICEORG + return static_cast(u_tolower(c)); +#else +#ifdef MOZILLA_CLIENT + return ToLowerCase((char16_t)c); +#else + return (utf_tbl) ? utf_tbl[c].clower : c; +#endif +#endif +} + +int unicodeisalpha(unsigned short c) { +#ifdef OPENOFFICEORG + return u_isalpha(c); +#else + return (utf_tbl) ? utf_tbl[c].cletter : 0; +#endif +} + +/* get type of capitalization */ +int get_captype(char* word, int nl, cs_info* csconv) { + // now determine the capitalization type of the first nl letters + int ncap = 0; + int nneutral = 0; + int firstcap = 0; + if (csconv == NULL) + return NOCAP; + for (char* q = word; *q != '\0'; q++) { + unsigned char nIndex = static_cast(*q); + if (ccase(csconv, nIndex)) + ncap++; + if (cupper(csconv, nIndex) == clower(csconv, nIndex)) + nneutral++; + } + if (ncap) { + unsigned char nIndex = static_cast(word[0]); + firstcap = csconv[nIndex].ccase; + } + + // now finally set the captype + if (ncap == 0) { + return NOCAP; + } else if ((ncap == 1) && firstcap) { + return INITCAP; + } else if ((ncap == nl) || ((ncap + nneutral) == nl)) { + return ALLCAP; + } else if ((ncap > 1) && firstcap) { + return HUHINITCAP; + } + return HUHCAP; +} + +int get_captype_utf8(w_char* word, int nl, int langnum) { + // now determine the capitalization type of the first nl letters + int ncap = 0; + int nneutral = 0; + int firstcap = 0; + unsigned short idx; + // don't check too long words + if (nl >= MAXWORDLEN) + return 0; + // big Unicode character (non BMP area) + if (nl == -1) + return NOCAP; + for (int i = 0; i < nl; i++) { + idx = (word[i].h << 8) + word[i].l; + if (idx != unicodetolower(idx, langnum)) + ncap++; + if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) + nneutral++; + } + if (ncap) { + idx = (word[0].h << 8) + word[0].l; + firstcap = (idx != unicodetolower(idx, langnum)); + } + + // now finally set the captype + if (ncap == 0) { + return NOCAP; + } else if ((ncap == 1) && firstcap) { + return INITCAP; + } else if ((ncap == nl) || ((ncap + nneutral) == nl)) { + return ALLCAP; + } else if ((ncap > 1) && firstcap) { + return HUHINITCAP; + } + return HUHCAP; +} + +// strip all ignored characters in the string +void remove_ignored_chars_utf(char* word, + unsigned short ignored_chars[], + int ignored_len) { + w_char w[MAXWORDLEN]; + w_char w2[MAXWORDLEN]; + int i; + int j; + int len = u8_u16(w, MAXWORDLEN, word); + for (i = 0, j = 0; i < len; i++) { + if (!flag_bsearch(ignored_chars, ((unsigned short*)w)[i], ignored_len)) { + w2[j] = w[i]; + j++; + } + } + if (j < i) + u16_u8(word, MAXWORDUTF8LEN, w2, j); +} + +namespace { +union w_s { + w_char w; + unsigned short s; +}; + +unsigned short asushort(w_char in) { + w_s c; + c.w = in; + return c.s; +} +} + +// strip all ignored characters in the string +std::string& remove_ignored_chars_utf(std::string& word, + unsigned short ignored_chars[], + int ignored_len) { + std::vector w; + std::vector w2; + u8_u16(w, word); + + for (size_t i = 0; i < w.size(); ++i) { + if (!flag_bsearch(ignored_chars, asushort(w[i]), ignored_len)) + w2.push_back(w[i]); + } + + u16_u8(word, w2); + return word; +} + +// strip all ignored characters in the string +void remove_ignored_chars(char* word, char* ignored_chars) { + for (char* p = word; *p != '\0'; p++) { + if (!strchr(ignored_chars, *p)) { + *word = *p; + word++; + } + } + *word = '\0'; +} + +namespace { +class is_any_of { + public: + is_any_of(const std::string& in) : chars(in) {} + + bool operator()(char c) { return chars.find(c) != std::string::npos; } + + private: + const std::string& chars; +}; +} + +// strip all ignored characters in the string +std::string& remove_ignored_chars(std::string& word, + const std::string& ignored_chars) { + word.erase( + std::remove_if(word.begin(), word.end(), is_any_of(ignored_chars))); + return word; +} + +int parse_string(char* line, char** out, int ln) { + char* tp = line; + char* piece; + int i = 0; + int np = 0; + if (*out) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions\n", ln); + return 1; + } + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + *out = mystrdup(piece); + if (!*out) + return 1; + np++; + break; + } + default: + break; + } + i++; + } + // free(piece); + piece = mystrsep(&tp, 0); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", ln); + return 1; + } + return 0; +} + +int parse_array(char* line, + char** out, + unsigned short** out_utf16, + int* out_utf16_len, + int utf8, + int ln) { + if (parse_string(line, out, ln)) + return 1; + if (utf8) { + w_char w[MAXWORDLEN]; + int n = u8_u16(w, MAXWORDLEN, *out); + if (n > 0) { + flag_qsort((unsigned short*)w, 0, n); + *out_utf16 = (unsigned short*)malloc(n * sizeof(unsigned short)); + if (!*out_utf16) + return 1; + memcpy(*out_utf16, w, n * sizeof(unsigned short)); + } + *out_utf16_len = n; + } + return 0; +} diff --git a/libs/hunspell/src/csutil.cxx b/libs/hunspell/src/csutil.cxx deleted file mode 100644 index d7411bb216..0000000000 --- a/libs/hunspell/src/csutil.cxx +++ /dev/null @@ -1,3194 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -/* - * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada - * And Contributors. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. All modifications to the source code must be clearly marked as - * such. Binary redistributions based on modified source code - * must be clearly marked as modified versions in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include - -#include "csutil.hxx" -#include "atypes.hxx" -#include "langnum.hxx" - -// Unicode character encoding information -struct unicode_info { - unsigned short c; - unsigned short cupper; - unsigned short clower; -}; - -#ifdef _WIN32 -#include -#include -#endif - -#ifdef OPENOFFICEORG -#include -#else -#ifndef MOZILLA_CLIENT -#include "utf_info.cxx" -#define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info))) -#endif -#endif - -#ifdef MOZILLA_CLIENT -#include "nsCOMPtr.h" -#include "nsIUnicodeEncoder.h" -#include "nsIUnicodeDecoder.h" -#include "nsUnicharUtils.h" -#include "mozilla/dom/EncodingUtils.h" - -using mozilla::dom::EncodingUtils; -#endif - -struct unicode_info2 { - char cletter; - unsigned short cupper; - unsigned short clower; -}; - -static struct unicode_info2* utf_tbl = NULL; -static int utf_tbl_count = - 0; // utf_tbl can be used by multiple Hunspell instances - -FILE* myfopen(const char* path, const char* mode) { -#ifdef _WIN32 -#define WIN32_LONG_PATH_PREFIX "\\\\?\\" - if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) { - int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0); - wchar_t* buff = (wchar_t*)malloc(len * sizeof(wchar_t)); - wchar_t* buff2 = (wchar_t*)malloc(len * sizeof(wchar_t)); - FILE* f = NULL; - if (buff && buff2) { - MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len); - if (_wfullpath(buff2, buff, len) != NULL) { - f = _wfopen(buff2, (strcmp(mode, "r") == 0) ? L"r" : L"rb"); - } - free(buff); - free(buff2); - } - return f; - } -#endif - return fopen(path, mode); -} - -/* only UTF-16 (BMP) implementation */ -char* u16_u8(char* dest, int size, const w_char* src, int srclen) { - signed char* u8 = (signed char*)dest; - signed char* u8_max = (signed char*)(u8 + size); - const w_char* u2 = src; - const w_char* u2_max = src + srclen; - while ((u2 < u2_max) && (u8 < u8_max)) { - if (u2->h) { // > 0xFF - // XXX 4-byte haven't implemented yet. - if (u2->h >= 0x08) { // >= 0x800 (3-byte UTF-8 character) - *u8 = 0xe0 + (u2->h >> 4); - u8++; - if (u8 < u8_max) { - *u8 = 0x80 + ((u2->h & 0xf) << 2) + (u2->l >> 6); - u8++; - if (u8 < u8_max) { - *u8 = 0x80 + (u2->l & 0x3f); - u8++; - } - } - } else { // < 0x800 (2-byte UTF-8 character) - *u8 = 0xc0 + (u2->h << 2) + (u2->l >> 6); - u8++; - if (u8 < u8_max) { - *u8 = 0x80 + (u2->l & 0x3f); - u8++; - } - } - } else { // <= 0xFF - if (u2->l & 0x80) { // >0x80 (2-byte UTF-8 character) - *u8 = 0xc0 + (u2->l >> 6); - u8++; - if (u8 < u8_max) { - *u8 = 0x80 + (u2->l & 0x3f); - u8++; - } - } else { // < 0x80 (1-byte UTF-8 character) - *u8 = u2->l; - u8++; - } - } - u2++; - } - *u8 = '\0'; - return dest; -} - -std::string& u16_u8(std::string& dest, const std::vector& src) { - dest.clear(); - std::vector::const_iterator u2 = src.begin(); - std::vector::const_iterator u2_max = src.end(); - while (u2 < u2_max) { - signed char u8; - if (u2->h) { // > 0xFF - // XXX 4-byte haven't implemented yet. - if (u2->h >= 0x08) { // >= 0x800 (3-byte UTF-8 character) - u8 = 0xe0 + (u2->h >> 4); - dest.push_back(u8); - u8 = 0x80 + ((u2->h & 0xf) << 2) + (u2->l >> 6); - dest.push_back(u8); - u8 = 0x80 + (u2->l & 0x3f); - dest.push_back(u8); - } else { // < 0x800 (2-byte UTF-8 character) - u8 = 0xc0 + (u2->h << 2) + (u2->l >> 6); - dest.push_back(u8); - u8 = 0x80 + (u2->l & 0x3f); - dest.push_back(u8); - } - } else { // <= 0xFF - if (u2->l & 0x80) { // >0x80 (2-byte UTF-8 character) - u8 = 0xc0 + (u2->l >> 6); - dest.push_back(u8); - u8 = 0x80 + (u2->l & 0x3f); - dest.push_back(u8); - } else { // < 0x80 (1-byte UTF-8 character) - u8 = u2->l; - dest.push_back(u8); - } - } - ++u2; - } - return dest; -} - -/* only UTF-16 (BMP) implementation */ -int u8_u16(w_char* dest, int size, const char* src) { - const signed char* u8 = (const signed char*)src; - w_char* u2 = dest; - w_char* u2_max = u2 + size; - - while ((u2 < u2_max) && *u8) { - switch ((*u8) & 0xf0) { - case 0x00: - case 0x10: - case 0x20: - case 0x30: - case 0x40: - case 0x50: - case 0x60: - case 0x70: { - u2->h = 0; - u2->l = *u8; - break; - } - case 0x80: - case 0x90: - case 0xa0: - case 0xb0: { - HUNSPELL_WARNING(stderr, - "UTF-8 encoding error. Unexpected continuation bytes " - "in %ld. character position\n%s\n", - static_cast(u8 - (signed char*)src), src); - u2->h = 0xff; - u2->l = 0xfd; - break; - } - case 0xc0: - case 0xd0: { // 2-byte UTF-8 codes - if ((*(u8 + 1) & 0xc0) == 0x80) { - u2->h = (*u8 & 0x1f) >> 2; - u2->l = (*u8 << 6) + (*(u8 + 1) & 0x3f); - u8++; - } else { - HUNSPELL_WARNING(stderr, - "UTF-8 encoding error. Missing continuation byte in " - "%ld. character position:\n%s\n", - static_cast(u8 - (signed char*)src), src); - u2->h = 0xff; - u2->l = 0xfd; - } - break; - } - case 0xe0: { // 3-byte UTF-8 codes - if ((*(u8 + 1) & 0xc0) == 0x80) { - u2->h = ((*u8 & 0x0f) << 4) + ((*(u8 + 1) & 0x3f) >> 2); - u8++; - if ((*(u8 + 1) & 0xc0) == 0x80) { - u2->l = (*u8 << 6) + (*(u8 + 1) & 0x3f); - u8++; - } else { - HUNSPELL_WARNING(stderr, - "UTF-8 encoding error. Missing continuation byte " - "in %ld. character position:\n%s\n", - static_cast(u8 - (signed char*)src), src); - u2->h = 0xff; - u2->l = 0xfd; - } - } else { - HUNSPELL_WARNING(stderr, - "UTF-8 encoding error. Missing continuation byte in " - "%ld. character position:\n%s\n", - static_cast(u8 - (signed char*)src), src); - u2->h = 0xff; - u2->l = 0xfd; - } - break; - } - case 0xf0: { // 4 or more byte UTF-8 codes - HUNSPELL_WARNING( - stderr, "This UTF-8 encoding can't convert to UTF-16:\n%s\n", src); - u2->h = 0xff; - u2->l = 0xfd; - return -1; - } - } - u8++; - u2++; - } - return (int)(u2 - dest); -} - -int u8_u16(std::vector& dest, const std::string& src) { - dest.clear(); - std::string::const_iterator u8 = src.begin(); - std::string::const_iterator u8_max = src.end(); - - while (u8 < u8_max) { - w_char u2; - switch ((*u8) & 0xf0) { - case 0x00: - case 0x10: - case 0x20: - case 0x30: - case 0x40: - case 0x50: - case 0x60: - case 0x70: { - u2.h = 0; - u2.l = *u8; - break; - } - case 0x80: - case 0x90: - case 0xa0: - case 0xb0: { - HUNSPELL_WARNING(stderr, - "UTF-8 encoding error. Unexpected continuation bytes " - "in %ld. character position\n%s\n", - static_cast(std::distance(src.begin(), u8)), - src.c_str()); - u2.h = 0xff; - u2.l = 0xfd; - break; - } - case 0xc0: - case 0xd0: { // 2-byte UTF-8 codes - if ((*(u8 + 1) & 0xc0) == 0x80) { - u2.h = (*u8 & 0x1f) >> 2; - u2.l = (*u8 << 6) + (*(u8 + 1) & 0x3f); - ++u8; - } else { - HUNSPELL_WARNING(stderr, - "UTF-8 encoding error. Missing continuation byte in " - "%ld. character position:\n%s\n", - static_cast(std::distance(src.begin(), u8)), - src.c_str()); - u2.h = 0xff; - u2.l = 0xfd; - } - break; - } - case 0xe0: { // 3-byte UTF-8 codes - if ((*(u8 + 1) & 0xc0) == 0x80) { - u2.h = ((*u8 & 0x0f) << 4) + ((*(u8 + 1) & 0x3f) >> 2); - ++u8; - if ((*(u8 + 1) & 0xc0) == 0x80) { - u2.l = (*u8 << 6) + (*(u8 + 1) & 0x3f); - ++u8; - } else { - HUNSPELL_WARNING(stderr, - "UTF-8 encoding error. Missing continuation byte " - "in %ld. character position:\n%s\n", - static_cast(std::distance(src.begin(), u8)), - src.c_str()); - u2.h = 0xff; - u2.l = 0xfd; - } - } else { - HUNSPELL_WARNING(stderr, - "UTF-8 encoding error. Missing continuation byte in " - "%ld. character position:\n%s\n", - static_cast(std::distance(src.begin(), u8)), - src.c_str()); - u2.h = 0xff; - u2.l = 0xfd; - } - break; - } - case 0xf0: { // 4 or more byte UTF-8 codes - HUNSPELL_WARNING(stderr, - "This UTF-8 encoding can't convert to UTF-16:\n%s\n", - src.c_str()); - u2.h = 0xff; - u2.l = 0xfd; - dest.push_back(u2); - return -1; - } - } - dest.push_back(u2); - ++u8; - } - - return dest.size(); -} - -void flag_qsort(unsigned short flags[], int begin, int end) { - unsigned short reg; - if (end > begin) { - unsigned short pivot = flags[begin]; - int l = begin + 1; - int r = end; - while (l < r) { - if (flags[l] <= pivot) { - l++; - } else { - r--; - reg = flags[l]; - flags[l] = flags[r]; - flags[r] = reg; - } - } - l--; - reg = flags[begin]; - flags[begin] = flags[l]; - flags[l] = reg; - - flag_qsort(flags, begin, l); - flag_qsort(flags, r, end); - } -} - -int flag_bsearch(unsigned short flags[], unsigned short flag, int length) { - int mid; - int left = 0; - int right = length - 1; - while (left <= right) { - mid = (left + right) / 2; - if (flags[mid] == flag) - return 1; - if (flag < flags[mid]) - right = mid - 1; - else - left = mid + 1; - } - return 0; -} - -// strip strings into token based on single char delimiter -// acts like strsep() but only uses a delim char and not -// a delim string -// default delimiter: white space characters - -char* mystrsep(char** stringp, const char delim) { - char* mp = *stringp; - if (*mp != '\0') { - char* dp; - if (delim) { - dp = strchr(mp, delim); - } else { - // don't use isspace() here, the string can be in some random charset - // that's way different than the locale's - for (dp = mp; (*dp && *dp != ' ' && *dp != '\t'); dp++) - ; - if (!*dp) - dp = NULL; - } - if (dp) { - *stringp = dp + 1; - *dp = '\0'; - } else { - *stringp = mp + strlen(mp); - } - return mp; - } - return NULL; -} - -// replaces strdup with ansi version -char* mystrdup(const char* s) { - char* d = NULL; - if (s) { - size_t sl = strlen(s) + 1; - d = (char*)malloc(sl); - if (d) { - memcpy(d, s, sl); - } else { - HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); - } - } - return d; -} - -// strcat for limited length destination string -char* mystrcat(char* dest, const char* st, int max) { - int len; - int len2; - if (dest == NULL || st == NULL) - return dest; - len = strlen(dest); - len2 = strlen(st); - if (len + len2 + 1 > max) - return dest; - strcpy(dest + len, st); - return dest; -} - -// remove cross-platform text line end characters -void mychomp(char* s) { - size_t k = strlen(s); - if ((k > 0) && ((*(s + k - 1) == '\r') || (*(s + k - 1) == '\n'))) - *(s + k - 1) = '\0'; - if ((k > 1) && (*(s + k - 2) == '\r')) - *(s + k - 2) = '\0'; -} - -// does an ansi strdup of the reverse of a string -char* myrevstrdup(const char* s) { - char* d = NULL; - if (s) { - size_t sl = strlen(s); - d = (char*)malloc(sl + 1); - if (d) { - const char* p = s + sl - 1; - char* q = d; - while (p >= s) - *q++ = *p--; - *q = '\0'; - } else { - HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); - } - } - return d; -} - -// break text to lines -// return number of lines -int line_tok(const char* text, char*** lines, char breakchar) { - int linenum = 0; - if (!text) { - return linenum; - } - char* dup = mystrdup(text); - char* p = strchr(dup, breakchar); - while (p) { - linenum++; - *p = '\0'; - p++; - p = strchr(p, breakchar); - } - linenum++; - *lines = (char**)malloc(linenum * sizeof(char*)); - if (!(*lines)) { - free(dup); - return 0; - } - - p = dup; - int l = 0; - for (int i = 0; i < linenum; i++) { - if (*p != '\0') { - (*lines)[l] = mystrdup(p); - if (!(*lines)[l]) { - for (i = 0; i < l; i++) - free((*lines)[i]); - free(dup); - return 0; - } - l++; - } - p += strlen(p) + 1; - } - free(dup); - if (!l) { - free(*lines); - *lines = NULL; - } - return l; -} - -// uniq line in place -char* line_uniq(char* text, char breakchar) { - char** lines; - int linenum = line_tok(text, &lines, breakchar); - int i; - strcpy(text, lines[0]); - for (i = 1; i < linenum; i++) { - int dup = 0; - for (int j = 0; j < i; j++) { - if (strcmp(lines[i], lines[j]) == 0) { - dup = 1; - break; - } - } - if (!dup) { - if ((i > 1) || (*(lines[0]) != '\0')) { - sprintf(text + strlen(text), "%c", breakchar); - } - strcat(text, lines[i]); - } - } - for (i = 0; i < linenum; i++) { - free(lines[i]); - } - free(lines); - return text; -} - -// uniq and boundary for compound analysis: "1\n\2\n\1" -> " ( \1 | \2 ) " -char* line_uniq_app(char** text, char breakchar) { - if (!strchr(*text, breakchar)) { - return *text; - } - - char** lines; - int i; - int linenum = line_tok(*text, &lines, breakchar); - int dup = 0; - for (i = 0; i < linenum; i++) { - for (int j = 0; j < (i - 1); j++) { - if (strcmp(lines[i], lines[j]) == 0) { - *(lines[i]) = '\0'; - dup++; - break; - } - } - } - if ((linenum - dup) == 1) { - strcpy(*text, lines[0]); - freelist(&lines, linenum); - return *text; - } - char* newtext = (char*)malloc(strlen(*text) + 2 * linenum + 3 + 1); - if (newtext) { - free(*text); - *text = newtext; - } else { - freelist(&lines, linenum); - return *text; - } - strcpy(*text, " ( "); - for (i = 0; i < linenum; i++) - if (*(lines[i])) { - sprintf(*text + strlen(*text), "%s%s", lines[i], " | "); - } - (*text)[strlen(*text) - 2] = ')'; // " ) " - freelist(&lines, linenum); - return *text; -} - -// append s to ends of every lines in text -void strlinecat(char* dest, const char* s) { - char* dup = mystrdup(dest); - char* source = dup; - int len = strlen(s); - if (dup) { - while (*source) { - if (*source == '\n') { - strncpy(dest, s, len); - dest += len; - } - *dest = *source; - source++; - dest++; - } - strcpy(dest, s); - free(dup); - } -} - -// append s to ends of every lines in text -std::string& strlinecat(std::string& str, const std::string& apd) { - size_t pos = 0; - while ((pos = str.find('\n', pos)) != std::string::npos) { - str.insert(pos, apd); - pos += apd.length() + 1; - } - str.append(apd); - return str; -} - -// change \n to char c -char* tr(char* text, char oldc, char newc) { - char* p; - for (p = text; *p; p++) - if (*p == oldc) - *p = newc; - return text; -} - -// morphcmp(): compare MORPH_DERI_SFX, MORPH_INFL_SFX and MORPH_TERM_SFX fields -// in the first line of the inputs -// return 0, if inputs equal -// return 1, if inputs may equal with a secondary suffix -// otherwise return -1 -int morphcmp(const char* s, const char* t) { - int se = 0; - int te = 0; - const char* sl; - const char* tl; - const char* olds; - const char* oldt; - if (!s || !t) - return 1; - olds = s; - sl = strchr(s, '\n'); - s = strstr(s, MORPH_DERI_SFX); - if (!s || (sl && sl < s)) - s = strstr(olds, MORPH_INFL_SFX); - if (!s || (sl && sl < s)) { - s = strstr(olds, MORPH_TERM_SFX); - olds = NULL; - } - oldt = t; - tl = strchr(t, '\n'); - t = strstr(t, MORPH_DERI_SFX); - if (!t || (tl && tl < t)) - t = strstr(oldt, MORPH_INFL_SFX); - if (!t || (tl && tl < t)) { - t = strstr(oldt, MORPH_TERM_SFX); - oldt = NULL; - } - while (s && t && (!sl || sl > s) && (!tl || tl > t)) { - s += MORPH_TAG_LEN; - t += MORPH_TAG_LEN; - se = 0; - te = 0; - while ((*s == *t) && !se && !te) { - s++; - t++; - switch (*s) { - case ' ': - case '\n': - case '\t': - case '\0': - se = 1; - } - switch (*t) { - case ' ': - case '\n': - case '\t': - case '\0': - te = 1; - } - } - if (!se || !te) { - // not terminal suffix difference - if (olds) - return -1; - return 1; - } - olds = s; - s = strstr(s, MORPH_DERI_SFX); - if (!s || (sl && sl < s)) - s = strstr(olds, MORPH_INFL_SFX); - if (!s || (sl && sl < s)) { - s = strstr(olds, MORPH_TERM_SFX); - olds = NULL; - } - oldt = t; - t = strstr(t, MORPH_DERI_SFX); - if (!t || (tl && tl < t)) - t = strstr(oldt, MORPH_INFL_SFX); - if (!t || (tl && tl < t)) { - t = strstr(oldt, MORPH_TERM_SFX); - oldt = NULL; - } - } - if (!s && !t && se && te) - return 0; - return 1; -} - -int get_sfxcount(const char* morph) { - if (!morph || !*morph) - return 0; - int n = 0; - const char* old = morph; - morph = strstr(morph, MORPH_DERI_SFX); - if (!morph) - morph = strstr(old, MORPH_INFL_SFX); - if (!morph) - morph = strstr(old, MORPH_TERM_SFX); - while (morph) { - n++; - old = morph; - morph = strstr(morph + 1, MORPH_DERI_SFX); - if (!morph) - morph = strstr(old + 1, MORPH_INFL_SFX); - if (!morph) - morph = strstr(old + 1, MORPH_TERM_SFX); - } - return n; -} - -int fieldlen(const char* r) { - int n = 0; - while (r && *r != ' ' && *r != '\t' && *r != '\0' && *r != '\n') { - r++; - n++; - } - return n; -} - -char* copy_field(char* dest, const char* morph, const char* var) { - if (!morph) - return NULL; - const char* beg = strstr(morph, var); - if (beg) { - char* d = dest; - for (beg += MORPH_TAG_LEN; - *beg != ' ' && *beg != '\t' && *beg != '\n' && *beg != '\0'; - d++, beg++) { - *d = *beg; - } - *d = '\0'; - return dest; - } - return NULL; -} - -bool copy_field(std::string& dest, - const std::string& morph, - const std::string& var) { - if (morph.empty()) - return false; - size_t pos = morph.find(var); - if (pos == std::string::npos) - return false; - dest.clear(); - std::string beg(morph.substr(pos + MORPH_TAG_LEN, std::string::npos)); - - for (size_t i = 0; i < beg.size(); ++i) { - const char c(beg[i]); - if (c == ' ' || c == '\t' || c == '\n') - break; - dest.push_back(c); - } - - return true; -} - -std::string& mystrrep(std::string& str, - const std::string& search, - const std::string& replace) { - size_t pos = 0; - while ((pos = str.find(search, pos)) != std::string::npos) { - str.replace(pos, search.length(), replace); - pos += replace.length(); - } - return str; -} - -char* mystrrep(char* word, const char* pat, const char* rep) { - char* pos = strstr(word, pat); - if (pos) { - int replen = strlen(rep); - int patlen = strlen(pat); - while (pos) { - if (replen < patlen) { - char* end = word + strlen(word); - char* next = pos + replen; - char* prev = pos + strlen(pat); - for (; prev < end;* next = *prev, prev++, next++) - ; - *next = '\0'; - } else if (replen > patlen) { - char* end = pos + patlen; - char* next = word + strlen(word) + replen - patlen; - char* prev = next - replen + patlen; - for (; prev >= end;* next = *prev, prev--, next--) - ; - } - strncpy(pos, rep, replen); - pos = strstr(word, pat); - } - } - return word; -} - -// reverse word -int reverseword(char* word) { - char r; - for (char *dest = word + strlen(word) - 1; word < dest; word++, dest--) { - r = *word; - *word = *dest; - *dest = r; - } - return 0; -} - -// reverse word -std::string& reverseword(std::string& word) { - std::reverse(word.begin(), word.end()); - return word; -} - -// reverse word (error: 1) -int reverseword_utf(char* word) { - w_char w[MAXWORDLEN]; - w_char* p; - w_char r; - int l = u8_u16(w, MAXWORDLEN, word); - if (l == -1) - return 1; - p = w; - for (w_char *dest = w + l - 1; p < dest; p++, dest--) { - r = *p; - *p = *dest; - *dest = r; - } - u16_u8(word, MAXWORDUTF8LEN, w, l); - return 0; -} - -// reverse word -std::string& reverseword_utf(std::string& word) { - std::vector w; - u8_u16(w, word); - std::reverse(w.begin(), w.end()); - u16_u8(word, w); - return word; -} - -int uniqlist(char** list, int n) { - int i; - if (n < 2) - return n; - for (i = 0; i < n; i++) { - for (int j = 0; j < i; j++) { - if (list[j] && list[i] && (strcmp(list[j], list[i]) == 0)) { - free(list[i]); - list[i] = NULL; - break; - } - } - } - int m = 1; - for (i = 1; i < n; i++) - if (list[i]) { - list[m] = list[i]; - m++; - } - return m; -} - -void freelist(char*** list, int n) { - if (list && *list) { - for (int i = 0; i < n; i++) - free((*list)[i]); - free(*list); - *list = NULL; - } -} - -namespace { -unsigned char cupper(const struct cs_info* csconv, int nIndex) { - if (nIndex < 0 || nIndex > 255) - return nIndex; - return csconv[nIndex].cupper; -} - -unsigned char clower(const struct cs_info* csconv, int nIndex) { - if (nIndex < 0 || nIndex > 255) - return nIndex; - return csconv[nIndex].clower; -} - -unsigned char ccase(const struct cs_info* csconv, int nIndex) { - if (nIndex < 0 || nIndex > 255) - return nIndex; - return csconv[nIndex].ccase; -} -} - -// convert null terminated string to all caps -void mkallcap(char* p, const struct cs_info* csconv) { - while (*p != '\0') { - *p = cupper(csconv, static_cast(*p)); - p++; - } -} - -// convert std::string to all caps -std::string& mkallcap(std::string& s, const struct cs_info* csconv) { - for (std::string::iterator aI = s.begin(), aEnd = s.end(); aI != aEnd; ++aI) { - *aI = cupper(csconv, static_cast(*aI)); - } - return s; -} - -// convert null terminated string to all little -void mkallsmall(char* p, const struct cs_info* csconv) { - while (*p != '\0') { - *p = clower(csconv, static_cast(*p)); - p++; - } -} - -// convert std::string to all little -std::string& mkallsmall(std::string& s, const struct cs_info* csconv) { - for (std::string::iterator aI = s.begin(), aEnd = s.end(); aI != aEnd; ++aI) { - *aI = clower(csconv, static_cast(*aI)); - } - return s; -} - -void mkallsmall_utf(w_char* u, int nc, int langnum) { - for (int i = 0; i < nc; i++) { - unsigned short idx = (u[i].h << 8) + u[i].l; - if (idx != unicodetolower(idx, langnum)) { - u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); - u[i].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); - } - } -} - -std::vector& mkallsmall_utf(std::vector& u, - int nc, - int langnum) { - for (int i = 0; i < nc; i++) { - unsigned short idx = (u[i].h << 8) + u[i].l; - if (idx != unicodetolower(idx, langnum)) { - u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); - u[i].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); - } - } - return u; -} - -void mkallcap_utf(w_char* u, int nc, int langnum) { - for (int i = 0; i < nc; i++) { - unsigned short idx = (u[i].h << 8) + u[i].l; - if (idx != unicodetoupper(idx, langnum)) { - u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); - u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); - } - } -} - -std::vector& mkallcap_utf(std::vector& u, int nc, int langnum) { - for (int i = 0; i < nc; i++) { - unsigned short idx = (u[i].h << 8) + u[i].l; - if (idx != unicodetoupper(idx, langnum)) { - u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); - u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); - } - } - return u; -} - -// convert null terminated string to have initial capital -void mkinitcap(char* p, const struct cs_info* csconv) { - if (*p != '\0') - *p = cupper(csconv, static_cast(*p)); -} - -// conversion function for protected memory -void store_pointer(char* dest, char* source) { - memcpy(dest, &source, sizeof(char*)); -} - -// conversion function for protected memory -char* get_stored_pointer(const char* s) { - char* p; - memcpy(&p, s, sizeof(char*)); - return p; -} - -#ifndef MOZILLA_CLIENT -// convert null terminated string to all caps using encoding -void enmkallcap(char* d, const char* p, const char* encoding) - -{ - struct cs_info* csconv = get_current_cs(encoding); - while (*p != '\0') { - *d++ = cupper(csconv, static_cast(*p)); - p++; - } - *d = '\0'; -} - -// convert null terminated string to all little using encoding -void enmkallsmall(char* d, const char* p, const char* encoding) { - struct cs_info* csconv = get_current_cs(encoding); - while (*p != '\0') { - *d++ = clower(csconv, static_cast(*p)); - p++; - } - *d = '\0'; -} - -// convert null terminated string to have initial capital using encoding -void enmkinitcap(char* d, const char* p, const char* encoding) { - struct cs_info* csconv = get_current_cs(encoding); - memcpy(d, p, (strlen(p) + 1)); - if (*p != '\0') - *d = cupper(csconv, static_cast(*p)); -} - -// these are simple character mappings for the -// encodings supported -// supplying isupper, tolower, and toupper - -static struct cs_info iso1_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, - {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, - {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, - {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, - {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, - {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, - {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, - {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, - {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, - {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, - {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, - {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, - {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, - {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, - {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, - {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, - {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, - {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, - {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, - {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, - {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, - {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, - {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, - {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, - {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, - {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, - {0x00, 0xff, 0xff}}; - -static struct cs_info iso2_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x01, 0xb3, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x01, 0xb5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa8}, {0x01, 0xb9, 0xa9}, {0x01, 0xba, 0xaa}, - {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad}, - {0x01, 0xbe, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xa3}, - {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xa5}, {0x00, 0xb6, 0xa6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xa9}, - {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac}, - {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xae}, {0x00, 0xbf, 0xaf}, - {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, - {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, - {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, - {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, - {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, - {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, - {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, - {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, - {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, - {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, - {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, - {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, - {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, - {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, - {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, - {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, - {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, - {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, - {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, - {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, - {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, - {0x00, 0xff, 0xff}}; - -static struct cs_info iso3_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x00, 0xa5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa8}, {0x01, 0x69, 0xa9}, {0x01, 0xba, 0xaa}, - {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, - {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xa6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0x49}, - {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac}, - {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xaf}, - {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, - {0x00, 0xc3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, - {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, - {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, - {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, - {0x01, 0xef, 0xcf}, {0x00, 0xd0, 0xd0}, {0x01, 0xf1, 0xd1}, - {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, - {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, - {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, - {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, - {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, - {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xe3}, - {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, - {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, - {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, - {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, - {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, - {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, - {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, - {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, - {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, - {0x00, 0xff, 0xff}}; - -static struct cs_info iso4_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x01, 0xb3, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x01, 0xb5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa8}, {0x01, 0xb9, 0xa9}, {0x01, 0xba, 0xaa}, - {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad}, - {0x01, 0xbe, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xa3}, - {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xa5}, {0x00, 0xb6, 0xa6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xa9}, - {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac}, - {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xae}, {0x00, 0xbf, 0xbf}, - {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, - {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, - {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, - {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, - {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, - {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, - {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, - {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, - {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, - {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, - {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, - {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, - {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, - {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, - {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, - {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, - {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, - {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, - {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, - {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, - {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, - {0x00, 0xff, 0xff}}; - -static struct cs_info iso5_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xf1, 0xa1}, - {0x01, 0xf2, 0xa2}, {0x01, 0xf3, 0xa3}, {0x01, 0xf4, 0xa4}, - {0x01, 0xf5, 0xa5}, {0x01, 0xf6, 0xa6}, {0x01, 0xf7, 0xa7}, - {0x01, 0xf8, 0xa8}, {0x01, 0xf9, 0xa9}, {0x01, 0xfa, 0xaa}, - {0x01, 0xfb, 0xab}, {0x01, 0xfc, 0xac}, {0x00, 0xad, 0xad}, - {0x01, 0xfe, 0xae}, {0x01, 0xff, 0xaf}, {0x01, 0xd0, 0xb0}, - {0x01, 0xd1, 0xb1}, {0x01, 0xd2, 0xb2}, {0x01, 0xd3, 0xb3}, - {0x01, 0xd4, 0xb4}, {0x01, 0xd5, 0xb5}, {0x01, 0xd6, 0xb6}, - {0x01, 0xd7, 0xb7}, {0x01, 0xd8, 0xb8}, {0x01, 0xd9, 0xb9}, - {0x01, 0xda, 0xba}, {0x01, 0xdb, 0xbb}, {0x01, 0xdc, 0xbc}, - {0x01, 0xdd, 0xbd}, {0x01, 0xde, 0xbe}, {0x01, 0xdf, 0xbf}, - {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, - {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, - {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, - {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, - {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, - {0x01, 0xef, 0xcf}, {0x00, 0xd0, 0xb0}, {0x00, 0xd1, 0xb1}, - {0x00, 0xd2, 0xb2}, {0x00, 0xd3, 0xb3}, {0x00, 0xd4, 0xb4}, - {0x00, 0xd5, 0xb5}, {0x00, 0xd6, 0xb6}, {0x00, 0xd7, 0xb7}, - {0x00, 0xd8, 0xb8}, {0x00, 0xd9, 0xb9}, {0x00, 0xda, 0xba}, - {0x00, 0xdb, 0xbb}, {0x00, 0xdc, 0xbc}, {0x00, 0xdd, 0xbd}, - {0x00, 0xde, 0xbe}, {0x00, 0xdf, 0xbf}, {0x00, 0xe0, 0xc0}, - {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, - {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, - {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, - {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, - {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, - {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xa1}, {0x00, 0xf2, 0xa2}, - {0x00, 0xf3, 0xa3}, {0x00, 0xf4, 0xa4}, {0x00, 0xf5, 0xa5}, - {0x00, 0xf6, 0xa6}, {0x00, 0xf7, 0xa7}, {0x00, 0xf8, 0xa8}, - {0x00, 0xf9, 0xa9}, {0x00, 0xfa, 0xaa}, {0x00, 0xfb, 0xab}, - {0x00, 0xfc, 0xac}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xae}, - {0x00, 0xff, 0xaf}}; - -static struct cs_info iso6_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, - {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, - {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, - {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, - {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, - {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, - {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, - {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, - {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, - {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, - {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, - {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, - {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, - {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, - {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, - {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, - {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, - {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, - {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, - {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, - {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, - {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, - {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, - {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, - {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, - {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, - {0x00, 0xff, 0xff}}; - -static struct cs_info iso7_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, - {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, - {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x01, 0xdc, 0xb6}, - {0x00, 0xb7, 0xb7}, {0x01, 0xdd, 0xb8}, {0x01, 0xde, 0xb9}, - {0x01, 0xdf, 0xba}, {0x00, 0xbb, 0xbb}, {0x01, 0xfc, 0xbc}, - {0x00, 0xbd, 0xbd}, {0x01, 0xfd, 0xbe}, {0x01, 0xfe, 0xbf}, - {0x00, 0xc0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, - {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, - {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, - {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, - {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, - {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, - {0x00, 0xd2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, - {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7}, - {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, - {0x01, 0xfb, 0xdb}, {0x00, 0xdc, 0xb6}, {0x00, 0xdd, 0xb8}, - {0x00, 0xde, 0xb9}, {0x00, 0xdf, 0xba}, {0x00, 0xe0, 0xe0}, - {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, - {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, - {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, - {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, - {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, - {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd3}, - {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, - {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8}, - {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, - {0x00, 0xfc, 0xbc}, {0x00, 0xfd, 0xbe}, {0x00, 0xfe, 0xbf}, - {0x00, 0xff, 0xff}}; - -static struct cs_info iso8_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, - {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, - {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, - {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, - {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, - {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, - {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, - {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, - {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, - {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, - {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, - {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, - {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, - {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, - {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, - {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, - {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, - {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, - {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, - {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, - {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, - {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, - {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, - {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, - {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, - {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, - {0x00, 0xff, 0xff}}; - -static struct cs_info iso9_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0xfd, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0xdd}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, - {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, - {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, - {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, - {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, - {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, - {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, - {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, - {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, - {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, - {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, - {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, - {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, - {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, - {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0x69, 0xdd}, - {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, - {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, - {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, - {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, - {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, - {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, - {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, - {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, - {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, - {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, - {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0x49}, {0x00, 0xfe, 0xde}, - {0x00, 0xff, 0xff}}; - -static struct cs_info iso10_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, - {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, - {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, - {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, - {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, - {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, - {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, - {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, - {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, - {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, - {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, - {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, - {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, - {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, - {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, - {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, - {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, - {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, - {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, - {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, - {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, - {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, - {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, - {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, - {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, - {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, - {0x00, 0xff, 0xff}}; - -static struct cs_info koi8r_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xb3}, {0x00, 0xa4, 0xa4}, - {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, - {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x01, 0xa3, 0xb3}, - {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, - {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, - {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, - {0x00, 0xc0, 0xe0}, {0x00, 0xc1, 0xe1}, {0x00, 0xc2, 0xe2}, - {0x00, 0xc3, 0xe3}, {0x00, 0xc4, 0xe4}, {0x00, 0xc5, 0xe5}, - {0x00, 0xc6, 0xe6}, {0x00, 0xc7, 0xe7}, {0x00, 0xc8, 0xe8}, - {0x00, 0xc9, 0xe9}, {0x00, 0xca, 0xea}, {0x00, 0xcb, 0xeb}, - {0x00, 0xcc, 0xec}, {0x00, 0xcd, 0xed}, {0x00, 0xce, 0xee}, - {0x00, 0xcf, 0xef}, {0x00, 0xd0, 0xf0}, {0x00, 0xd1, 0xf1}, - {0x00, 0xd2, 0xf2}, {0x00, 0xd3, 0xf3}, {0x00, 0xd4, 0xf4}, - {0x00, 0xd5, 0xf5}, {0x00, 0xd6, 0xf6}, {0x00, 0xd7, 0xf7}, - {0x00, 0xd8, 0xf8}, {0x00, 0xd9, 0xf9}, {0x00, 0xda, 0xfa}, - {0x00, 0xdb, 0xfb}, {0x00, 0xdc, 0xfc}, {0x00, 0xdd, 0xfd}, - {0x00, 0xde, 0xfe}, {0x00, 0xdf, 0xff}, {0x01, 0xc0, 0xe0}, - {0x01, 0xc1, 0xe1}, {0x01, 0xc2, 0xe2}, {0x01, 0xc3, 0xe3}, - {0x01, 0xc4, 0xe4}, {0x01, 0xc5, 0xe5}, {0x01, 0xc6, 0xe6}, - {0x01, 0xc7, 0xe7}, {0x01, 0xc8, 0xe8}, {0x01, 0xc9, 0xe9}, - {0x01, 0xca, 0xea}, {0x01, 0xcb, 0xeb}, {0x01, 0xcc, 0xec}, - {0x01, 0xcd, 0xed}, {0x01, 0xce, 0xee}, {0x01, 0xcf, 0xef}, - {0x01, 0xd0, 0xf0}, {0x01, 0xd1, 0xf1}, {0x01, 0xd2, 0xf2}, - {0x01, 0xd3, 0xf3}, {0x01, 0xd4, 0xf4}, {0x01, 0xd5, 0xf5}, - {0x01, 0xd6, 0xf6}, {0x01, 0xd7, 0xf7}, {0x01, 0xd8, 0xf8}, - {0x01, 0xd9, 0xf9}, {0x01, 0xda, 0xfa}, {0x01, 0xdb, 0xfb}, - {0x01, 0xdc, 0xfc}, {0x01, 0xdd, 0xfd}, {0x01, 0xde, 0xfe}, - {0x01, 0xdf, 0xff}}; - -static struct cs_info koi8u_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xb3}, {0x00, 0xa4, 0xb4}, /* ie */ - {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xb6}, /* i */ - {0x00, 0xa7, 0xb7}, /* ii */ - {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, - {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xbd}, /* g'' */ - {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x01, 0xa3, 0xb3}, - {0x00, 0xb4, 0xb4}, /* IE */ - {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, /* I */ - {0x00, 0xb7, 0xb7}, /* II */ - {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, {0x00, 0xba, 0xba}, - {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, {0x00, 0xbd, 0xbd}, - {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, {0x00, 0xc0, 0xe0}, - {0x00, 0xc1, 0xe1}, {0x00, 0xc2, 0xe2}, {0x00, 0xc3, 0xe3}, - {0x00, 0xc4, 0xe4}, {0x00, 0xc5, 0xe5}, {0x00, 0xc6, 0xe6}, - {0x00, 0xc7, 0xe7}, {0x00, 0xc8, 0xe8}, {0x00, 0xc9, 0xe9}, - {0x00, 0xca, 0xea}, {0x00, 0xcb, 0xeb}, {0x00, 0xcc, 0xec}, - {0x00, 0xcd, 0xed}, {0x00, 0xce, 0xee}, {0x00, 0xcf, 0xef}, - {0x00, 0xd0, 0xf0}, {0x00, 0xd1, 0xf1}, {0x00, 0xd2, 0xf2}, - {0x00, 0xd3, 0xf3}, {0x00, 0xd4, 0xf4}, {0x00, 0xd5, 0xf5}, - {0x00, 0xd6, 0xf6}, {0x00, 0xd7, 0xf7}, {0x00, 0xd8, 0xf8}, - {0x00, 0xd9, 0xf9}, {0x00, 0xda, 0xfa}, {0x00, 0xdb, 0xfb}, - {0x00, 0xdc, 0xfc}, {0x00, 0xdd, 0xfd}, {0x00, 0xde, 0xfe}, - {0x00, 0xdf, 0xff}, {0x01, 0xc0, 0xe0}, {0x01, 0xc1, 0xe1}, - {0x01, 0xc2, 0xe2}, {0x01, 0xc3, 0xe3}, {0x01, 0xc4, 0xe4}, - {0x01, 0xc5, 0xe5}, {0x01, 0xc6, 0xe6}, {0x01, 0xc7, 0xe7}, - {0x01, 0xc8, 0xe8}, {0x01, 0xc9, 0xe9}, {0x01, 0xca, 0xea}, - {0x01, 0xcb, 0xeb}, {0x01, 0xcc, 0xec}, {0x01, 0xcd, 0xed}, - {0x01, 0xce, 0xee}, {0x01, 0xcf, 0xef}, {0x01, 0xd0, 0xf0}, - {0x01, 0xd1, 0xf1}, {0x01, 0xd2, 0xf2}, {0x01, 0xd3, 0xf3}, - {0x01, 0xd4, 0xf4}, {0x01, 0xd5, 0xf5}, {0x01, 0xd6, 0xf6}, - {0x01, 0xd7, 0xf7}, {0x01, 0xd8, 0xf8}, {0x01, 0xd9, 0xf9}, - {0x01, 0xda, 0xfa}, {0x01, 0xdb, 0xfb}, {0x01, 0xdc, 0xfc}, - {0x01, 0xdd, 0xfd}, {0x01, 0xde, 0xfe}, {0x01, 0xdf, 0xff}}; - -static struct cs_info cp1251_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x01, 0x90, 0x80}, - {0x01, 0x83, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x81}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x01, 0x9a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x01, 0x9c, 0x8c}, - {0x01, 0x9d, 0x8d}, {0x01, 0x9e, 0x8e}, {0x01, 0x9f, 0x8f}, - {0x00, 0x90, 0x80}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x8a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x8c}, {0x00, 0x9d, 0x8d}, {0x00, 0x9e, 0x8e}, - {0x00, 0x9f, 0x8f}, {0x00, 0xa0, 0xa0}, {0x01, 0xa2, 0xa1}, - {0x00, 0xa2, 0xa1}, {0x01, 0xbc, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x01, 0xb4, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x01, 0xb8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x01, 0xba, 0xaa}, - {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xb1}, {0x01, 0xb3, 0xb2}, {0x00, 0xb3, 0xb2}, - {0x00, 0xb4, 0xa5}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xa8}, {0x00, 0xb9, 0xb9}, - {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xa3}, - {0x01, 0xbe, 0xbd}, {0x00, 0xbe, 0xbd}, {0x00, 0xbf, 0xaf}, - {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, - {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, - {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, - {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, - {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, - {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, - {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, - {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7}, - {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, - {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, - {0x01, 0xfe, 0xde}, {0x01, 0xff, 0xdf}, {0x00, 0xe0, 0xc0}, - {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, - {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, - {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, - {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, - {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, - {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, - {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, - {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8}, - {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, - {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, - {0x00, 0xff, 0xdf}}; - -static struct cs_info iso13_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0A, 0x0A}, {0x00, 0x0B, 0x0B}, - {0x00, 0x0C, 0x0C}, {0x00, 0x0D, 0x0D}, {0x00, 0x0E, 0x0E}, - {0x00, 0x0F, 0x0F}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1A, 0x1A}, - {0x00, 0x1B, 0x1B}, {0x00, 0x1C, 0x1C}, {0x00, 0x1D, 0x1D}, - {0x00, 0x1E, 0x1E}, {0x00, 0x1F, 0x1F}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2A, 0x2A}, {0x00, 0x2B, 0x2B}, {0x00, 0x2C, 0x2C}, - {0x00, 0x2D, 0x2D}, {0x00, 0x2E, 0x2E}, {0x00, 0x2F, 0x2F}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3A, 0x3A}, {0x00, 0x3B, 0x3B}, - {0x00, 0x3C, 0x3C}, {0x00, 0x3D, 0x3D}, {0x00, 0x3E, 0x3E}, - {0x00, 0x3F, 0x3F}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6A, 0x4A}, - {0x01, 0x6B, 0x4B}, {0x01, 0x6C, 0x4C}, {0x01, 0x6D, 0x4D}, - {0x01, 0x6E, 0x4E}, {0x01, 0x6F, 0x4F}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7A, 0x5A}, {0x00, 0x5B, 0x5B}, {0x00, 0x5C, 0x5C}, - {0x00, 0x5D, 0x5D}, {0x00, 0x5E, 0x5E}, {0x00, 0x5F, 0x5F}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6A, 0x4A}, {0x00, 0x6B, 0x4B}, - {0x00, 0x6C, 0x4C}, {0x00, 0x6D, 0x4D}, {0x00, 0x6E, 0x4E}, - {0x00, 0x6F, 0x4F}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7A, 0x5A}, - {0x00, 0x7B, 0x7B}, {0x00, 0x7C, 0x7C}, {0x00, 0x7D, 0x7D}, - {0x00, 0x7E, 0x7E}, {0x00, 0x7F, 0x7F}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8A, 0x8A}, {0x00, 0x8B, 0x8B}, {0x00, 0x8C, 0x8C}, - {0x00, 0x8D, 0x8D}, {0x00, 0x8E, 0x8E}, {0x00, 0x8F, 0x8F}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9A, 0x9A}, {0x00, 0x9B, 0x9B}, - {0x00, 0x9C, 0x9C}, {0x00, 0x9D, 0x9D}, {0x00, 0x9E, 0x9E}, - {0x00, 0x9F, 0x9F}, {0x00, 0xA0, 0xA0}, {0x00, 0xA1, 0xA1}, - {0x00, 0xA2, 0xA2}, {0x00, 0xA3, 0xA3}, {0x00, 0xA4, 0xA4}, - {0x00, 0xA5, 0xA5}, {0x00, 0xA6, 0xA6}, {0x00, 0xA7, 0xA7}, - {0x01, 0xB8, 0xA8}, {0x00, 0xA9, 0xA9}, {0x01, 0xBA, 0xAA}, - {0x00, 0xAB, 0xAB}, {0x00, 0xAC, 0xAC}, {0x00, 0xAD, 0xAD}, - {0x00, 0xAE, 0xAE}, {0x01, 0xBF, 0xAF}, {0x00, 0xB0, 0xB0}, - {0x00, 0xB1, 0xB1}, {0x00, 0xB2, 0xB2}, {0x00, 0xB3, 0xB3}, - {0x00, 0xB4, 0xB4}, {0x00, 0xB5, 0xB5}, {0x00, 0xB6, 0xB6}, - {0x00, 0xB7, 0xB7}, {0x00, 0xB8, 0xA8}, {0x00, 0xB9, 0xB9}, - {0x00, 0xBA, 0xAA}, {0x00, 0xBB, 0xBB}, {0x00, 0xBC, 0xBC}, - {0x00, 0xBD, 0xBD}, {0x00, 0xBE, 0xBE}, {0x00, 0xBF, 0xAF}, - {0x01, 0xE0, 0xC0}, {0x01, 0xE1, 0xC1}, {0x01, 0xE2, 0xC2}, - {0x01, 0xE3, 0xC3}, {0x01, 0xE4, 0xC4}, {0x01, 0xE5, 0xC5}, - {0x01, 0xE6, 0xC6}, {0x01, 0xE7, 0xC7}, {0x01, 0xE8, 0xC8}, - {0x01, 0xE9, 0xC9}, {0x01, 0xEA, 0xCA}, {0x01, 0xEB, 0xCB}, - {0x01, 0xEC, 0xCC}, {0x01, 0xED, 0xCD}, {0x01, 0xEE, 0xCE}, - {0x01, 0xEF, 0xCF}, {0x01, 0xF0, 0xD0}, {0x01, 0xF1, 0xD1}, - {0x01, 0xF2, 0xD2}, {0x01, 0xF3, 0xD3}, {0x01, 0xF4, 0xD4}, - {0x01, 0xF5, 0xD5}, {0x01, 0xF6, 0xD6}, {0x00, 0xD7, 0xD7}, - {0x01, 0xF8, 0xD8}, {0x01, 0xF9, 0xD9}, {0x01, 0xFA, 0xDA}, - {0x01, 0xFB, 0xDB}, {0x01, 0xFC, 0xDC}, {0x01, 0xFD, 0xDD}, - {0x01, 0xFE, 0xDE}, {0x00, 0xDF, 0xDF}, {0x00, 0xE0, 0xC0}, - {0x00, 0xE1, 0xC1}, {0x00, 0xE2, 0xC2}, {0x00, 0xE3, 0xC3}, - {0x00, 0xE4, 0xC4}, {0x00, 0xE5, 0xC5}, {0x00, 0xE6, 0xC6}, - {0x00, 0xE7, 0xC7}, {0x00, 0xE8, 0xC8}, {0x00, 0xE9, 0xC9}, - {0x00, 0xEA, 0xCA}, {0x00, 0xEB, 0xCB}, {0x00, 0xEC, 0xCC}, - {0x00, 0xED, 0xCD}, {0x00, 0xEE, 0xCE}, {0x00, 0xEF, 0xCF}, - {0x00, 0xF0, 0xD0}, {0x00, 0xF1, 0xD1}, {0x00, 0xF2, 0xD2}, - {0x00, 0xF3, 0xD3}, {0x00, 0xF4, 0xD4}, {0x00, 0xF5, 0xD5}, - {0x00, 0xF6, 0xD6}, {0x00, 0xF7, 0xF7}, {0x00, 0xF8, 0xD8}, - {0x00, 0xF9, 0xD9}, {0x00, 0xFA, 0xDA}, {0x00, 0xFB, 0xDB}, - {0x00, 0xFC, 0xDC}, {0x00, 0xFD, 0xDD}, {0x00, 0xFE, 0xDE}, - {0x00, 0xFF, 0xFF}}; - -static struct cs_info iso14_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xa2, 0xa1}, - {0x00, 0xa2, 0xa1}, {0x00, 0xa3, 0xa3}, {0x01, 0xa5, 0xa4}, - {0x00, 0xa5, 0xa4}, {0x01, 0xa6, 0xab}, {0x00, 0xa7, 0xa7}, - {0x01, 0xb8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x01, 0xba, 0xaa}, - {0x00, 0xab, 0xa6}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x01, 0xff, 0xaf}, {0x01, 0xb1, 0xb0}, - {0x00, 0xb1, 0xb0}, {0x01, 0xb3, 0xb2}, {0x00, 0xb3, 0xb2}, - {0x01, 0xb5, 0xb4}, {0x00, 0xb5, 0xb4}, {0x00, 0xb6, 0xb6}, - {0x01, 0xb9, 0xb7}, {0x00, 0xb8, 0xa8}, {0x00, 0xb9, 0xb6}, - {0x00, 0xba, 0xaa}, {0x01, 0xbf, 0xbb}, {0x00, 0xbc, 0xac}, - {0x01, 0xbe, 0xbd}, {0x00, 0xbe, 0xbd}, {0x00, 0xbf, 0xbb}, - {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, - {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, - {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, - {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, - {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, - {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, - {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, - {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7}, - {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, - {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, - {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, - {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, - {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, - {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, - {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, - {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, - {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, - {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, - {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8}, - {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, - {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, - {0x00, 0xff, 0xff}}; - -static struct cs_info iso15_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x00, 0xa5, 0xa5}, {0x01, 0xa8, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa6}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, - {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, - {0x01, 0xb8, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb4}, {0x00, 0xb9, 0xb9}, - {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x01, 0xbd, 0xbc}, - {0x00, 0xbd, 0xbc}, {0x01, 0xff, 0xbe}, {0x00, 0xbf, 0xbf}, - {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, - {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, - {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, - {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, - {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, - {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, - {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, - {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, - {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, - {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, - {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, - {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, - {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, - {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, - {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, - {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, - {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, - {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, - {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, - {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, - {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, - {0x00, 0xff, 0xbe}}; - -static struct cs_info iscii_devanagari_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, - {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, - {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, - {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, - {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, - {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, - {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, - {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, - {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, - {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, - {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, - {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, - {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, - {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, - {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, - {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, - {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, - {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, - {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, - {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, - {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, - {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, - {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, - {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, - {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, - {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, - {0x00, 0xff, 0xff}}; - -static struct cs_info tis620_tbl[] = { - {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, - {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, - {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, - {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, - {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, - {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, - {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, - {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, - {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, - {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, - {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, - {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, - {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, - {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, - {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, - {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, - {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, - {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, - {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, - {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, - {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, - {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, - {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, - {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, - {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, - {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, - {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, - {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, - {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, - {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, - {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, - {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, - {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, - {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, - {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, - {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, - {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, - {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, - {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, - {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, - {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, - {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, - {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, - {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, - {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, - {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, - {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, - {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, - {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, - {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, - {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, - {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, - {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, - {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, - {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, - {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, - {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, - {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, - {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, - {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, - {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, - {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, - {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, - {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, - {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, - {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, - {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, - {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, - {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, - {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, - {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, - {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, - {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, - {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, - {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, - {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, - {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, - {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, - {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, - {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, - {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, - {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, - {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, - {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, - {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, - {0x00, 0xff, 0xff}}; - -struct enc_entry { - const char* enc_name; - struct cs_info* cs_table; -}; - -static struct enc_entry encds[] = { - {"iso88591", iso1_tbl}, // ISO-8859-1 - {"iso88592", iso2_tbl}, // ISO-8859-2 - {"iso88593", iso3_tbl}, // ISO-8859-3 - {"iso88594", iso4_tbl}, // ISO-8859-4 - {"iso88595", iso5_tbl}, // ISO-8859-5 - {"iso88596", iso6_tbl}, // ISO-8859-6 - {"iso88597", iso7_tbl}, // ISO-8859-7 - {"iso88598", iso8_tbl}, // ISO-8859-8 - {"iso88599", iso9_tbl}, // ISO-8859-9 - {"iso885910", iso10_tbl}, // ISO-8859-10 - {"tis620", tis620_tbl}, // TIS-620/ISO-8859-11 - {"tis6202533", tis620_tbl}, // TIS-620/ISO-8859-11 - {"iso885911", tis620_tbl}, // TIS-620/ISO-8859-11 - {"iso885913", iso13_tbl}, // ISO-8859-13 - {"iso885914", iso14_tbl}, // ISO-8859-14 - {"iso885915", iso15_tbl}, // ISO-8859-15 - {"koi8r", koi8r_tbl}, // KOI8-R - {"koi8u", koi8u_tbl}, // KOI8-U - {"cp1251", cp1251_tbl}, // CP-1251 - {"microsoftcp1251", cp1251_tbl}, // microsoft-cp1251 - {"xisciias", iscii_devanagari_tbl}, // x-iscii-as - {"isciidevanagari", iscii_devanagari_tbl} // ISCII-DEVANAGARI -}; - -/* map to lower case and remove non alphanumeric chars */ -static void toAsciiLowerAndRemoveNonAlphanumeric(const char* pName, - char* pBuf) { - while (*pName) { - /* A-Z */ - if ((*pName >= 0x41) && (*pName <= 0x5A)) { - *pBuf = (*pName) + 0x20; /* toAsciiLower */ - pBuf++; - } - /* a-z, 0-9 */ - else if (((*pName >= 0x61) && (*pName <= 0x7A)) || - ((*pName >= 0x30) && (*pName <= 0x39))) { - *pBuf = *pName; - pBuf++; - } - - pName++; - } - - *pBuf = '\0'; -} - -struct cs_info* get_current_cs(const char* es) { - char* normalized_encoding = new char[strlen(es) + 1]; - toAsciiLowerAndRemoveNonAlphanumeric(es, normalized_encoding); - - struct cs_info* ccs = NULL; - int n = sizeof(encds) / sizeof(encds[0]); - for (int i = 0; i < n; i++) { - if (strcmp(normalized_encoding, encds[i].enc_name) == 0) { - ccs = encds[i].cs_table; - break; - } - } - - delete[] normalized_encoding; - - if (!ccs) { - HUNSPELL_WARNING(stderr, - "error: unknown encoding %s: using %s as fallback\n", es, - encds[0].enc_name); - ccs = encds[0].cs_table; - } - - return ccs; -} -#else -// XXX This function was rewritten for mozilla. Instead of storing the -// conversion tables static in this file, create them when needed -// with help the mozilla backend. -struct cs_info* get_current_cs(const char* es) { - struct cs_info* ccs = new cs_info[256]; - // Initialze the array with dummy data so that we wouldn't need - // to return null in case of failures. - for (int i = 0; i <= 0xff; ++i) { - ccs[i].ccase = false; - ccs[i].clower = i; - ccs[i].cupper = i; - } - - nsCOMPtr encoder; - nsCOMPtr decoder; - - nsresult rv; - - nsAutoCString label(es); - nsAutoCString encoding; - if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) { - return ccs; - } - encoder = EncodingUtils::EncoderForEncoding(encoding); - decoder = EncodingUtils::DecoderForEncoding(encoding); - encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nullptr, '?'); - decoder->SetInputErrorBehavior(decoder->kOnError_Signal); - - for (unsigned int i = 0; i <= 0xff; ++i) { - bool success = false; - // We want to find the upper/lowercase equivalents of each byte - // in this 1-byte character encoding. Call our encoding/decoding - // APIs separately for each byte since they may reject some of the - // bytes, and we want to handle errors separately for each byte. - char lower, upper; - do { - if (i == 0) - break; - const char source = char(i); - char16_t uni, uniCased; - int32_t charLength = 1, uniLength = 1; - - rv = decoder->Convert(&source, &charLength, &uni, &uniLength); - // Explicitly check NS_OK because we don't want to allow - // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT. - if (rv != NS_OK || charLength != 1 || uniLength != 1) - break; - uniCased = ToLowerCase(uni); - rv = encoder->Convert(&uniCased, &uniLength, &lower, &charLength); - // Explicitly check NS_OK because we don't want to allow - // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT. - if (rv != NS_OK || charLength != 1 || uniLength != 1) - break; - - uniCased = ToUpperCase(uni); - rv = encoder->Convert(&uniCased, &uniLength, &upper, &charLength); - // Explicitly check NS_OK because we don't want to allow - // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT. - if (rv != NS_OK || charLength != 1 || uniLength != 1) - break; - - success = true; - } while (0); - - if (success) { - ccs[i].cupper = upper; - ccs[i].clower = lower; - } else { - ccs[i].cupper = i; - ccs[i].clower = i; - } - - if (ccs[i].clower != (unsigned char)i) - ccs[i].ccase = true; - else - ccs[i].ccase = false; - } - - return ccs; -} -#endif - -// primitive isalpha() replacement for tokenization -char* get_casechars(const char* enc) { - struct cs_info* csconv = get_current_cs(enc); - char expw[MAXLNLEN]; - char* p = expw; - for (int i = 0; i <= 255; i++) { - if (cupper(csconv, i) != clower(csconv, i)) { - *p = static_cast(i); - p++; - } - } - *p = '\0'; -#ifdef MOZILLA_CLIENT - delete[] csconv; -#endif - return mystrdup(expw); -} - -// language to encoding default map - -struct lang_map { - const char* lang; - int num; -}; - -static struct lang_map lang2enc[] = - {{"ar", LANG_ar}, {"az", LANG_az}, - {"az_AZ", LANG_az}, // for back-compatibility - {"bg", LANG_bg}, {"ca", LANG_ca}, - {"cs", LANG_cs}, {"da", LANG_da}, - {"de", LANG_de}, {"el", LANG_el}, - {"en", LANG_en}, {"es", LANG_es}, - {"eu", LANG_eu}, {"gl", LANG_gl}, - {"fr", LANG_fr}, {"hr", LANG_hr}, - {"hu", LANG_hu}, {"hu_HU", LANG_hu}, // for back-compatibility - {"it", LANG_it}, {"la", LANG_la}, - {"lv", LANG_lv}, {"nl", LANG_nl}, - {"pl", LANG_pl}, {"pt", LANG_pt}, - {"sv", LANG_sv}, {"tr", LANG_tr}, - {"tr_TR", LANG_tr}, // for back-compatibility - {"ru", LANG_ru}, {"uk", LANG_uk}}; - -int get_lang_num(const char* lang) { - int n = sizeof(lang2enc) / sizeof(lang2enc[0]); - for (int i = 0; i < n; i++) { - if (strcmp(lang, lang2enc[i].lang) == 0) { - return lang2enc[i].num; - } - } - return LANG_xx; -} - -#ifndef OPENOFFICEORG -#ifndef MOZILLA_CLIENT -int initialize_utf_tbl() { - utf_tbl_count++; - if (utf_tbl) - return 0; - utf_tbl = (unicode_info2*)malloc(CONTSIZE * sizeof(unicode_info2)); - if (utf_tbl) { - size_t j; - for (j = 0; j < CONTSIZE; j++) { - utf_tbl[j].cletter = 0; - utf_tbl[j].clower = (unsigned short)j; - utf_tbl[j].cupper = (unsigned short)j; - } - for (j = 0; j < UTF_LST_LEN; j++) { - utf_tbl[utf_lst[j].c].cletter = 1; - utf_tbl[utf_lst[j].c].clower = utf_lst[j].clower; - utf_tbl[utf_lst[j].c].cupper = utf_lst[j].cupper; - } - } else - return 1; - return 0; -} -#endif -#endif - -void free_utf_tbl() { - if (utf_tbl_count > 0) - utf_tbl_count--; - if (utf_tbl && (utf_tbl_count == 0)) { - free(utf_tbl); - utf_tbl = NULL; - } -} - -unsigned short unicodetoupper(unsigned short c, int langnum) { - // In Azeri and Turkish, I and i dictinct letters: - // There are a dotless lower case i pair of upper `I', - // and an upper I with dot pair of lower `i'. - if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr))) - return 0x0130; -#ifdef OPENOFFICEORG - return static_cast(u_toupper(c)); -#else -#ifdef MOZILLA_CLIENT - return ToUpperCase((char16_t)c); -#else - return (utf_tbl) ? utf_tbl[c].cupper : c; -#endif -#endif -} - -unsigned short unicodetolower(unsigned short c, int langnum) { - // In Azeri and Turkish, I and i dictinct letters: - // There are a dotless lower case i pair of upper `I', - // and an upper I with dot pair of lower `i'. - if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr))) - return 0x0131; -#ifdef OPENOFFICEORG - return static_cast(u_tolower(c)); -#else -#ifdef MOZILLA_CLIENT - return ToLowerCase((char16_t)c); -#else - return (utf_tbl) ? utf_tbl[c].clower : c; -#endif -#endif -} - -int unicodeisalpha(unsigned short c) { -#ifdef OPENOFFICEORG - return u_isalpha(c); -#else - return (utf_tbl) ? utf_tbl[c].cletter : 0; -#endif -} - -/* get type of capitalization */ -int get_captype(char* word, int nl, cs_info* csconv) { - // now determine the capitalization type of the first nl letters - int ncap = 0; - int nneutral = 0; - int firstcap = 0; - if (csconv == NULL) - return NOCAP; - for (char* q = word; *q != '\0'; q++) { - unsigned char nIndex = static_cast(*q); - if (ccase(csconv, nIndex)) - ncap++; - if (cupper(csconv, nIndex) == clower(csconv, nIndex)) - nneutral++; - } - if (ncap) { - unsigned char nIndex = static_cast(word[0]); - firstcap = csconv[nIndex].ccase; - } - - // now finally set the captype - if (ncap == 0) { - return NOCAP; - } else if ((ncap == 1) && firstcap) { - return INITCAP; - } else if ((ncap == nl) || ((ncap + nneutral) == nl)) { - return ALLCAP; - } else if ((ncap > 1) && firstcap) { - return HUHINITCAP; - } - return HUHCAP; -} - -int get_captype_utf8(w_char* word, int nl, int langnum) { - // now determine the capitalization type of the first nl letters - int ncap = 0; - int nneutral = 0; - int firstcap = 0; - unsigned short idx; - // don't check too long words - if (nl >= MAXWORDLEN) - return 0; - // big Unicode character (non BMP area) - if (nl == -1) - return NOCAP; - for (int i = 0; i < nl; i++) { - idx = (word[i].h << 8) + word[i].l; - if (idx != unicodetolower(idx, langnum)) - ncap++; - if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) - nneutral++; - } - if (ncap) { - idx = (word[0].h << 8) + word[0].l; - firstcap = (idx != unicodetolower(idx, langnum)); - } - - // now finally set the captype - if (ncap == 0) { - return NOCAP; - } else if ((ncap == 1) && firstcap) { - return INITCAP; - } else if ((ncap == nl) || ((ncap + nneutral) == nl)) { - return ALLCAP; - } else if ((ncap > 1) && firstcap) { - return HUHINITCAP; - } - return HUHCAP; -} - -// strip all ignored characters in the string -void remove_ignored_chars_utf(char* word, - unsigned short ignored_chars[], - int ignored_len) { - w_char w[MAXWORDLEN]; - w_char w2[MAXWORDLEN]; - int i; - int j; - int len = u8_u16(w, MAXWORDLEN, word); - for (i = 0, j = 0; i < len; i++) { - if (!flag_bsearch(ignored_chars, ((unsigned short*)w)[i], ignored_len)) { - w2[j] = w[i]; - j++; - } - } - if (j < i) - u16_u8(word, MAXWORDUTF8LEN, w2, j); -} - -namespace { -union w_s { - w_char w; - unsigned short s; -}; - -unsigned short asushort(w_char in) { - w_s c; - c.w = in; - return c.s; -} -} - -// strip all ignored characters in the string -std::string& remove_ignored_chars_utf(std::string& word, - unsigned short ignored_chars[], - int ignored_len) { - std::vector w; - std::vector w2; - u8_u16(w, word); - - for (size_t i = 0; i < w.size(); ++i) { - if (!flag_bsearch(ignored_chars, asushort(w[i]), ignored_len)) - w2.push_back(w[i]); - } - - u16_u8(word, w2); - return word; -} - -// strip all ignored characters in the string -void remove_ignored_chars(char* word, char* ignored_chars) { - for (char* p = word; *p != '\0'; p++) { - if (!strchr(ignored_chars, *p)) { - *word = *p; - word++; - } - } - *word = '\0'; -} - -namespace { -class is_any_of { - public: - is_any_of(const std::string& in) : chars(in) {} - - bool operator()(char c) { return chars.find(c) != std::string::npos; } - - private: - const std::string& chars; -}; -} - -// strip all ignored characters in the string -std::string& remove_ignored_chars(std::string& word, - const std::string& ignored_chars) { - word.erase( - std::remove_if(word.begin(), word.end(), is_any_of(ignored_chars))); - return word; -} - -int parse_string(char* line, char** out, int ln) { - char* tp = line; - char* piece; - int i = 0; - int np = 0; - if (*out) { - HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions\n", ln); - return 1; - } - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - np++; - break; - } - case 1: { - *out = mystrdup(piece); - if (!*out) - return 1; - np++; - break; - } - default: - break; - } - i++; - } - // free(piece); - piece = mystrsep(&tp, 0); - } - if (np != 2) { - HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", ln); - return 1; - } - return 0; -} - -int parse_array(char* line, - char** out, - unsigned short** out_utf16, - int* out_utf16_len, - int utf8, - int ln) { - if (parse_string(line, out, ln)) - return 1; - if (utf8) { - w_char w[MAXWORDLEN]; - int n = u8_u16(w, MAXWORDLEN, *out); - if (n > 0) { - flag_qsort((unsigned short*)w, 0, n); - *out_utf16 = (unsigned short*)malloc(n * sizeof(unsigned short)); - if (!*out_utf16) - return 1; - memcpy(*out_utf16, w, n * sizeof(unsigned short)); - } - *out_utf16_len = n; - } - return 0; -} diff --git a/libs/hunspell/src/dictmgr.c++ b/libs/hunspell/src/dictmgr.c++ new file mode 100644 index 0000000000..473c09acfe --- /dev/null +++ b/libs/hunspell/src/dictmgr.c++ @@ -0,0 +1,216 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Hunspell, based on MySpell. + * + * The Initial Developers of the Original Code are + * Kevin Hendricks (MySpell) and Németh László (Hunspell). + * Portions created by the Initial Developers are Copyright (C) 2002-2005 + * the Initial Developers. All Rights Reserved. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include +#include +#include +#include + +#include "dictmgr.hxx" +#include "csutil.hxx" + +DictMgr::DictMgr(const char* dictpath, const char* etype) : numdict(0) { + // load list of etype entries + pdentry = (dictentry*)malloc(MAXDICTIONARIES * sizeof(struct dictentry)); + if (pdentry) { + if (parse_file(dictpath, etype)) { + numdict = 0; + // no dictionary.lst found is okay + } + } +} + +DictMgr::~DictMgr() { + dictentry* pdict = NULL; + if (pdentry) { + pdict = pdentry; + for (int i = 0; i < numdict; i++) { + if (pdict->lang) { + free(pdict->lang); + pdict->lang = NULL; + } + if (pdict->region) { + free(pdict->region); + pdict->region = NULL; + } + if (pdict->filename) { + free(pdict->filename); + pdict->filename = NULL; + } + pdict++; + } + free(pdentry); + pdentry = NULL; + pdict = NULL; + } + numdict = 0; +} + +// read in list of etype entries and build up structure to describe them +int DictMgr::parse_file(const char* dictpath, const char* etype) { + int i; + char line[MAXDICTENTRYLEN + 1]; + dictentry* pdict = pdentry; + + // open the dictionary list file + FILE* dictlst; + dictlst = myfopen(dictpath, "r"); + if (!dictlst) { + return 1; + } + + // step one is to parse the dictionary list building up the + // descriptive structures + + // read in each line ignoring any that dont start with etype + while (fgets(line, MAXDICTENTRYLEN, dictlst)) { + mychomp(line); + + /* parse in a dictionary entry */ + if (strncmp(line, etype, 4) == 0) { + if (numdict < MAXDICTIONARIES) { + char* tp = line; + char* piece; + i = 0; + while ((piece = mystrsep(&tp, ' '))) { + if (*piece != '\0') { + switch (i) { + case 0: + break; + case 1: + pdict->lang = mystrdup(piece); + break; + case 2: + if (strcmp(piece, "ANY") == 0) + pdict->region = mystrdup(""); + else + pdict->region = mystrdup(piece); + break; + case 3: + pdict->filename = mystrdup(piece); + break; + default: + break; + } + i++; + } + free(piece); + } + if (i == 4) { + numdict++; + pdict++; + } else { + switch (i) { + case 3: + free(pdict->region); + pdict->region = NULL; + /* FALLTHROUGH */ + case 2: + free(pdict->lang); + pdict->lang = NULL; + default: + break; + } + fprintf(stderr, "dictionary list corruption in line \"%s\"\n", line); + fflush(stderr); + } + } + } + } + fclose(dictlst); + return 0; +} + +// return text encoding of dictionary +int DictMgr::get_list(dictentry** ppentry) { + *ppentry = pdentry; + return numdict; +} + +// strip strings into token based on single char delimiter +// acts like strsep() but only uses a delim char and not +// a delim string + +char* DictMgr::mystrsep(char** stringp, const char delim) { + char* rv = NULL; + char* mp = *stringp; + size_t n = strlen(mp); + if (n > 0) { + char* dp = (char*)memchr(mp, (int)((unsigned char)delim), n); + if (dp) { + *stringp = dp + 1; + size_t nc = dp - mp; + rv = (char*)malloc(nc + 1); + if (rv) { + memcpy(rv, mp, nc); + *(rv + nc) = '\0'; + } + } else { + rv = (char*)malloc(n + 1); + if (rv) { + memcpy(rv, mp, n); + *(rv + n) = '\0'; + *stringp = mp + n; + } + } + } + return rv; +} + +// replaces strdup with ansi version +char* DictMgr::mystrdup(const char* s) { + char* d = NULL; + if (s) { + int sl = strlen(s) + 1; + d = (char*)malloc(sl); + if (d) + memcpy(d, s, sl); + } + return d; +} + +// remove cross-platform text line end characters +void DictMgr::mychomp(char* s) { + int k = strlen(s); + if ((k > 0) && ((*(s + k - 1) == '\r') || (*(s + k - 1) == '\n'))) + *(s + k - 1) = '\0'; + if ((k > 1) && (*(s + k - 2) == '\r')) + *(s + k - 2) = '\0'; +} diff --git a/libs/hunspell/src/dictmgr.cxx b/libs/hunspell/src/dictmgr.cxx deleted file mode 100644 index 473c09acfe..0000000000 --- a/libs/hunspell/src/dictmgr.cxx +++ /dev/null @@ -1,216 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include -#include -#include -#include - -#include "dictmgr.hxx" -#include "csutil.hxx" - -DictMgr::DictMgr(const char* dictpath, const char* etype) : numdict(0) { - // load list of etype entries - pdentry = (dictentry*)malloc(MAXDICTIONARIES * sizeof(struct dictentry)); - if (pdentry) { - if (parse_file(dictpath, etype)) { - numdict = 0; - // no dictionary.lst found is okay - } - } -} - -DictMgr::~DictMgr() { - dictentry* pdict = NULL; - if (pdentry) { - pdict = pdentry; - for (int i = 0; i < numdict; i++) { - if (pdict->lang) { - free(pdict->lang); - pdict->lang = NULL; - } - if (pdict->region) { - free(pdict->region); - pdict->region = NULL; - } - if (pdict->filename) { - free(pdict->filename); - pdict->filename = NULL; - } - pdict++; - } - free(pdentry); - pdentry = NULL; - pdict = NULL; - } - numdict = 0; -} - -// read in list of etype entries and build up structure to describe them -int DictMgr::parse_file(const char* dictpath, const char* etype) { - int i; - char line[MAXDICTENTRYLEN + 1]; - dictentry* pdict = pdentry; - - // open the dictionary list file - FILE* dictlst; - dictlst = myfopen(dictpath, "r"); - if (!dictlst) { - return 1; - } - - // step one is to parse the dictionary list building up the - // descriptive structures - - // read in each line ignoring any that dont start with etype - while (fgets(line, MAXDICTENTRYLEN, dictlst)) { - mychomp(line); - - /* parse in a dictionary entry */ - if (strncmp(line, etype, 4) == 0) { - if (numdict < MAXDICTIONARIES) { - char* tp = line; - char* piece; - i = 0; - while ((piece = mystrsep(&tp, ' '))) { - if (*piece != '\0') { - switch (i) { - case 0: - break; - case 1: - pdict->lang = mystrdup(piece); - break; - case 2: - if (strcmp(piece, "ANY") == 0) - pdict->region = mystrdup(""); - else - pdict->region = mystrdup(piece); - break; - case 3: - pdict->filename = mystrdup(piece); - break; - default: - break; - } - i++; - } - free(piece); - } - if (i == 4) { - numdict++; - pdict++; - } else { - switch (i) { - case 3: - free(pdict->region); - pdict->region = NULL; - /* FALLTHROUGH */ - case 2: - free(pdict->lang); - pdict->lang = NULL; - default: - break; - } - fprintf(stderr, "dictionary list corruption in line \"%s\"\n", line); - fflush(stderr); - } - } - } - } - fclose(dictlst); - return 0; -} - -// return text encoding of dictionary -int DictMgr::get_list(dictentry** ppentry) { - *ppentry = pdentry; - return numdict; -} - -// strip strings into token based on single char delimiter -// acts like strsep() but only uses a delim char and not -// a delim string - -char* DictMgr::mystrsep(char** stringp, const char delim) { - char* rv = NULL; - char* mp = *stringp; - size_t n = strlen(mp); - if (n > 0) { - char* dp = (char*)memchr(mp, (int)((unsigned char)delim), n); - if (dp) { - *stringp = dp + 1; - size_t nc = dp - mp; - rv = (char*)malloc(nc + 1); - if (rv) { - memcpy(rv, mp, nc); - *(rv + nc) = '\0'; - } - } else { - rv = (char*)malloc(n + 1); - if (rv) { - memcpy(rv, mp, n); - *(rv + n) = '\0'; - *stringp = mp + n; - } - } - } - return rv; -} - -// replaces strdup with ansi version -char* DictMgr::mystrdup(const char* s) { - char* d = NULL; - if (s) { - int sl = strlen(s) + 1; - d = (char*)malloc(sl); - if (d) - memcpy(d, s, sl); - } - return d; -} - -// remove cross-platform text line end characters -void DictMgr::mychomp(char* s) { - int k = strlen(s); - if ((k > 0) && ((*(s + k - 1) == '\r') || (*(s + k - 1) == '\n'))) - *(s + k - 1) = '\0'; - if ((k > 1) && (*(s + k - 2) == '\r')) - *(s + k - 2) = '\0'; -} diff --git a/libs/hunspell/src/filemgr.c++ b/libs/hunspell/src/filemgr.c++ new file mode 100644 index 0000000000..cbe41c577b --- /dev/null +++ b/libs/hunspell/src/filemgr.c++ @@ -0,0 +1,124 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Hunspell, based on MySpell. + * + * The Initial Developers of the Original Code are + * Kevin Hendricks (MySpell) and Németh László (Hunspell). + * Portions created by the Initial Developers are Copyright (C) 2002-2005 + * the Initial Developers. All Rights Reserved. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +#include "filemgr.hxx" +#include "csutil.hxx" + +int FileMgr::fail(const char* err, const char* par) { + fprintf(stderr, err, par); + return -1; +} + +FileMgr::FileMgr(const char* file, const char* key) : hin(NULL), linenum(0) { + in[0] = '\0'; + + fin = myfopen(file, "r"); + if (!fin) { + // check hzipped file + char* st = (char*)malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1); + if (st) { + strcpy(st, file); + strcat(st, HZIP_EXTENSION); + hin = new Hunzip(st, key); + free(st); + } + } + if (!fin && !hin) + fail(MSG_OPEN, file); +} + +FileMgr::~FileMgr() { + if (fin) + fclose(fin); + if (hin) + delete hin; +} + +char* FileMgr::getline() { + const char* l; + linenum++; + if (fin) + return fgets(in, BUFSIZE - 1, fin); + if (hin && ((l = hin->getline()) != NULL)) + return strcpy(in, l); + linenum--; + return NULL; +} + +int FileMgr::getlinenum() { + return linenum; +} diff --git a/libs/hunspell/src/filemgr.cxx b/libs/hunspell/src/filemgr.cxx deleted file mode 100644 index cbe41c577b..0000000000 --- a/libs/hunspell/src/filemgr.cxx +++ /dev/null @@ -1,124 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -/* - * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada - * And Contributors. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. All modifications to the source code must be clearly marked as - * such. Binary redistributions based on modified source code - * must be clearly marked as modified versions in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include - -#include "filemgr.hxx" -#include "csutil.hxx" - -int FileMgr::fail(const char* err, const char* par) { - fprintf(stderr, err, par); - return -1; -} - -FileMgr::FileMgr(const char* file, const char* key) : hin(NULL), linenum(0) { - in[0] = '\0'; - - fin = myfopen(file, "r"); - if (!fin) { - // check hzipped file - char* st = (char*)malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1); - if (st) { - strcpy(st, file); - strcat(st, HZIP_EXTENSION); - hin = new Hunzip(st, key); - free(st); - } - } - if (!fin && !hin) - fail(MSG_OPEN, file); -} - -FileMgr::~FileMgr() { - if (fin) - fclose(fin); - if (hin) - delete hin; -} - -char* FileMgr::getline() { - const char* l; - linenum++; - if (fin) - return fgets(in, BUFSIZE - 1, fin); - if (hin && ((l = hin->getline()) != NULL)) - return strcpy(in, l); - linenum--; - return NULL; -} - -int FileMgr::getlinenum() { - return linenum; -} diff --git a/libs/hunspell/src/hashmgr.c++ b/libs/hunspell/src/hashmgr.c++ new file mode 100644 index 0000000000..dbcf56a51c --- /dev/null +++ b/libs/hunspell/src/hashmgr.c++ @@ -0,0 +1,1116 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Hunspell, based on MySpell. + * + * The Initial Developers of the Original Code are + * Kevin Hendricks (MySpell) and Németh László (Hunspell). + * Portions created by the Initial Developers are Copyright (C) 2002-2005 + * the Initial Developers. All Rights Reserved. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include "hashmgr.hxx" +#include "csutil.hxx" +#include "atypes.hxx" + +// build a hash table from a munched word list + +HashMgr::HashMgr(const char* tpath, const char* apath, const char* key) + : tablesize(0), + tableptr(NULL), + flag_mode(FLAG_CHAR), + complexprefixes(0), + utf8(0), + forbiddenword(FORBIDDENWORD) // forbidden word signing flag + , + numaliasf(0), + aliasf(NULL), + aliasflen(0), + numaliasm(0), + aliasm(NULL) { + langnum = 0; + lang = NULL; + enc = NULL; + csconv = 0; + ignorechars = NULL; + ignorechars_utf16 = NULL; + ignorechars_utf16_len = 0; + load_config(apath, key); + int ec = load_tables(tpath, key); + if (ec) { + /* error condition - what should we do here */ + HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n", ec); + if (tableptr) { + free(tableptr); + tableptr = NULL; + } + tablesize = 0; + } +} + +HashMgr::~HashMgr() { + if (tableptr) { + // now pass through hash table freeing up everything + // go through column by column of the table + for (int i = 0; i < tablesize; i++) { + struct hentry* pt = tableptr[i]; + struct hentry* nt = NULL; + while (pt) { + nt = pt->next; + if (pt->astr && + (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) + free(pt->astr); + free(pt); + pt = nt; + } + } + free(tableptr); + } + tablesize = 0; + + if (aliasf) { + for (int j = 0; j < (numaliasf); j++) + free(aliasf[j]); + free(aliasf); + aliasf = NULL; + if (aliasflen) { + free(aliasflen); + aliasflen = NULL; + } + } + if (aliasm) { + for (int j = 0; j < (numaliasm); j++) + free(aliasm[j]); + free(aliasm); + aliasm = NULL; + } + +#ifndef OPENOFFICEORG +#ifndef MOZILLA_CLIENT + if (utf8) + free_utf_tbl(); +#endif +#endif + + if (enc) + free(enc); + if (lang) + free(lang); + + if (ignorechars) + free(ignorechars); + if (ignorechars_utf16) + free(ignorechars_utf16); + +#ifdef MOZILLA_CLIENT + delete[] csconv; +#endif +} + +// lookup a root word in the hashtable + +struct hentry* HashMgr::lookup(const char* word) const { + struct hentry* dp; + if (tableptr) { + dp = tableptr[hash(word)]; + if (!dp) + return NULL; + for (; dp != NULL; dp = dp->next) { + if (strcmp(word, dp->word) == 0) + return dp; + } + } + return NULL; +} + +// add a word to the hash table (private) +int HashMgr::add_word(const char* word, + int wbl, + int wcl, + unsigned short* aff, + int al, + const char* desc, + bool onlyupcase) { + bool upcasehomonym = false; + int descl = desc ? (aliasm ? sizeof(char*) : strlen(desc) + 1) : 0; + // variable-length hash record with word and optional fields + struct hentry* hp = + (struct hentry*)malloc(sizeof(struct hentry) + wbl + descl); + if (!hp) + return 1; + char* hpw = hp->word; + strcpy(hpw, word); + if (ignorechars != NULL) { + if (utf8) { + remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len); + } else { + remove_ignored_chars(hpw, ignorechars); + } + } + if (complexprefixes) { + if (utf8) + reverseword_utf(hpw); + else + reverseword(hpw); + } + + int i = hash(hpw); + + hp->blen = (unsigned char)wbl; + hp->clen = (unsigned char)wcl; + hp->alen = (short)al; + hp->astr = aff; + hp->next = NULL; + hp->next_homonym = NULL; + + // store the description string or its pointer + if (desc) { + hp->var = H_OPT; + if (aliasm) { + hp->var += H_OPT_ALIASM; + store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc))); + } else { + strcpy(hpw + wbl + 1, desc); + if (complexprefixes) { + if (utf8) + reverseword_utf(HENTRY_DATA(hp)); + else + reverseword(HENTRY_DATA(hp)); + } + } + if (strstr(HENTRY_DATA(hp), MORPH_PHON)) + hp->var += H_OPT_PHON; + } else + hp->var = 0; + + struct hentry* dp = tableptr[i]; + if (!dp) { + tableptr[i] = hp; + return 0; + } + while (dp->next != NULL) { + if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) { + // remove hidden onlyupcase homonym + if (!onlyupcase) { + if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { + free(dp->astr); + dp->astr = hp->astr; + dp->alen = hp->alen; + free(hp); + return 0; + } else { + dp->next_homonym = hp; + } + } else { + upcasehomonym = true; + } + } + dp = dp->next; + } + if (strcmp(hp->word, dp->word) == 0) { + // remove hidden onlyupcase homonym + if (!onlyupcase) { + if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { + free(dp->astr); + dp->astr = hp->astr; + dp->alen = hp->alen; + free(hp); + return 0; + } else { + dp->next_homonym = hp; + } + } else { + upcasehomonym = true; + } + } + if (!upcasehomonym) { + dp->next = hp; + } else { + // remove hidden onlyupcase homonym + if (hp->astr) + free(hp->astr); + free(hp); + } + return 0; +} + +int HashMgr::add_hidden_capitalized_word(char* word, + int wbl, + int wcl, + unsigned short* flags, + int flagslen, + char* dp, + int captype) { + if (flags == NULL) + flagslen = 0; + + // add inner capitalized forms to handle the following allcap forms: + // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG + // Allcaps with suffixes: CIA's -> CIA'S + if (((captype == HUHCAP) || (captype == HUHINITCAP) || + ((captype == ALLCAP) && (flagslen != 0))) && + !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) { + unsigned short* flags2 = + (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1)); + if (!flags2) + return 1; + if (flagslen) + memcpy(flags2, flags, flagslen * sizeof(unsigned short)); + flags2[flagslen] = ONLYUPCASEFLAG; + if (utf8) { + char st[BUFSIZE]; + w_char w[BUFSIZE]; + int wlen = u8_u16(w, BUFSIZE, word); + mkallsmall_utf(w, wlen, langnum); + mkallcap_utf(w, 1, langnum); + u16_u8(st, BUFSIZE, w, wlen); + return add_word(st, wbl, wcl, flags2, flagslen + 1, dp, true); + } else { + mkallsmall(word, csconv); + mkinitcap(word, csconv); + return add_word(word, wbl, wcl, flags2, flagslen + 1, dp, true); + } + } + return 0; +} + +// detect captype and modify word length for UTF-8 encoding +int HashMgr::get_clen_and_captype(const char* word, int wbl, int* captype) { + int len; + if (utf8) { + w_char dest_utf[BUFSIZE]; + len = u8_u16(dest_utf, BUFSIZE, word); + *captype = get_captype_utf8(dest_utf, len, langnum); + } else { + len = wbl; + *captype = get_captype((char*)word, len, csconv); + } + return len; +} + +// remove word (personal dictionary function for standalone applications) +int HashMgr::remove(const char* word) { + struct hentry* dp = lookup(word); + while (dp) { + if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { + unsigned short* flags = + (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1)); + if (!flags) + return 1; + for (int i = 0; i < dp->alen; i++) + flags[i] = dp->astr[i]; + flags[dp->alen] = forbiddenword; + dp->astr = flags; + dp->alen++; + flag_qsort(flags, 0, dp->alen); + } + dp = dp->next_homonym; + } + return 0; +} + +/* remove forbidden flag to add a personal word to the hash */ +int HashMgr::remove_forbidden_flag(const char* word) { + struct hentry* dp = lookup(word); + if (!dp) + return 1; + while (dp) { + if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) { + if (dp->alen == 1) + dp->alen = 0; // XXX forbidden words of personal dic. + else { + unsigned short* flags2 = + (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen - 1)); + if (!flags2) + return 1; + int i, j = 0; + for (i = 0; i < dp->alen; i++) { + if (dp->astr[i] != forbiddenword) + flags2[j++] = dp->astr[i]; + } + dp->alen--; + dp->astr = flags2; // XXX allowed forbidden words + } + } + dp = dp->next_homonym; + } + return 0; +} + +// add a custom dic. word to the hash table (public) +int HashMgr::add(const char* word) { + unsigned short* flags = NULL; + int al = 0; + if (remove_forbidden_flag(word)) { + int captype; + int wbl = strlen(word); + int wcl = get_clen_and_captype(word, wbl, &captype); + add_word(word, wbl, wcl, flags, al, NULL, false); + return add_hidden_capitalized_word((char*)word, wbl, wcl, flags, al, NULL, + captype); + } + return 0; +} + +int HashMgr::add_with_affix(const char* word, const char* example) { + // detect captype and modify word length for UTF-8 encoding + struct hentry* dp = lookup(example); + remove_forbidden_flag(word); + if (dp && dp->astr) { + int captype; + int wbl = strlen(word); + int wcl = get_clen_and_captype(word, wbl, &captype); + if (aliasf) { + add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false); + } else { + unsigned short* flags = + (unsigned short*)malloc(dp->alen * sizeof(unsigned short)); + if (flags) { + memcpy((void*)flags, (void*)dp->astr, + dp->alen * sizeof(unsigned short)); + add_word(word, wbl, wcl, flags, dp->alen, NULL, false); + } else + return 1; + } + return add_hidden_capitalized_word((char*)word, wbl, wcl, dp->astr, + dp->alen, NULL, captype); + } + return 1; +} + +// walk the hash table entry by entry - null at end +// initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp); +struct hentry* HashMgr::walk_hashtable(int& col, struct hentry* hp) const { + if (hp && hp->next != NULL) + return hp->next; + for (col++; col < tablesize; col++) { + if (tableptr[col]) + return tableptr[col]; + } + // null at end and reset to start + col = -1; + return NULL; +} + +// load a munched word list and build a hash table on the fly +int HashMgr::load_tables(const char* tpath, const char* key) { + int al; + char* ap; + char* dp; + char* dp2; + unsigned short* flags; + char* ts; + + // open dictionary file + FileMgr* dict = new FileMgr(tpath, key); + if (dict == NULL) + return 1; + + // first read the first line of file to get hash table size */ + if ((ts = dict->getline()) == NULL) { + HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath); + delete dict; + return 2; + } + mychomp(ts); + + /* remove byte order mark */ + if (strncmp(ts, "\xEF\xBB\xBF", 3) == 0) { + memmove(ts, ts + 3, strlen(ts + 3) + 1); + // warning: dic file begins with byte order mark: possible incompatibility + // with old Hunspell versions + } + + tablesize = atoi(ts); + + int nExtra = 5 + USERWORD; + + if (tablesize <= 0 || + (tablesize >= (std::numeric_limits::max() - 1 - nExtra) / + int(sizeof(struct hentry*)))) { + HUNSPELL_WARNING( + stderr, "error: line 1: missing or bad word count in the dic file\n"); + delete dict; + return 4; + } + tablesize += nExtra; + if ((tablesize % 2) == 0) + tablesize++; + + // allocate the hash table + tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*)); + if (!tableptr) { + delete dict; + return 3; + } + + // loop through all words on much list and add to hash + // table and create word and affix strings + + while ((ts = dict->getline()) != NULL) { + mychomp(ts); + // split each line into word and morphological description + dp = ts; + while ((dp = strchr(dp, ':')) != NULL) { + if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) { + for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--) + ; + if (dp < ts) { // missing word + dp = NULL; + } else { + *(dp + 1) = '\0'; + dp = dp + 2; + } + break; + } + dp++; + } + + // tabulator is the old morphological field separator + dp2 = strchr(ts, '\t'); + if (dp2 && (!dp || dp2 < dp)) { + *dp2 = '\0'; + dp = dp2 + 1; + } + + // split each line into word and affix char strings + // "\/" signs slash in words (not affix separator) + // "/" at beginning of the line is word character (not affix separator) + ap = strchr(ts, '/'); + while (ap) { + if (ap == ts) { + ap++; + continue; + } else if (*(ap - 1) != '\\') + break; + // replace "\/" with "/" + for (char *sp = ap - 1; *sp; *sp = *(sp + 1), sp++) + ; + ap = strchr(ap, '/'); + } + + if (ap) { + *ap = '\0'; + if (aliasf) { + int index = atoi(ap + 1); + al = get_aliasf(index, &flags, dict); + if (!al) { + HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", + dict->getlinenum()); + *ap = '\0'; + } + } else { + al = decode_flags(&flags, ap + 1, dict); + if (al == -1) { + HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); + delete dict; + return 6; + } + flag_qsort(flags, 0, al); + } + } else { + al = 0; + ap = NULL; + flags = NULL; + } + + int captype; + int wbl = strlen(ts); + int wcl = get_clen_and_captype(ts, wbl, &captype); + // add the word and its index plus its capitalized form optionally + if (add_word(ts, wbl, wcl, flags, al, dp, false) || + add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) { + delete dict; + return 5; + } + } + + delete dict; + return 0; +} + +// the hash function is a simple load and rotate +// algorithm borrowed +int HashMgr::hash(const char* word) const { + unsigned long hv = 0; + for (int i = 0; i < 4 && *word != 0; i++) + hv = (hv << 8) | (*word++); + while (*word != 0) { + ROTATE(hv, ROTATE_LEN); + hv ^= (*word++); + } + return (unsigned long)hv % tablesize; +} + +int HashMgr::decode_flags(unsigned short** result, char* flags, FileMgr* af) { + int len; + if (*flags == '\0') { + *result = NULL; + return 0; + } + switch (flag_mode) { + case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) + len = strlen(flags); + if (len % 2 == 1) + HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", + af->getlinenum()); + len /= 2; + *result = (unsigned short*)malloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + for (int i = 0; i < len; i++) { + (*result)[i] = (((unsigned short)flags[i * 2]) << 8) + + (unsigned short)flags[i * 2 + 1]; + } + break; + } + case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 + // 23 233) + int i; + len = 1; + char* src = flags; + unsigned short* dest; + char* p; + for (p = flags; *p; p++) { + if (*p == ',') + len++; + } + *result = (unsigned short*)malloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + dest = *result; + for (p = flags; *p; p++) { + if (*p == ',') { + i = atoi(src); + if (i >= DEFAULTFLAGS) + HUNSPELL_WARNING( + stderr, "error: line %d: flag id %d is too large (max: %d)\n", + af->getlinenum(), i, DEFAULTFLAGS - 1); + *dest = (unsigned short)i; + if (*dest == 0) + HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", + af->getlinenum()); + src = p + 1; + dest++; + } + } + i = atoi(src); + if (i >= DEFAULTFLAGS) + HUNSPELL_WARNING(stderr, + "error: line %d: flag id %d is too large (max: %d)\n", + af->getlinenum(), i, DEFAULTFLAGS - 1); + *dest = (unsigned short)i; + if (*dest == 0) + HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", + af->getlinenum()); + break; + } + case FLAG_UNI: { // UTF-8 characters + w_char w[BUFSIZE / 2]; + len = u8_u16(w, BUFSIZE / 2, flags); + *result = (unsigned short*)malloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + memcpy(*result, w, len * sizeof(short)); + break; + } + default: { // Ispell's one-character flags (erfg -> e r f g) + unsigned short* dest; + len = strlen(flags); + *result = (unsigned short*)malloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + dest = *result; + for (unsigned char* p = (unsigned char*)flags; *p; p++) { + *dest = (unsigned short)*p; + dest++; + } + } + } + return len; +} + +unsigned short HashMgr::decode_flag(const char* f) { + unsigned short s = 0; + int i; + switch (flag_mode) { + case FLAG_LONG: + s = ((unsigned short)f[0] << 8) + (unsigned short)f[1]; + break; + case FLAG_NUM: + i = atoi(f); + if (i >= DEFAULTFLAGS) + HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", + i, DEFAULTFLAGS - 1); + s = (unsigned short)i; + break; + case FLAG_UNI: + u8_u16((w_char*)&s, 1, f); + break; + default: + s = (unsigned short)*((unsigned char*)f); + } + if (s == 0) + HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); + return s; +} + +char* HashMgr::encode_flag(unsigned short f) { + unsigned char ch[10]; + if (f == 0) + return mystrdup("(NULL)"); + if (flag_mode == FLAG_LONG) { + ch[0] = (unsigned char)(f >> 8); + ch[1] = (unsigned char)(f - ((f >> 8) << 8)); + ch[2] = '\0'; + } else if (flag_mode == FLAG_NUM) { + sprintf((char*)ch, "%d", f); + } else if (flag_mode == FLAG_UNI) { + u16_u8((char*)&ch, 10, (w_char*)&f, 1); + } else { + ch[0] = (unsigned char)(f); + ch[1] = '\0'; + } + return mystrdup((char*)ch); +} + +// read in aff file and set flag mode +int HashMgr::load_config(const char* affpath, const char* key) { + char* line; // io buffers + int firstline = 1; + + // open the affix file + FileMgr* afflst = new FileMgr(affpath, key); + if (!afflst) { + HUNSPELL_WARNING( + stderr, "Error - could not open affix description file %s\n", affpath); + return 1; + } + + // read in each line ignoring any that do not + // start with a known line type indicator + + while ((line = afflst->getline()) != NULL) { + mychomp(line); + + /* remove byte order mark */ + if (firstline) { + firstline = 0; + if (strncmp(line, "\xEF\xBB\xBF", 3) == 0) + memmove(line, line + 3, strlen(line + 3) + 1); + } + + /* parse in the try string */ + if ((strncmp(line, "FLAG", 4) == 0) && isspace(line[4])) { + if (flag_mode != FLAG_CHAR) { + HUNSPELL_WARNING(stderr, + "error: line %d: multiple definitions of the FLAG " + "affix file parameter\n", + afflst->getlinenum()); + } + if (strstr(line, "long")) + flag_mode = FLAG_LONG; + if (strstr(line, "num")) + flag_mode = FLAG_NUM; + if (strstr(line, "UTF-8")) + flag_mode = FLAG_UNI; + if (flag_mode == FLAG_CHAR) { + HUNSPELL_WARNING( + stderr, + "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", + afflst->getlinenum()); + } + } + if (strncmp(line, "FORBIDDENWORD", 13) == 0) { + char* st = NULL; + if (parse_string(line, &st, afflst->getlinenum())) { + delete afflst; + return 1; + } + forbiddenword = decode_flag(st); + free(st); + } + if (strncmp(line, "SET", 3) == 0) { + if (parse_string(line, &enc, afflst->getlinenum())) { + delete afflst; + return 1; + } + if (strcmp(enc, "UTF-8") == 0) { + utf8 = 1; +#ifndef OPENOFFICEORG +#ifndef MOZILLA_CLIENT + initialize_utf_tbl(); +#endif +#endif + } else + csconv = get_current_cs(enc); + } + if (strncmp(line, "LANG", 4) == 0) { + if (parse_string(line, &lang, afflst->getlinenum())) { + delete afflst; + return 1; + } + langnum = get_lang_num(lang); + } + + /* parse in the ignored characters (for example, Arabic optional diacritics + * characters */ + if (strncmp(line, "IGNORE", 6) == 0) { + if (parse_array(line, &ignorechars, &ignorechars_utf16, + &ignorechars_utf16_len, utf8, afflst->getlinenum())) { + delete afflst; + return 1; + } + } + + if ((strncmp(line, "AF", 2) == 0) && isspace(line[2])) { + if (parse_aliasf(line, afflst)) { + delete afflst; + return 1; + } + } + + if ((strncmp(line, "AM", 2) == 0) && isspace(line[2])) { + if (parse_aliasm(line, afflst)) { + delete afflst; + return 1; + } + } + + if (strncmp(line, "COMPLEXPREFIXES", 15) == 0) + complexprefixes = 1; + if (((strncmp(line, "SFX", 3) == 0) || (strncmp(line, "PFX", 3) == 0)) && + isspace(line[3])) + break; + } + if (csconv == NULL) + csconv = get_current_cs(SPELL_ENCODING); + delete afflst; + return 0; +} + +/* parse in the ALIAS table */ +int HashMgr::parse_aliasf(char* line, FileMgr* af) { + if (numaliasf != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return 1; + } + char* tp = line; + char* piece; + int i = 0; + int np = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numaliasf = atoi(piece); + if (numaliasf < 1) { + numaliasf = 0; + aliasf = NULL; + aliasflen = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return 1; + } + aliasf = + (unsigned short**)malloc(numaliasf * sizeof(unsigned short*)); + aliasflen = + (unsigned short*)malloc(numaliasf * sizeof(unsigned short)); + if (!aliasf || !aliasflen) { + numaliasf = 0; + if (aliasf) + free(aliasf); + if (aliasflen) + free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + return 1; + } + np++; + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (np != 2) { + numaliasf = 0; + free(aliasf); + free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return 1; + } + + /* now parse the numaliasf lines to read in the remainder of the table */ + char* nl; + for (int j = 0; j < numaliasf; j++) { + if ((nl = af->getline()) == NULL) + return 1; + mychomp(nl); + tp = nl; + i = 0; + aliasf[j] = NULL; + aliasflen[j] = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + if (strncmp(piece, "AF", 2) != 0) { + numaliasf = 0; + free(aliasf); + free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return 1; + } + break; + } + case 1: { + aliasflen[j] = + (unsigned short)decode_flags(&(aliasf[j]), piece, af); + flag_qsort(aliasf[j], 0, aliasflen[j]); + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (!aliasf[j]) { + free(aliasf); + free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + numaliasf = 0; + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return 1; + } + } + return 0; +} + +int HashMgr::is_aliasf() { + return (aliasf != NULL); +} + +int HashMgr::get_aliasf(int index, unsigned short** fvec, FileMgr* af) { + if ((index > 0) && (index <= numaliasf)) { + *fvec = aliasf[index - 1]; + return aliasflen[index - 1]; + } + HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", + af->getlinenum(), index); + *fvec = NULL; + return 0; +} + +/* parse morph alias definitions */ +int HashMgr::parse_aliasm(char* line, FileMgr* af) { + if (numaliasm != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return 1; + } + char* tp = line; + char* piece; + int i = 0; + int np = 0; + piece = mystrsep(&tp, 0); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numaliasm = atoi(piece); + if (numaliasm < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return 1; + } + aliasm = (char**)malloc(numaliasm * sizeof(char*)); + if (!aliasm) { + numaliasm = 0; + return 1; + } + np++; + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, 0); + } + if (np != 2) { + numaliasm = 0; + free(aliasm); + aliasm = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return 1; + } + + /* now parse the numaliasm lines to read in the remainder of the table */ + char* nl = line; + for (int j = 0; j < numaliasm; j++) { + if ((nl = af->getline()) == NULL) + return 1; + mychomp(nl); + tp = nl; + i = 0; + aliasm[j] = NULL; + piece = mystrsep(&tp, ' '); + while (piece) { + if (*piece != '\0') { + switch (i) { + case 0: { + if (strncmp(piece, "AM", 2) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + numaliasm = 0; + free(aliasm); + aliasm = NULL; + return 1; + } + break; + } + case 1: { + // add the remaining of the line + if (*tp) { + *(tp - 1) = ' '; + tp = tp + strlen(tp); + } + if (complexprefixes) { + if (utf8) + reverseword_utf(piece); + else + reverseword(piece); + } + aliasm[j] = mystrdup(piece); + if (!aliasm[j]) { + numaliasm = 0; + free(aliasm); + aliasm = NULL; + return 1; + } + break; + } + default: + break; + } + i++; + } + piece = mystrsep(&tp, ' '); + } + if (!aliasm[j]) { + numaliasm = 0; + free(aliasm); + aliasm = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return 1; + } + } + return 0; +} + +int HashMgr::is_aliasm() { + return (aliasm != NULL); +} + +char* HashMgr::get_aliasm(int index) { + if ((index > 0) && (index <= numaliasm)) + return aliasm[index - 1]; + HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index); + return NULL; +} diff --git a/libs/hunspell/src/hashmgr.cxx b/libs/hunspell/src/hashmgr.cxx deleted file mode 100644 index dbcf56a51c..0000000000 --- a/libs/hunspell/src/hashmgr.cxx +++ /dev/null @@ -1,1116 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -/* - * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada - * And Contributors. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. All modifications to the source code must be clearly marked as - * such. Binary redistributions based on modified source code - * must be clearly marked as modified versions in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include - -#include "hashmgr.hxx" -#include "csutil.hxx" -#include "atypes.hxx" - -// build a hash table from a munched word list - -HashMgr::HashMgr(const char* tpath, const char* apath, const char* key) - : tablesize(0), - tableptr(NULL), - flag_mode(FLAG_CHAR), - complexprefixes(0), - utf8(0), - forbiddenword(FORBIDDENWORD) // forbidden word signing flag - , - numaliasf(0), - aliasf(NULL), - aliasflen(0), - numaliasm(0), - aliasm(NULL) { - langnum = 0; - lang = NULL; - enc = NULL; - csconv = 0; - ignorechars = NULL; - ignorechars_utf16 = NULL; - ignorechars_utf16_len = 0; - load_config(apath, key); - int ec = load_tables(tpath, key); - if (ec) { - /* error condition - what should we do here */ - HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n", ec); - if (tableptr) { - free(tableptr); - tableptr = NULL; - } - tablesize = 0; - } -} - -HashMgr::~HashMgr() { - if (tableptr) { - // now pass through hash table freeing up everything - // go through column by column of the table - for (int i = 0; i < tablesize; i++) { - struct hentry* pt = tableptr[i]; - struct hentry* nt = NULL; - while (pt) { - nt = pt->next; - if (pt->astr && - (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) - free(pt->astr); - free(pt); - pt = nt; - } - } - free(tableptr); - } - tablesize = 0; - - if (aliasf) { - for (int j = 0; j < (numaliasf); j++) - free(aliasf[j]); - free(aliasf); - aliasf = NULL; - if (aliasflen) { - free(aliasflen); - aliasflen = NULL; - } - } - if (aliasm) { - for (int j = 0; j < (numaliasm); j++) - free(aliasm[j]); - free(aliasm); - aliasm = NULL; - } - -#ifndef OPENOFFICEORG -#ifndef MOZILLA_CLIENT - if (utf8) - free_utf_tbl(); -#endif -#endif - - if (enc) - free(enc); - if (lang) - free(lang); - - if (ignorechars) - free(ignorechars); - if (ignorechars_utf16) - free(ignorechars_utf16); - -#ifdef MOZILLA_CLIENT - delete[] csconv; -#endif -} - -// lookup a root word in the hashtable - -struct hentry* HashMgr::lookup(const char* word) const { - struct hentry* dp; - if (tableptr) { - dp = tableptr[hash(word)]; - if (!dp) - return NULL; - for (; dp != NULL; dp = dp->next) { - if (strcmp(word, dp->word) == 0) - return dp; - } - } - return NULL; -} - -// add a word to the hash table (private) -int HashMgr::add_word(const char* word, - int wbl, - int wcl, - unsigned short* aff, - int al, - const char* desc, - bool onlyupcase) { - bool upcasehomonym = false; - int descl = desc ? (aliasm ? sizeof(char*) : strlen(desc) + 1) : 0; - // variable-length hash record with word and optional fields - struct hentry* hp = - (struct hentry*)malloc(sizeof(struct hentry) + wbl + descl); - if (!hp) - return 1; - char* hpw = hp->word; - strcpy(hpw, word); - if (ignorechars != NULL) { - if (utf8) { - remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len); - } else { - remove_ignored_chars(hpw, ignorechars); - } - } - if (complexprefixes) { - if (utf8) - reverseword_utf(hpw); - else - reverseword(hpw); - } - - int i = hash(hpw); - - hp->blen = (unsigned char)wbl; - hp->clen = (unsigned char)wcl; - hp->alen = (short)al; - hp->astr = aff; - hp->next = NULL; - hp->next_homonym = NULL; - - // store the description string or its pointer - if (desc) { - hp->var = H_OPT; - if (aliasm) { - hp->var += H_OPT_ALIASM; - store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc))); - } else { - strcpy(hpw + wbl + 1, desc); - if (complexprefixes) { - if (utf8) - reverseword_utf(HENTRY_DATA(hp)); - else - reverseword(HENTRY_DATA(hp)); - } - } - if (strstr(HENTRY_DATA(hp), MORPH_PHON)) - hp->var += H_OPT_PHON; - } else - hp->var = 0; - - struct hentry* dp = tableptr[i]; - if (!dp) { - tableptr[i] = hp; - return 0; - } - while (dp->next != NULL) { - if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) { - // remove hidden onlyupcase homonym - if (!onlyupcase) { - if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { - free(dp->astr); - dp->astr = hp->astr; - dp->alen = hp->alen; - free(hp); - return 0; - } else { - dp->next_homonym = hp; - } - } else { - upcasehomonym = true; - } - } - dp = dp->next; - } - if (strcmp(hp->word, dp->word) == 0) { - // remove hidden onlyupcase homonym - if (!onlyupcase) { - if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { - free(dp->astr); - dp->astr = hp->astr; - dp->alen = hp->alen; - free(hp); - return 0; - } else { - dp->next_homonym = hp; - } - } else { - upcasehomonym = true; - } - } - if (!upcasehomonym) { - dp->next = hp; - } else { - // remove hidden onlyupcase homonym - if (hp->astr) - free(hp->astr); - free(hp); - } - return 0; -} - -int HashMgr::add_hidden_capitalized_word(char* word, - int wbl, - int wcl, - unsigned short* flags, - int flagslen, - char* dp, - int captype) { - if (flags == NULL) - flagslen = 0; - - // add inner capitalized forms to handle the following allcap forms: - // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG - // Allcaps with suffixes: CIA's -> CIA'S - if (((captype == HUHCAP) || (captype == HUHINITCAP) || - ((captype == ALLCAP) && (flagslen != 0))) && - !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) { - unsigned short* flags2 = - (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1)); - if (!flags2) - return 1; - if (flagslen) - memcpy(flags2, flags, flagslen * sizeof(unsigned short)); - flags2[flagslen] = ONLYUPCASEFLAG; - if (utf8) { - char st[BUFSIZE]; - w_char w[BUFSIZE]; - int wlen = u8_u16(w, BUFSIZE, word); - mkallsmall_utf(w, wlen, langnum); - mkallcap_utf(w, 1, langnum); - u16_u8(st, BUFSIZE, w, wlen); - return add_word(st, wbl, wcl, flags2, flagslen + 1, dp, true); - } else { - mkallsmall(word, csconv); - mkinitcap(word, csconv); - return add_word(word, wbl, wcl, flags2, flagslen + 1, dp, true); - } - } - return 0; -} - -// detect captype and modify word length for UTF-8 encoding -int HashMgr::get_clen_and_captype(const char* word, int wbl, int* captype) { - int len; - if (utf8) { - w_char dest_utf[BUFSIZE]; - len = u8_u16(dest_utf, BUFSIZE, word); - *captype = get_captype_utf8(dest_utf, len, langnum); - } else { - len = wbl; - *captype = get_captype((char*)word, len, csconv); - } - return len; -} - -// remove word (personal dictionary function for standalone applications) -int HashMgr::remove(const char* word) { - struct hentry* dp = lookup(word); - while (dp) { - if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { - unsigned short* flags = - (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1)); - if (!flags) - return 1; - for (int i = 0; i < dp->alen; i++) - flags[i] = dp->astr[i]; - flags[dp->alen] = forbiddenword; - dp->astr = flags; - dp->alen++; - flag_qsort(flags, 0, dp->alen); - } - dp = dp->next_homonym; - } - return 0; -} - -/* remove forbidden flag to add a personal word to the hash */ -int HashMgr::remove_forbidden_flag(const char* word) { - struct hentry* dp = lookup(word); - if (!dp) - return 1; - while (dp) { - if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) { - if (dp->alen == 1) - dp->alen = 0; // XXX forbidden words of personal dic. - else { - unsigned short* flags2 = - (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen - 1)); - if (!flags2) - return 1; - int i, j = 0; - for (i = 0; i < dp->alen; i++) { - if (dp->astr[i] != forbiddenword) - flags2[j++] = dp->astr[i]; - } - dp->alen--; - dp->astr = flags2; // XXX allowed forbidden words - } - } - dp = dp->next_homonym; - } - return 0; -} - -// add a custom dic. word to the hash table (public) -int HashMgr::add(const char* word) { - unsigned short* flags = NULL; - int al = 0; - if (remove_forbidden_flag(word)) { - int captype; - int wbl = strlen(word); - int wcl = get_clen_and_captype(word, wbl, &captype); - add_word(word, wbl, wcl, flags, al, NULL, false); - return add_hidden_capitalized_word((char*)word, wbl, wcl, flags, al, NULL, - captype); - } - return 0; -} - -int HashMgr::add_with_affix(const char* word, const char* example) { - // detect captype and modify word length for UTF-8 encoding - struct hentry* dp = lookup(example); - remove_forbidden_flag(word); - if (dp && dp->astr) { - int captype; - int wbl = strlen(word); - int wcl = get_clen_and_captype(word, wbl, &captype); - if (aliasf) { - add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false); - } else { - unsigned short* flags = - (unsigned short*)malloc(dp->alen * sizeof(unsigned short)); - if (flags) { - memcpy((void*)flags, (void*)dp->astr, - dp->alen * sizeof(unsigned short)); - add_word(word, wbl, wcl, flags, dp->alen, NULL, false); - } else - return 1; - } - return add_hidden_capitalized_word((char*)word, wbl, wcl, dp->astr, - dp->alen, NULL, captype); - } - return 1; -} - -// walk the hash table entry by entry - null at end -// initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp); -struct hentry* HashMgr::walk_hashtable(int& col, struct hentry* hp) const { - if (hp && hp->next != NULL) - return hp->next; - for (col++; col < tablesize; col++) { - if (tableptr[col]) - return tableptr[col]; - } - // null at end and reset to start - col = -1; - return NULL; -} - -// load a munched word list and build a hash table on the fly -int HashMgr::load_tables(const char* tpath, const char* key) { - int al; - char* ap; - char* dp; - char* dp2; - unsigned short* flags; - char* ts; - - // open dictionary file - FileMgr* dict = new FileMgr(tpath, key); - if (dict == NULL) - return 1; - - // first read the first line of file to get hash table size */ - if ((ts = dict->getline()) == NULL) { - HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath); - delete dict; - return 2; - } - mychomp(ts); - - /* remove byte order mark */ - if (strncmp(ts, "\xEF\xBB\xBF", 3) == 0) { - memmove(ts, ts + 3, strlen(ts + 3) + 1); - // warning: dic file begins with byte order mark: possible incompatibility - // with old Hunspell versions - } - - tablesize = atoi(ts); - - int nExtra = 5 + USERWORD; - - if (tablesize <= 0 || - (tablesize >= (std::numeric_limits::max() - 1 - nExtra) / - int(sizeof(struct hentry*)))) { - HUNSPELL_WARNING( - stderr, "error: line 1: missing or bad word count in the dic file\n"); - delete dict; - return 4; - } - tablesize += nExtra; - if ((tablesize % 2) == 0) - tablesize++; - - // allocate the hash table - tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*)); - if (!tableptr) { - delete dict; - return 3; - } - - // loop through all words on much list and add to hash - // table and create word and affix strings - - while ((ts = dict->getline()) != NULL) { - mychomp(ts); - // split each line into word and morphological description - dp = ts; - while ((dp = strchr(dp, ':')) != NULL) { - if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) { - for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--) - ; - if (dp < ts) { // missing word - dp = NULL; - } else { - *(dp + 1) = '\0'; - dp = dp + 2; - } - break; - } - dp++; - } - - // tabulator is the old morphological field separator - dp2 = strchr(ts, '\t'); - if (dp2 && (!dp || dp2 < dp)) { - *dp2 = '\0'; - dp = dp2 + 1; - } - - // split each line into word and affix char strings - // "\/" signs slash in words (not affix separator) - // "/" at beginning of the line is word character (not affix separator) - ap = strchr(ts, '/'); - while (ap) { - if (ap == ts) { - ap++; - continue; - } else if (*(ap - 1) != '\\') - break; - // replace "\/" with "/" - for (char *sp = ap - 1; *sp; *sp = *(sp + 1), sp++) - ; - ap = strchr(ap, '/'); - } - - if (ap) { - *ap = '\0'; - if (aliasf) { - int index = atoi(ap + 1); - al = get_aliasf(index, &flags, dict); - if (!al) { - HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", - dict->getlinenum()); - *ap = '\0'; - } - } else { - al = decode_flags(&flags, ap + 1, dict); - if (al == -1) { - HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); - delete dict; - return 6; - } - flag_qsort(flags, 0, al); - } - } else { - al = 0; - ap = NULL; - flags = NULL; - } - - int captype; - int wbl = strlen(ts); - int wcl = get_clen_and_captype(ts, wbl, &captype); - // add the word and its index plus its capitalized form optionally - if (add_word(ts, wbl, wcl, flags, al, dp, false) || - add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) { - delete dict; - return 5; - } - } - - delete dict; - return 0; -} - -// the hash function is a simple load and rotate -// algorithm borrowed -int HashMgr::hash(const char* word) const { - unsigned long hv = 0; - for (int i = 0; i < 4 && *word != 0; i++) - hv = (hv << 8) | (*word++); - while (*word != 0) { - ROTATE(hv, ROTATE_LEN); - hv ^= (*word++); - } - return (unsigned long)hv % tablesize; -} - -int HashMgr::decode_flags(unsigned short** result, char* flags, FileMgr* af) { - int len; - if (*flags == '\0') { - *result = NULL; - return 0; - } - switch (flag_mode) { - case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) - len = strlen(flags); - if (len % 2 == 1) - HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", - af->getlinenum()); - len /= 2; - *result = (unsigned short*)malloc(len * sizeof(unsigned short)); - if (!*result) - return -1; - for (int i = 0; i < len; i++) { - (*result)[i] = (((unsigned short)flags[i * 2]) << 8) + - (unsigned short)flags[i * 2 + 1]; - } - break; - } - case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 - // 23 233) - int i; - len = 1; - char* src = flags; - unsigned short* dest; - char* p; - for (p = flags; *p; p++) { - if (*p == ',') - len++; - } - *result = (unsigned short*)malloc(len * sizeof(unsigned short)); - if (!*result) - return -1; - dest = *result; - for (p = flags; *p; p++) { - if (*p == ',') { - i = atoi(src); - if (i >= DEFAULTFLAGS) - HUNSPELL_WARNING( - stderr, "error: line %d: flag id %d is too large (max: %d)\n", - af->getlinenum(), i, DEFAULTFLAGS - 1); - *dest = (unsigned short)i; - if (*dest == 0) - HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", - af->getlinenum()); - src = p + 1; - dest++; - } - } - i = atoi(src); - if (i >= DEFAULTFLAGS) - HUNSPELL_WARNING(stderr, - "error: line %d: flag id %d is too large (max: %d)\n", - af->getlinenum(), i, DEFAULTFLAGS - 1); - *dest = (unsigned short)i; - if (*dest == 0) - HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", - af->getlinenum()); - break; - } - case FLAG_UNI: { // UTF-8 characters - w_char w[BUFSIZE / 2]; - len = u8_u16(w, BUFSIZE / 2, flags); - *result = (unsigned short*)malloc(len * sizeof(unsigned short)); - if (!*result) - return -1; - memcpy(*result, w, len * sizeof(short)); - break; - } - default: { // Ispell's one-character flags (erfg -> e r f g) - unsigned short* dest; - len = strlen(flags); - *result = (unsigned short*)malloc(len * sizeof(unsigned short)); - if (!*result) - return -1; - dest = *result; - for (unsigned char* p = (unsigned char*)flags; *p; p++) { - *dest = (unsigned short)*p; - dest++; - } - } - } - return len; -} - -unsigned short HashMgr::decode_flag(const char* f) { - unsigned short s = 0; - int i; - switch (flag_mode) { - case FLAG_LONG: - s = ((unsigned short)f[0] << 8) + (unsigned short)f[1]; - break; - case FLAG_NUM: - i = atoi(f); - if (i >= DEFAULTFLAGS) - HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", - i, DEFAULTFLAGS - 1); - s = (unsigned short)i; - break; - case FLAG_UNI: - u8_u16((w_char*)&s, 1, f); - break; - default: - s = (unsigned short)*((unsigned char*)f); - } - if (s == 0) - HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); - return s; -} - -char* HashMgr::encode_flag(unsigned short f) { - unsigned char ch[10]; - if (f == 0) - return mystrdup("(NULL)"); - if (flag_mode == FLAG_LONG) { - ch[0] = (unsigned char)(f >> 8); - ch[1] = (unsigned char)(f - ((f >> 8) << 8)); - ch[2] = '\0'; - } else if (flag_mode == FLAG_NUM) { - sprintf((char*)ch, "%d", f); - } else if (flag_mode == FLAG_UNI) { - u16_u8((char*)&ch, 10, (w_char*)&f, 1); - } else { - ch[0] = (unsigned char)(f); - ch[1] = '\0'; - } - return mystrdup((char*)ch); -} - -// read in aff file and set flag mode -int HashMgr::load_config(const char* affpath, const char* key) { - char* line; // io buffers - int firstline = 1; - - // open the affix file - FileMgr* afflst = new FileMgr(affpath, key); - if (!afflst) { - HUNSPELL_WARNING( - stderr, "Error - could not open affix description file %s\n", affpath); - return 1; - } - - // read in each line ignoring any that do not - // start with a known line type indicator - - while ((line = afflst->getline()) != NULL) { - mychomp(line); - - /* remove byte order mark */ - if (firstline) { - firstline = 0; - if (strncmp(line, "\xEF\xBB\xBF", 3) == 0) - memmove(line, line + 3, strlen(line + 3) + 1); - } - - /* parse in the try string */ - if ((strncmp(line, "FLAG", 4) == 0) && isspace(line[4])) { - if (flag_mode != FLAG_CHAR) { - HUNSPELL_WARNING(stderr, - "error: line %d: multiple definitions of the FLAG " - "affix file parameter\n", - afflst->getlinenum()); - } - if (strstr(line, "long")) - flag_mode = FLAG_LONG; - if (strstr(line, "num")) - flag_mode = FLAG_NUM; - if (strstr(line, "UTF-8")) - flag_mode = FLAG_UNI; - if (flag_mode == FLAG_CHAR) { - HUNSPELL_WARNING( - stderr, - "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", - afflst->getlinenum()); - } - } - if (strncmp(line, "FORBIDDENWORD", 13) == 0) { - char* st = NULL; - if (parse_string(line, &st, afflst->getlinenum())) { - delete afflst; - return 1; - } - forbiddenword = decode_flag(st); - free(st); - } - if (strncmp(line, "SET", 3) == 0) { - if (parse_string(line, &enc, afflst->getlinenum())) { - delete afflst; - return 1; - } - if (strcmp(enc, "UTF-8") == 0) { - utf8 = 1; -#ifndef OPENOFFICEORG -#ifndef MOZILLA_CLIENT - initialize_utf_tbl(); -#endif -#endif - } else - csconv = get_current_cs(enc); - } - if (strncmp(line, "LANG", 4) == 0) { - if (parse_string(line, &lang, afflst->getlinenum())) { - delete afflst; - return 1; - } - langnum = get_lang_num(lang); - } - - /* parse in the ignored characters (for example, Arabic optional diacritics - * characters */ - if (strncmp(line, "IGNORE", 6) == 0) { - if (parse_array(line, &ignorechars, &ignorechars_utf16, - &ignorechars_utf16_len, utf8, afflst->getlinenum())) { - delete afflst; - return 1; - } - } - - if ((strncmp(line, "AF", 2) == 0) && isspace(line[2])) { - if (parse_aliasf(line, afflst)) { - delete afflst; - return 1; - } - } - - if ((strncmp(line, "AM", 2) == 0) && isspace(line[2])) { - if (parse_aliasm(line, afflst)) { - delete afflst; - return 1; - } - } - - if (strncmp(line, "COMPLEXPREFIXES", 15) == 0) - complexprefixes = 1; - if (((strncmp(line, "SFX", 3) == 0) || (strncmp(line, "PFX", 3) == 0)) && - isspace(line[3])) - break; - } - if (csconv == NULL) - csconv = get_current_cs(SPELL_ENCODING); - delete afflst; - return 0; -} - -/* parse in the ALIAS table */ -int HashMgr::parse_aliasf(char* line, FileMgr* af) { - if (numaliasf != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", - af->getlinenum()); - return 1; - } - char* tp = line; - char* piece; - int i = 0; - int np = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - np++; - break; - } - case 1: { - numaliasf = atoi(piece); - if (numaliasf < 1) { - numaliasf = 0; - aliasf = NULL; - aliasflen = NULL; - HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", - af->getlinenum()); - return 1; - } - aliasf = - (unsigned short**)malloc(numaliasf * sizeof(unsigned short*)); - aliasflen = - (unsigned short*)malloc(numaliasf * sizeof(unsigned short)); - if (!aliasf || !aliasflen) { - numaliasf = 0; - if (aliasf) - free(aliasf); - if (aliasflen) - free(aliasflen); - aliasf = NULL; - aliasflen = NULL; - return 1; - } - np++; - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (np != 2) { - numaliasf = 0; - free(aliasf); - free(aliasflen); - aliasf = NULL; - aliasflen = NULL; - HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", - af->getlinenum()); - return 1; - } - - /* now parse the numaliasf lines to read in the remainder of the table */ - char* nl; - for (int j = 0; j < numaliasf; j++) { - if ((nl = af->getline()) == NULL) - return 1; - mychomp(nl); - tp = nl; - i = 0; - aliasf[j] = NULL; - aliasflen[j] = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - if (strncmp(piece, "AF", 2) != 0) { - numaliasf = 0; - free(aliasf); - free(aliasflen); - aliasf = NULL; - aliasflen = NULL; - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - return 1; - } - break; - } - case 1: { - aliasflen[j] = - (unsigned short)decode_flags(&(aliasf[j]), piece, af); - flag_qsort(aliasf[j], 0, aliasflen[j]); - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (!aliasf[j]) { - free(aliasf); - free(aliasflen); - aliasf = NULL; - aliasflen = NULL; - numaliasf = 0; - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - return 1; - } - } - return 0; -} - -int HashMgr::is_aliasf() { - return (aliasf != NULL); -} - -int HashMgr::get_aliasf(int index, unsigned short** fvec, FileMgr* af) { - if ((index > 0) && (index <= numaliasf)) { - *fvec = aliasf[index - 1]; - return aliasflen[index - 1]; - } - HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", - af->getlinenum(), index); - *fvec = NULL; - return 0; -} - -/* parse morph alias definitions */ -int HashMgr::parse_aliasm(char* line, FileMgr* af) { - if (numaliasm != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", - af->getlinenum()); - return 1; - } - char* tp = line; - char* piece; - int i = 0; - int np = 0; - piece = mystrsep(&tp, 0); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - np++; - break; - } - case 1: { - numaliasm = atoi(piece); - if (numaliasm < 1) { - HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", - af->getlinenum()); - return 1; - } - aliasm = (char**)malloc(numaliasm * sizeof(char*)); - if (!aliasm) { - numaliasm = 0; - return 1; - } - np++; - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, 0); - } - if (np != 2) { - numaliasm = 0; - free(aliasm); - aliasm = NULL; - HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", - af->getlinenum()); - return 1; - } - - /* now parse the numaliasm lines to read in the remainder of the table */ - char* nl = line; - for (int j = 0; j < numaliasm; j++) { - if ((nl = af->getline()) == NULL) - return 1; - mychomp(nl); - tp = nl; - i = 0; - aliasm[j] = NULL; - piece = mystrsep(&tp, ' '); - while (piece) { - if (*piece != '\0') { - switch (i) { - case 0: { - if (strncmp(piece, "AM", 2) != 0) { - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - numaliasm = 0; - free(aliasm); - aliasm = NULL; - return 1; - } - break; - } - case 1: { - // add the remaining of the line - if (*tp) { - *(tp - 1) = ' '; - tp = tp + strlen(tp); - } - if (complexprefixes) { - if (utf8) - reverseword_utf(piece); - else - reverseword(piece); - } - aliasm[j] = mystrdup(piece); - if (!aliasm[j]) { - numaliasm = 0; - free(aliasm); - aliasm = NULL; - return 1; - } - break; - } - default: - break; - } - i++; - } - piece = mystrsep(&tp, ' '); - } - if (!aliasm[j]) { - numaliasm = 0; - free(aliasm); - aliasm = NULL; - HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", - af->getlinenum()); - return 1; - } - } - return 0; -} - -int HashMgr::is_aliasm() { - return (aliasm != NULL); -} - -char* HashMgr::get_aliasm(int index) { - if ((index > 0) && (index <= numaliasm)) - return aliasm[index - 1]; - HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index); - return NULL; -} diff --git a/libs/hunspell/src/hunspell.c++ b/libs/hunspell/src/hunspell.c++ new file mode 100644 index 0000000000..726c72931a --- /dev/null +++ b/libs/hunspell/src/hunspell.c++ @@ -0,0 +1,2367 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Hunspell, based on MySpell. + * + * The Initial Developers of the Original Code are + * Kevin Hendricks (MySpell) and Németh László (Hunspell). + * Portions created by the Initial Developers are Copyright (C) 2002-2005 + * the Initial Developers. All Rights Reserved. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +#include "hunspell.hxx" +#include "hunspell.h" +#ifndef MOZILLA_CLIENT +#include "config.h" +#endif +#include "csutil.hxx" + +#include +#include + +Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key) { + encoding = NULL; + csconv = NULL; + utf8 = 0; + complexprefixes = 0; + affixpath = mystrdup(affpath); + maxdic = 0; + + /* first set up the hash manager */ + pHMgr[0] = new HashMgr(dpath, affpath, key); + if (pHMgr[0]) + maxdic = 1; + + /* next set up the affix manager */ + /* it needs access to the hash manager lookup methods */ + pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key); + + /* get the preferred try string and the dictionary */ + /* encoding from the Affix Manager for that dictionary */ + char* try_string = pAMgr->get_try_string(); + encoding = pAMgr->get_encoding(); + langnum = pAMgr->get_langnum(); + utf8 = pAMgr->get_utf8(); + if (!utf8) + csconv = get_current_cs(encoding); + complexprefixes = pAMgr->get_complexprefixes(); + wordbreak = pAMgr->get_breaktable(); + + /* and finally set up the suggestion manager */ + pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr); + if (try_string) + free(try_string); +} + +Hunspell::~Hunspell() { + delete pSMgr; + delete pAMgr; + for (int i = 0; i < maxdic; i++) + delete pHMgr[i]; + maxdic = 0; + pSMgr = NULL; + pAMgr = NULL; +#ifdef MOZILLA_CLIENT + delete[] csconv; +#endif + csconv = NULL; + if (encoding) + free(encoding); + encoding = NULL; + if (affixpath) + free(affixpath); + affixpath = NULL; +} + +// load extra dictionaries +int Hunspell::add_dic(const char* dpath, const char* key) { + if (maxdic == MAXDIC || !affixpath) + return 1; + pHMgr[maxdic] = new HashMgr(dpath, affixpath, key); + if (pHMgr[maxdic]) + maxdic++; + else + return 1; + return 0; +} + +// make a copy of src at destination while removing all leading +// blanks and removing any trailing periods after recording +// their presence with the abbreviation flag +// also since already going through character by character, +// set the capitalization type +// return the length of the "cleaned" (and UTF-8 encoded) word + +int Hunspell::cleanword2(char* dest, + const char* src, + w_char* dest_utf, + int* nc, + int* pcaptype, + int* pabbrev) { + unsigned char* p = (unsigned char*)dest; + const unsigned char* q = (const unsigned char*)src; + + // first skip over any leading blanks + while ((*q != '\0') && (*q == ' ')) + q++; + + // now strip off any trailing periods (recording their presence) + *pabbrev = 0; + int nl = strlen((const char*)q); + while ((nl > 0) && (*(q + nl - 1) == '.')) { + nl--; + (*pabbrev)++; + } + + // if no characters are left it can't be capitalized + if (nl <= 0) { + *pcaptype = NOCAP; + *p = '\0'; + return 0; + } + + strncpy(dest, (char*)q, nl); + *(dest + nl) = '\0'; + nl = strlen(dest); + if (utf8) { + *nc = u8_u16(dest_utf, MAXWORDLEN, dest); + // don't check too long words + if (*nc >= MAXWORDLEN) + return 0; + if (*nc == -1) { // big Unicode character (non BMP area) + *pcaptype = NOCAP; + return nl; + } + *pcaptype = get_captype_utf8(dest_utf, *nc, langnum); + } else { + *pcaptype = get_captype(dest, nl, csconv); + *nc = nl; + } + return nl; +} + +int Hunspell::cleanword(char* dest, + const char* src, + int* pcaptype, + int* pabbrev) { + unsigned char* p = (unsigned char*)dest; + const unsigned char* q = (const unsigned char*)src; + int firstcap = 0; + + // first skip over any leading blanks + while ((*q != '\0') && (*q == ' ')) + q++; + + // now strip off any trailing periods (recording their presence) + *pabbrev = 0; + int nl = strlen((const char*)q); + while ((nl > 0) && (*(q + nl - 1) == '.')) { + nl--; + (*pabbrev)++; + } + + // if no characters are left it can't be capitalized + if (nl <= 0) { + *pcaptype = NOCAP; + *p = '\0'; + return 0; + } + + // now determine the capitalization type of the first nl letters + int ncap = 0; + int nneutral = 0; + int nc = 0; + + if (!utf8) { + while (nl > 0) { + nc++; + if (csconv[(*q)].ccase) + ncap++; + if (csconv[(*q)].cupper == csconv[(*q)].clower) + nneutral++; + *p++ = *q++; + nl--; + } + // remember to terminate the destination string + *p = '\0'; + firstcap = csconv[(unsigned char)(*dest)].ccase; + } else { + unsigned short idx; + w_char t[MAXWORDLEN]; + nc = u8_u16(t, MAXWORDLEN, src); + for (int i = 0; i < nc; i++) { + idx = (t[i].h << 8) + t[i].l; + unsigned short low = unicodetolower(idx, langnum); + if (idx != low) + ncap++; + if (unicodetoupper(idx, langnum) == low) + nneutral++; + } + u16_u8(dest, MAXWORDUTF8LEN, t, nc); + if (ncap) { + idx = (t[0].h << 8) + t[0].l; + firstcap = (idx != unicodetolower(idx, langnum)); + } + } + + // now finally set the captype + if (ncap == 0) { + *pcaptype = NOCAP; + } else if ((ncap == 1) && firstcap) { + *pcaptype = INITCAP; + } else if ((ncap == nc) || ((ncap + nneutral) == nc)) { + *pcaptype = ALLCAP; + } else if ((ncap > 1) && firstcap) { + *pcaptype = HUHINITCAP; + } else { + *pcaptype = HUHCAP; + } + return strlen(dest); +} + +void Hunspell::mkallcap(char* p) { + if (utf8) { + w_char u[MAXWORDLEN]; + int nc = u8_u16(u, MAXWORDLEN, p); + unsigned short idx; + for (int i = 0; i < nc; i++) { + idx = (u[i].h << 8) + u[i].l; + if (idx != unicodetoupper(idx, langnum)) { + u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); + u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); + } + } + u16_u8(p, MAXWORDUTF8LEN, u, nc); + } else { + while (*p != '\0') { + *p = csconv[((unsigned char)*p)].cupper; + p++; + } + } +} + +int Hunspell::mkallcap2(char* p, w_char* u, int nc) { + if (utf8) { + unsigned short idx; + for (int i = 0; i < nc; i++) { + idx = (u[i].h << 8) + u[i].l; + unsigned short up = unicodetoupper(idx, langnum); + if (idx != up) { + u[i].h = (unsigned char)(up >> 8); + u[i].l = (unsigned char)(up & 0x00FF); + } + } + u16_u8(p, MAXWORDUTF8LEN, u, nc); + return strlen(p); + } else { + while (*p != '\0') { + *p = csconv[((unsigned char)*p)].cupper; + p++; + } + } + return nc; +} + +void Hunspell::mkallsmall(char* p) { + while (*p != '\0') { + *p = csconv[((unsigned char)*p)].clower; + p++; + } +} + +int Hunspell::mkallsmall2(char* p, w_char* u, int nc) { + if (utf8) { + unsigned short idx; + for (int i = 0; i < nc; i++) { + idx = (u[i].h << 8) + u[i].l; + unsigned short low = unicodetolower(idx, langnum); + if (idx != low) { + u[i].h = (unsigned char)(low >> 8); + u[i].l = (unsigned char)(low & 0x00FF); + } + } + u16_u8(p, MAXWORDUTF8LEN, u, nc); + return strlen(p); + } else { + while (*p != '\0') { + *p = csconv[((unsigned char)*p)].clower; + p++; + } + } + return nc; +} + +// convert UTF-8 sharp S codes to latin 1 +char* Hunspell::sharps_u8_l1(char* dest, char* source) { + char* p = dest; + *p = *source; + for (p++, source++; *(source - 1); p++, source++) { + *p = *source; + if (*source == '\x9F') + *--p = '\xDF'; + } + return dest; +} + +// recursive search for right ss - sharp s permutations +hentry* Hunspell::spellsharps(char* base, + char* pos, + int n, + int repnum, + char* tmp, + int* info, + char** root) { + pos = strstr(pos, "ss"); + if (pos && (n < MAXSHARPS)) { + *pos = '\xC3'; + *(pos + 1) = '\x9F'; + hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root); + if (h) + return h; + *pos = 's'; + *(pos + 1) = 's'; + h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root); + if (h) + return h; + } else if (repnum > 0) { + if (utf8) + return checkword(base, info, root); + return checkword(sharps_u8_l1(tmp, base), info, root); + } + return NULL; +} + +int Hunspell::is_keepcase(const hentry* rv) { + return pAMgr && rv->astr && pAMgr->get_keepcase() && + TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen); +} + +/* insert a word to the beginning of the suggestion array and return ns */ +int Hunspell::insert_sug(char*** slst, char* word, int ns) { + if (!*slst) + return ns; + char* dup = mystrdup(word); + if (!dup) + return ns; + if (ns == MAXSUGGESTION) { + ns--; + free((*slst)[ns]); + } + for (int k = ns; k > 0; k--) + (*slst)[k] = (*slst)[k - 1]; + (*slst)[0] = dup; + return ns + 1; +} + +int Hunspell::spell(const char* word, int* info, char** root) { + struct hentry* rv = NULL; + // need larger vector. For example, Turkish capital letter I converted a + // 2-byte UTF-8 character (dotless i) by mkallsmall. + char cw[MAXWORDUTF8LEN]; + char wspace[MAXWORDUTF8LEN]; + w_char unicw[MAXWORDLEN]; + + int info2 = 0; + if (!info) + info = &info2; + else + *info = 0; + + // Hunspell supports XML input of the simplified API (see manual) + if (strcmp(word, SPELL_XML) == 0) + return 1; + int nc = strlen(word); + int wl2 = 0; + if (utf8) { + if (nc >= MAXWORDUTF8LEN) + return 0; + } else { + if (nc >= MAXWORDLEN) + return 0; + } + int captype = 0; + int abbv = 0; + int wl = 0; + + // input conversion + RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0; + if (convstatus < 0) + return 0; + else if (convstatus > 0) + wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); + else + wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); + +#ifdef MOZILLA_CLIENT + // accept the abbreviated words without dots + // workaround for the incomplete tokenization of Mozilla + abbv = 1; +#endif + + if (wl == 0 || maxdic == 0) + return 1; + if (root) + *root = NULL; + + // allow numbers with dots, dashes and commas (but forbid double separators: + // "..", "--" etc.) + enum { NBEGIN, NNUM, NSEP }; + int nstate = NBEGIN; + int i; + + for (i = 0; (i < wl); i++) { + if ((cw[i] <= '9') && (cw[i] >= '0')) { + nstate = NNUM; + } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) { + if ((nstate == NSEP) || (i == 0)) + break; + nstate = NSEP; + } else + break; + } + if ((i == wl) && (nstate == NNUM)) + return 1; + + switch (captype) { + case HUHCAP: + /* FALLTHROUGH */ + case HUHINITCAP: + *info += SPELL_ORIGCAP; + /* FALLTHROUGH */ + case NOCAP: + rv = checkword(cw, info, root); + if ((abbv) && !(rv)) { + memcpy(wspace, cw, wl); + *(wspace + wl) = '.'; + *(wspace + wl + 1) = '\0'; + rv = checkword(wspace, info, root); + } + break; + case ALLCAP: { + *info += SPELL_ORIGCAP; + rv = checkword(cw, info, root); + if (rv) + break; + if (abbv) { + memcpy(wspace, cw, wl); + *(wspace + wl) = '.'; + *(wspace + wl + 1) = '\0'; + rv = checkword(wspace, info, root); + if (rv) + break; + } + // Spec. prefix handling for Catalan, French, Italian: + // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia). + if (pAMgr && strchr(cw, '\'')) { + mkallsmall2(cw, unicw, nc); + // There are no really sane circumstances where this could fail, + // but anyway... + if (char* apostrophe = strchr(cw, '\'')) { + if (utf8) { + w_char tmpword[MAXWORDLEN]; + *apostrophe = '\0'; + wl2 = u8_u16(tmpword, MAXWORDLEN, cw); + *apostrophe = '\''; + if (wl2 >= 0 && wl2 < nc) { + mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1); + rv = checkword(cw, info, root); + if (rv) + break; + } + } else { + mkinitcap2(apostrophe + 1, unicw, nc); + rv = checkword(cw, info, root); + if (rv) + break; + } + } + mkinitcap2(cw, unicw, nc); + rv = checkword(cw, info, root); + if (rv) + break; + } + if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) { + char tmpword[MAXWORDUTF8LEN]; + wl = mkallsmall2(cw, unicw, nc); + memcpy(wspace, cw, (wl + 1)); + rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); + if (!rv) { + wl2 = mkinitcap2(cw, unicw, nc); + rv = spellsharps(cw, cw, 0, 0, tmpword, info, root); + } + if ((abbv) && !(rv)) { + *(wspace + wl) = '.'; + *(wspace + wl + 1) = '\0'; + rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); + if (!rv) { + memcpy(wspace, cw, wl2); + *(wspace + wl2) = '.'; + *(wspace + wl2 + 1) = '\0'; + rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); + } + } + if (rv) + break; + } + } + case INITCAP: { + *info += SPELL_ORIGCAP; + wl = mkallsmall2(cw, unicw, nc); + memcpy(wspace, cw, (wl + 1)); + wl2 = mkinitcap2(cw, unicw, nc); + if (captype == INITCAP) + *info += SPELL_INITCAP; + rv = checkword(cw, info, root); + if (captype == INITCAP) + *info -= SPELL_INITCAP; + // forbid bad capitalization + // (for example, ijs -> Ijs instead of IJs in Dutch) + // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag) + if (*info & SPELL_FORBIDDEN) { + rv = NULL; + break; + } + if (rv && is_keepcase(rv) && (captype == ALLCAP)) + rv = NULL; + if (rv) + break; + + rv = checkword(wspace, info, root); + if (abbv && !rv) { + *(wspace + wl) = '.'; + *(wspace + wl + 1) = '\0'; + rv = checkword(wspace, info, root); + if (!rv) { + memcpy(wspace, cw, wl2); + *(wspace + wl2) = '.'; + *(wspace + wl2 + 1) = '\0'; + if (captype == INITCAP) + *info += SPELL_INITCAP; + rv = checkword(wspace, info, root); + if (captype == INITCAP) + *info -= SPELL_INITCAP; + if (rv && is_keepcase(rv) && (captype == ALLCAP)) + rv = NULL; + break; + } + } + if (rv && is_keepcase(rv) && + ((captype == ALLCAP) || + // if CHECKSHARPS: KEEPCASE words with \xDF are allowed + // in INITCAP form, too. + !(pAMgr->get_checksharps() && + ((utf8 && strstr(wspace, "\xC3\x9F")) || + (!utf8 && strchr(wspace, '\xDF')))))) + rv = NULL; + break; + } + } + + if (rv) { + if (pAMgr && pAMgr->get_warn() && rv->astr && + TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) { + *info += SPELL_WARN; + if (pAMgr->get_forbidwarn()) + return 0; + return HUNSPELL_OK_WARN; + } + return HUNSPELL_OK; + } + + // recursive breaking at break points + if (wordbreak) { + char* s; + char r; + int nbr = 0; + wl = strlen(cw); + int numbreak = pAMgr ? pAMgr->get_numbreak() : 0; + + // calculate break points for recursion limit + for (int j = 0; j < numbreak; j++) { + s = cw; + do { + s = (char*)strstr(s, wordbreak[j]); + if (s) { + nbr++; + s++; + } + } while (s); + } + if (nbr >= 10) + return 0; + + // check boundary patterns (^begin and end$) + for (int j = 0; j < numbreak; j++) { + int plen = strlen(wordbreak[j]); + if (plen == 1 || plen > wl) + continue; + if (wordbreak[j][0] == '^' && + strncmp(cw, wordbreak[j] + 1, plen - 1) == 0 && spell(cw + plen - 1)) + return 1; + if (wordbreak[j][plen - 1] == '$' && + strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) { + r = cw[wl - plen + 1]; + cw[wl - plen + 1] = '\0'; + if (spell(cw)) + return 1; + cw[wl - plen + 1] = r; + } + } + + // other patterns + for (int j = 0; j < numbreak; j++) { + int plen = strlen(wordbreak[j]); + s = (char*)strstr(cw, wordbreak[j]); + if (s && (s > cw) && (s < cw + wl - plen)) { + if (!spell(s + plen)) + continue; + r = *s; + *s = '\0'; + // examine 2 sides of the break point + if (spell(cw)) + return 1; + *s = r; + + // LANG_hu: spec. dash rule + if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) { + r = s[1]; + s[1] = '\0'; + if (spell(cw)) + return 1; // check the first part with dash + s[1] = r; + } + // end of LANG speficic region + } + } + } + + return 0; +} + +struct hentry* Hunspell::checkword(const char* w, int* info, char** root) { + struct hentry* he = NULL; + bool usebuffer = false; + int len, i; + std::string w2; + const char* word; + + char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL; + if (ignoredchars != NULL) { + w2.assign(w); + if (utf8) { + int ignoredchars_utf16_len; + unsigned short* ignoredchars_utf16 = + pAMgr->get_ignore_utf16(&ignoredchars_utf16_len); + remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len); + } else { + remove_ignored_chars(w2, ignoredchars); + } + word = w2.c_str(); + usebuffer = true; + } else + word = w; + + len = strlen(word); + + if (!len) + return NULL; + + // word reversing wrapper for complex prefixes + if (complexprefixes) { + if (!usebuffer) { + w2.assign(word); + usebuffer = true; + } + if (utf8) + reverseword_utf(w2); + else + reverseword(w2); + } + + if (usebuffer) { + word = w2.c_str(); + } + + // look word in hash table + for (i = 0; (i < maxdic) && !he; i++) { + he = (pHMgr[i])->lookup(word); + + // check forbidden and onlyincompound words + if ((he) && (he->astr) && (pAMgr) && + TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { + if (info) + *info += SPELL_FORBIDDEN; + // LANG_hu section: set dash information for suggestions + if (langnum == LANG_hu) { + if (pAMgr->get_compoundflag() && + TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) { + if (info) + *info += SPELL_COMPOUND; + } + } + return NULL; + } + + // he = next not needaffix, onlyincompound homonym or onlyupcase word + while (he && (he->astr) && pAMgr && + ((pAMgr->get_needaffix() && + TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) || + (pAMgr->get_onlyincompound() && + TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) || + (info && (*info & SPELL_INITCAP) && + TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) + he = he->next_homonym; + } + + // check with affixes + if (!he && pAMgr) { + // try stripping off affixes */ + he = pAMgr->affix_check(word, len, 0); + + // check compound restriction and onlyupcase + if (he && he->astr && + ((pAMgr->get_onlyincompound() && + TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) || + (info && (*info & SPELL_INITCAP) && + TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) { + he = NULL; + } + + if (he) { + if ((he->astr) && (pAMgr) && + TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { + if (info) + *info += SPELL_FORBIDDEN; + return NULL; + } + if (root) { + *root = mystrdup(he->word); + if (*root && complexprefixes) { + if (utf8) + reverseword_utf(*root); + else + reverseword(*root); + } + } + // try check compound word + } else if (pAMgr->get_compound()) { + he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info); + // LANG_hu section: `moving rule' with last dash + if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) { + char* dup = mystrdup(word); + if (!dup) + return NULL; + dup[len - 1] = '\0'; + he = pAMgr->compound_check(dup, len - 1, -5, 0, 100, 0, NULL, 1, 0, + info); + free(dup); + } + // end of LANG speficic region + if (he) { + if (root) { + *root = mystrdup(he->word); + if (*root && complexprefixes) { + if (utf8) + reverseword_utf(*root); + else + reverseword(*root); + } + } + if (info) + *info += SPELL_COMPOUND; + } + } + } + + return he; +} + +int Hunspell::suggest(char*** slst, const char* word) { + int onlycmpdsug = 0; + char cw[MAXWORDUTF8LEN]; + char wspace[MAXWORDUTF8LEN]; + if (!pSMgr || maxdic == 0) + return 0; + w_char unicw[MAXWORDLEN]; + *slst = NULL; + // process XML input of the simplified API (see manual) + if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) { + return spellml(slst, word); + } + int nc = strlen(word); + if (utf8) { + if (nc >= MAXWORDUTF8LEN) + return 0; + } else { + if (nc >= MAXWORDLEN) + return 0; + } + int captype = 0; + int abbv = 0; + int wl = 0; + + // input conversion + RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0; + if (convstatus < 0) + return 0; + else if (convstatus > 0) + wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); + else + wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); + + if (wl == 0) + return 0; + int ns = 0; + int capwords = 0; + + // check capitalized form for FORCEUCASE + if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) { + int info = SPELL_ORIGCAP; + char** wlst; + if (checkword(cw, &info, NULL)) { + if (*slst) { + wlst = *slst; + } else { + wlst = (char**)malloc(MAXSUGGESTION * sizeof(char*)); + if (wlst == NULL) + return -1; + *slst = wlst; + for (int i = 0; i < MAXSUGGESTION; i++) { + wlst[i] = NULL; + } + } + wlst[0] = mystrdup(cw); + mkinitcap(wlst[0]); + return 1; + } + } + + switch (captype) { + case NOCAP: { + ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); + break; + } + + case INITCAP: { + capwords = 1; + ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); + if (ns == -1) + break; + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); + break; + } + case HUHINITCAP: + capwords = 1; + case HUHCAP: { + ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); + if (ns != -1) { + int prevns; + // something.The -> something. The + char* dot = strchr(cw, '.'); + if (dot && (dot > cw)) { + int captype_; + if (utf8) { + w_char w_[MAXWORDLEN]; + int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1); + captype_ = get_captype_utf8(w_, wl_, langnum); + } else + captype_ = get_captype(dot + 1, strlen(dot + 1), csconv); + if (captype_ == INITCAP) { + char* st = mystrdup(cw); + if (st) { + char* newst = (char*)realloc(st, wl + 2); + if (newst == NULL) + free(st); + st = newst; + } + if (st) { + st[(dot - cw) + 1] = ' '; + strcpy(st + (dot - cw) + 2, dot + 1); + ns = insert_sug(slst, st, ns); + free(st); + } + } + } + if (captype == HUHINITCAP) { + // TheOpenOffice.org -> The OpenOffice.org + memcpy(wspace, cw, (wl + 1)); + mkinitsmall2(wspace, unicw, nc); + ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); + } + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + if (spell(wspace)) + ns = insert_sug(slst, wspace, ns); + prevns = ns; + ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); + if (captype == HUHINITCAP) { + mkinitcap2(wspace, unicw, nc); + if (spell(wspace)) + ns = insert_sug(slst, wspace, ns); + ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); + } + // aNew -> "a New" (instead of "a new") + for (int j = prevns; j < ns; j++) { + char* space = strchr((*slst)[j], ' '); + if (space) { + int slen = strlen(space + 1); + // different case after space (need capitalisation) + if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) { + w_char w[MAXWORDLEN]; + int wc = 0; + char* r = (*slst)[j]; + if (utf8) + wc = u8_u16(w, MAXWORDLEN, space + 1); + mkinitcap2(space + 1, w, wc); + // set as first suggestion + for (int k = j; k > 0; k--) + (*slst)[k] = (*slst)[k - 1]; + (*slst)[0] = r; + } + } + } + } + break; + } + + case ALLCAP: { + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); + if (ns == -1) + break; + if (pAMgr && pAMgr->get_keepcase() && spell(wspace)) + ns = insert_sug(slst, wspace, ns); + mkinitcap2(wspace, unicw, nc); + ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); + for (int j = 0; j < ns; j++) { + mkallcap((*slst)[j]); + if (pAMgr && pAMgr->get_checksharps()) { + char* pos; + if (utf8) { + pos = strstr((*slst)[j], "\xC3\x9F"); + while (pos) { + *pos = 'S'; + *(pos + 1) = 'S'; + pos = strstr(pos + 2, "\xC3\x9F"); + } + } else { + pos = strchr((*slst)[j], '\xDF'); + while (pos) { + (*slst)[j] = (char*)realloc((*slst)[j], strlen((*slst)[j]) + 2); + mystrrep((*slst)[j], "\xDF", "SS"); + pos = strchr((*slst)[j], '\xDF'); + } + } + } + } + break; + } + } + + // LANG_hu section: replace '-' with ' ' in Hungarian + if (langnum == LANG_hu) { + for (int j = 0; j < ns; j++) { + char* pos = strchr((*slst)[j], '-'); + if (pos) { + int info; + char w[MAXWORDUTF8LEN]; + *pos = '\0'; + strcpy(w, (*slst)[j]); + strcat(w, pos + 1); + (void)spell(w, &info, NULL); + if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { + *pos = ' '; + } else + *pos = '-'; + } + } + } + // END OF LANG_hu section + + // try ngram approach since found nothing or only compound words + if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && + (*slst)) { + switch (captype) { + case NOCAP: { + ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic); + break; + } + case HUHINITCAP: + capwords = 1; + case HUHCAP: { + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); + break; + } + case INITCAP: { + capwords = 1; + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); + break; + } + case ALLCAP: { + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + int oldns = ns; + ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); + for (int j = oldns; j < ns; j++) + mkallcap((*slst)[j]); + break; + } + } + } + + // try dash suggestion (Afo-American -> Afro-American) + if (char* pos = strchr(cw, '-')) { + char* ppos = cw; + int nodashsug = 1; + char** nlst = NULL; + int nn = 0; + int last = 0; + if (*slst) { + for (int j = 0; j < ns && nodashsug == 1; j++) { + if (strchr((*slst)[j], '-')) + nodashsug = 0; + } + } + while (nodashsug && !last) { + if (*pos == '\0') + last = 1; + else + *pos = '\0'; + if (!spell(ppos)) { + nn = suggest(&nlst, ppos); + for (int j = nn - 1; j >= 0; j--) { + strncpy(wspace, cw, ppos - cw); + strcpy(wspace + (ppos - cw), nlst[j]); + if (!last) { + strcat(wspace, "-"); + strcat(wspace, pos + 1); + } + ns = insert_sug(slst, wspace, ns); + free(nlst[j]); + } + if (nlst != NULL) + free(nlst); + nodashsug = 0; + } + if (!last) { + *pos = '-'; + ppos = pos + 1; + pos = strchr(ppos, '-'); + } + if (!pos) + pos = cw + strlen(cw); + } + } + + // word reversing wrapper for complex prefixes + if (complexprefixes) { + for (int j = 0; j < ns; j++) { + if (utf8) + reverseword_utf((*slst)[j]); + else + reverseword((*slst)[j]); + } + } + + // capitalize + if (capwords) + for (int j = 0; j < ns; j++) { + mkinitcap((*slst)[j]); + } + + // expand suggestions with dot(s) + if (abbv && pAMgr && pAMgr->get_sugswithdots()) { + for (int j = 0; j < ns; j++) { + (*slst)[j] = (char*)realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv); + strcat((*slst)[j], word + strlen(word) - abbv); + } + } + + // remove bad capitalized and forbidden forms + if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) { + switch (captype) { + case INITCAP: + case ALLCAP: { + int l = 0; + for (int j = 0; j < ns; j++) { + if (!strchr((*slst)[j], ' ') && !spell((*slst)[j])) { + char s[MAXSWUTF8L]; + w_char w[MAXSWL]; + int len; + if (utf8) { + len = u8_u16(w, MAXSWL, (*slst)[j]); + } else { + strcpy(s, (*slst)[j]); + len = strlen(s); + } + mkallsmall2(s, w, len); + free((*slst)[j]); + if (spell(s)) { + (*slst)[l] = mystrdup(s); + if ((*slst)[l]) + l++; + } else { + mkinitcap2(s, w, len); + if (spell(s)) { + (*slst)[l] = mystrdup(s); + if ((*slst)[l]) + l++; + } + } + } else { + (*slst)[l] = (*slst)[j]; + l++; + } + } + ns = l; + } + } + } + + // remove duplications + int l = 0; + for (int j = 0; j < ns; j++) { + (*slst)[l] = (*slst)[j]; + for (int k = 0; k < l; k++) { + if (strcmp((*slst)[k], (*slst)[j]) == 0) { + free((*slst)[j]); + l--; + break; + } + } + l++; + } + ns = l; + + // output conversion + rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + for (int j = 0; rl && j < ns; j++) { + if (rl->conv((*slst)[j], wspace, MAXWORDUTF8LEN) > 0) { + free((*slst)[j]); + (*slst)[j] = mystrdup(wspace); + } + } + + // if suggestions removed by nosuggest, onlyincompound parameters + if (l == 0 && *slst) { + free(*slst); + *slst = NULL; + } + return l; +} + +void Hunspell::free_list(char*** slst, int n) { + freelist(slst, n); +} + +char* Hunspell::get_dic_encoding() { + return encoding; +} + +#ifdef HUNSPELL_EXPERIMENTAL +// XXX UTF-8 support is OK? +int Hunspell::suggest_auto(char*** slst, const char* word) { + char cw[MAXWORDUTF8LEN]; + char wspace[MAXWORDUTF8LEN]; + if (!pSMgr || maxdic == 0) + return 0; + w_char unicw[MAXWORDLEN]; + int nc = strlen(word); + if (utf8) { + if (nc >= MAXWORDUTF8LEN) + return 0; + } else { + if (nc >= MAXWORDLEN) + return 0; + } + int captype = 0; + int abbv = 0; + int wl = 0; + + // input conversion + RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + int convstatus = rl ? rl->conv(word, wspace) : 0; + if (convstatus < 0) + return 0; + else if (convstatus > 0) + wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); + else + wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); + + if (wl == 0) + return 0; + int ns = 0; + *slst = NULL; // HU, nsug in pSMgr->suggest + + switch (captype) { + case NOCAP: { + ns = pSMgr->suggest_auto(slst, cw, ns); + if (ns > 0) + break; + break; + } + + case INITCAP: { + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + ns = pSMgr->suggest_auto(slst, wspace, ns); + for (int j = 0; j < ns; j++) + mkinitcap((*slst)[j]); + ns = pSMgr->suggest_auto(slst, cw, ns); + break; + } + + case HUHINITCAP: + case HUHCAP: { + ns = pSMgr->suggest_auto(slst, cw, ns); + if (ns == 0) { + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + ns = pSMgr->suggest_auto(slst, wspace, ns); + } + break; + } + + case ALLCAP: { + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + ns = pSMgr->suggest_auto(slst, wspace, ns); + + mkinitcap(wspace); + ns = pSMgr->suggest_auto(slst, wspace, ns); + + for (int j = 0; j < ns; j++) + mkallcap((*slst)[j]); + break; + } + } + + // word reversing wrapper for complex prefixes + if (complexprefixes) { + for (int j = 0; j < ns; j++) { + if (utf8) + reverseword_utf((*slst)[j]); + else + reverseword((*slst)[j]); + } + } + + // expand suggestions with dot(s) + if (abbv && pAMgr && pAMgr->get_sugswithdots()) { + for (int j = 0; j < ns; j++) { + (*slst)[j] = (char*)realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv); + strcat((*slst)[j], word + strlen(word) - abbv); + } + } + + // LANG_hu section: replace '-' with ' ' in Hungarian + if (langnum == LANG_hu) { + for (int j = 0; j < ns; j++) { + char* pos = strchr((*slst)[j], '-'); + if (pos) { + int info; + char w[MAXWORDUTF8LEN]; + *pos = '\0'; + strcpy(w, (*slst)[j]); + strcat(w, pos + 1); + spell(w, &info, NULL); + if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { + *pos = ' '; + } else + *pos = '-'; + } + } + } + + // output conversion + rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + for (int j = 0; rl && j < ns; j++) { + if (rl->conv((*slst)[j], wspace) > 0) { + free((*slst)[j]); + (*slst)[j] = mystrdup(wspace); + } + } + + // END OF LANG_hu section + return ns; +} +#endif + +int Hunspell::stem(char*** slst, char** desc, int n) { + char result[MAXLNLEN]; + char result2[MAXLNLEN]; + *slst = NULL; + if (n == 0) + return 0; + *result2 = '\0'; + for (int i = 0; i < n; i++) { + *result = '\0'; + // add compound word parts (except the last one) + char* s = (char*)desc[i]; + char* part = strstr(s, MORPH_PART); + if (part) { + char* nextpart = strstr(part + 1, MORPH_PART); + while (nextpart) { + copy_field(result + strlen(result), part, MORPH_PART); + part = nextpart; + nextpart = strstr(part + 1, MORPH_PART); + } + s = part; + } + + char** pl; + std::string tok(s); + size_t alt = 0; + while ((alt = tok.find(" | ", alt)) != std::string::npos) { + tok[alt + 1] = MSEP_ALT; + } + int pln = line_tok(tok.c_str(), &pl, MSEP_ALT); + for (int k = 0; k < pln; k++) { + // add derivational suffixes + if (strstr(pl[k], MORPH_DERI_SFX)) { + // remove inflectional suffixes + char* is = strstr(pl[k], MORPH_INFL_SFX); + if (is) + *is = '\0'; + char* sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]); + if (sg) { + char** gen; + int genl = line_tok(sg, &gen, MSEP_REC); + free(sg); + for (int j = 0; j < genl; j++) { + sprintf(result2 + strlen(result2), "%c%s%s", MSEP_REC, result, + gen[j]); + } + freelist(&gen, genl); + } + } else { + sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result); + if (strstr(pl[k], MORPH_SURF_PFX)) { + copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX); + } + copy_field(result2 + strlen(result2), pl[k], MORPH_STEM); + } + } + freelist(&pl, pln); + } + int sln = line_tok(result2, slst, MSEP_REC); + return uniqlist(*slst, sln); +} + +int Hunspell::stem(char*** slst, const char* word) { + char** pl; + int pln = analyze(&pl, word); + int pln2 = stem(slst, pl, pln); + freelist(&pl, pln); + return pln2; +} + +#ifdef HUNSPELL_EXPERIMENTAL +int Hunspell::suggest_pos_stems(char*** slst, const char* word) { + char cw[MAXWORDUTF8LEN]; + char wspace[MAXWORDUTF8LEN]; + if (!pSMgr || maxdic == 0) + return 0; + w_char unicw[MAXWORDLEN]; + int nc = strlen(word); + if (utf8) { + if (nc >= MAXWORDUTF8LEN) + return 0; + } else { + if (nc >= MAXWORDLEN) + return 0; + } + int captype = 0; + int abbv = 0; + int wl = 0; + + // input conversion + RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + int convstatus = rl ? rl->conv(word, wspace) : 0; + if (convstatus < 0) + return 0; + else if (convstatus > 0) + wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); + else + wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); + + if (wl == 0) + return 0; + + int ns = 0; // ns=0 = normalized input + + *slst = NULL; // HU, nsug in pSMgr->suggest + + switch (captype) { + case HUHCAP: + case NOCAP: { + ns = pSMgr->suggest_pos_stems(slst, cw, ns); + + if ((abbv) && (ns == 0)) { + memcpy(wspace, cw, wl); + *(wspace + wl) = '.'; + *(wspace + wl + 1) = '\0'; + ns = pSMgr->suggest_pos_stems(slst, wspace, ns); + } + + break; + } + + case INITCAP: { + ns = pSMgr->suggest_pos_stems(slst, cw, ns); + + if (ns == 0 || ((*slst)[0][0] == '#')) { + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + ns = pSMgr->suggest_pos_stems(slst, wspace, ns); + } + + break; + } + + case ALLCAP: { + ns = pSMgr->suggest_pos_stems(slst, cw, ns); + if (ns != 0) + break; + + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + ns = pSMgr->suggest_pos_stems(slst, wspace, ns); + + if (ns == 0) { + mkinitcap(wspace); + ns = pSMgr->suggest_pos_stems(slst, wspace, ns); + } + break; + } + } + + // output conversion + rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + for (int j = 0; rl && j < ns; j++) { + if (rl->conv((*slst)[j], wspace) > 0) { + free((*slst)[j]); + (*slst)[j] = mystrdup(wspace); + } + } + + return ns; +} +#endif // END OF HUNSPELL_EXPERIMENTAL CODE + +const char* Hunspell::get_wordchars() { + return pAMgr->get_wordchars(); +} + +unsigned short* Hunspell::get_wordchars_utf16(int* len) { + return pAMgr->get_wordchars_utf16(len); +} + +void Hunspell::mkinitcap(char* p) { + if (!utf8) { + if (*p != '\0') + *p = csconv[((unsigned char)*p)].cupper; + } else { + int len; + w_char u[MAXWORDLEN]; + len = u8_u16(u, MAXWORDLEN, p); + unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum); + u[0].h = (unsigned char)(i >> 8); + u[0].l = (unsigned char)(i & 0x00FF); + u16_u8(p, MAXWORDUTF8LEN, u, len); + } +} + +int Hunspell::mkinitcap2(char* p, w_char* u, int nc) { + if (!utf8) { + if (*p != '\0') + *p = csconv[((unsigned char)*p)].cupper; + } else if (nc > 0) { + unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum); + u[0].h = (unsigned char)(i >> 8); + u[0].l = (unsigned char)(i & 0x00FF); + u16_u8(p, MAXWORDUTF8LEN, u, nc); + return strlen(p); + } + return nc; +} + +int Hunspell::mkinitsmall2(char* p, w_char* u, int nc) { + if (!utf8) { + if (*p != '\0') + *p = csconv[((unsigned char)*p)].clower; + } else if (nc > 0) { + unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum); + u[0].h = (unsigned char)(i >> 8); + u[0].l = (unsigned char)(i & 0x00FF); + u16_u8(p, MAXWORDUTF8LEN, u, nc); + return strlen(p); + } + return nc; +} + +int Hunspell::add(const char* word) { + if (pHMgr[0]) + return (pHMgr[0])->add(word); + return 0; +} + +int Hunspell::add_with_affix(const char* word, const char* example) { + if (pHMgr[0]) + return (pHMgr[0])->add_with_affix(word, example); + return 0; +} + +int Hunspell::remove(const char* word) { + if (pHMgr[0]) + return (pHMgr[0])->remove(word); + return 0; +} + +const char* Hunspell::get_version() { + return pAMgr->get_version(); +} + +struct cs_info* Hunspell::get_csconv() { + return csconv; +} + +void Hunspell::cat_result(char* result, char* st) { + if (st) { + if (*result) + mystrcat(result, "\n", MAXLNLEN); + mystrcat(result, st, MAXLNLEN); + free(st); + } +} + +int Hunspell::analyze(char*** slst, const char* word) { + char cw[MAXWORDUTF8LEN]; + char wspace[MAXWORDUTF8LEN]; + w_char unicw[MAXWORDLEN]; + int wl2 = 0; + *slst = NULL; + if (!pSMgr || maxdic == 0) + return 0; + int nc = strlen(word); + if (utf8) { + if (nc >= MAXWORDUTF8LEN) + return 0; + } else { + if (nc >= MAXWORDLEN) + return 0; + } + int captype = 0; + int abbv = 0; + int wl = 0; + + // input conversion + RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0; + if (convstatus < 0) + return 0; + else if (convstatus > 0) + wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); + else + wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); + + if (wl == 0) { + if (abbv) { + for (wl = 0; wl < abbv; wl++) + cw[wl] = '.'; + cw[wl] = '\0'; + abbv = 0; + } else + return 0; + } + + char result[MAXLNLEN]; + char* st = NULL; + + *result = '\0'; + + int n = 0; + int n2 = 0; + int n3 = 0; + + // test numbers + // LANG_hu section: set dash information for suggestions + if (langnum == LANG_hu) { + while ((n < wl) && (((cw[n] <= '9') && (cw[n] >= '0')) || + (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) { + n++; + if ((cw[n] == '.') || (cw[n] == ',')) { + if (((n2 == 0) && (n > 3)) || + ((n2 > 0) && ((cw[n - 1] == '.') || (cw[n - 1] == ',')))) + break; + n2++; + n3 = n; + } + } + + if ((n == wl) && (n3 > 0) && (n - n3 > 3)) + return 0; + if ((n == wl) || ((n > 0) && ((cw[n] == '%') || (cw[n] == '\xB0')) && + checkword(cw + n, NULL, NULL))) { + mystrcat(result, cw, MAXLNLEN); + result[n - 1] = '\0'; + if (n == wl) + cat_result(result, pSMgr->suggest_morph(cw + n - 1)); + else { + char sign = cw[n]; + cw[n] = '\0'; + cat_result(result, pSMgr->suggest_morph(cw + n - 1)); + mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE + cw[n] = sign; + cat_result(result, pSMgr->suggest_morph(cw + n)); + } + return line_tok(result, slst, MSEP_REC); + } + } + // END OF LANG_hu section + + switch (captype) { + case HUHCAP: + case HUHINITCAP: + case NOCAP: { + cat_result(result, pSMgr->suggest_morph(cw)); + if (abbv) { + memcpy(wspace, cw, wl); + *(wspace + wl) = '.'; + *(wspace + wl + 1) = '\0'; + cat_result(result, pSMgr->suggest_morph(wspace)); + } + break; + } + case INITCAP: { + wl = mkallsmall2(cw, unicw, nc); + memcpy(wspace, cw, (wl + 1)); + wl2 = mkinitcap2(cw, unicw, nc); + cat_result(result, pSMgr->suggest_morph(wspace)); + cat_result(result, pSMgr->suggest_morph(cw)); + if (abbv) { + *(wspace + wl) = '.'; + *(wspace + wl + 1) = '\0'; + cat_result(result, pSMgr->suggest_morph(wspace)); + + memcpy(wspace, cw, wl2); + *(wspace + wl2) = '.'; + *(wspace + wl2 + 1) = '\0'; + + cat_result(result, pSMgr->suggest_morph(wspace)); + } + break; + } + case ALLCAP: { + cat_result(result, pSMgr->suggest_morph(cw)); + if (abbv) { + memcpy(wspace, cw, wl); + *(wspace + wl) = '.'; + *(wspace + wl + 1) = '\0'; + cat_result(result, pSMgr->suggest_morph(cw)); + } + wl = mkallsmall2(cw, unicw, nc); + memcpy(wspace, cw, (wl + 1)); + wl2 = mkinitcap2(cw, unicw, nc); + + cat_result(result, pSMgr->suggest_morph(wspace)); + cat_result(result, pSMgr->suggest_morph(cw)); + if (abbv) { + *(wspace + wl) = '.'; + *(wspace + wl + 1) = '\0'; + cat_result(result, pSMgr->suggest_morph(wspace)); + + memcpy(wspace, cw, wl2); + *(wspace + wl2) = '.'; + *(wspace + wl2 + 1) = '\0'; + + cat_result(result, pSMgr->suggest_morph(wspace)); + } + break; + } + } + + if (*result) { + // word reversing wrapper for complex prefixes + if (complexprefixes) { + if (utf8) + reverseword_utf(result); + else + reverseword(result); + } + return line_tok(result, slst, MSEP_REC); + } + + // compound word with dash (HU) I18n + char* dash = NULL; + int nresult = 0; + // LANG_hu section: set dash information for suggestions + if (langnum == LANG_hu) + dash = (char*)strchr(cw, '-'); + if ((langnum == LANG_hu) && dash) { + *dash = '\0'; + // examine 2 sides of the dash + if (dash[1] == '\0') { // base word ending with dash + if (spell(cw)) { + char* p = pSMgr->suggest_morph(cw); + if (p) { + int ret = line_tok(p, slst, MSEP_REC); + free(p); + return ret; + } + } + } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat. + if (spell(cw) && (spell("-e"))) { + st = pSMgr->suggest_morph(cw); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + mystrcat(result, "+", MAXLNLEN); // XXX spec. separator in MORPHCODE + st = pSMgr->suggest_morph("-e"); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + return line_tok(result, slst, MSEP_REC); + } + } else { + // first word ending with dash: word- XXX ??? + char r2 = *(dash + 1); + dash[0] = '-'; + dash[1] = '\0'; + nresult = spell(cw); + dash[1] = r2; + dash[0] = '\0'; + if (nresult && spell(dash + 1) && + ((strlen(dash + 1) > 1) || ((dash[1] > '0') && (dash[1] < '9')))) { + st = pSMgr->suggest_morph(cw); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + mystrcat(result, "+", MAXLNLEN); // XXX spec. separator in MORPHCODE + } + st = pSMgr->suggest_morph(dash + 1); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + return line_tok(result, slst, MSEP_REC); + } + } + // affixed number in correct word + if (nresult && (dash > cw) && + (((*(dash - 1) <= '9') && (*(dash - 1) >= '0')) || + (*(dash - 1) == '.'))) { + *dash = '-'; + n = 1; + if (*(dash - n) == '.') + n++; + // search first not a number character to left from dash + while (((dash - n) >= cw) && ((*(dash - n) == '0') || (n < 3)) && + (n < 6)) { + n++; + } + if ((dash - n) < cw) + n--; + // numbers: valami1000000-hoz + // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz, + // 56-hoz, 6-hoz + for (; n >= 1; n--) { + if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && + checkword(dash - n, NULL, NULL)) { + mystrcat(result, cw, MAXLNLEN); + result[dash - cw - n] = '\0'; + st = pSMgr->suggest_morph(dash - n); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + return line_tok(result, slst, MSEP_REC); + } + } + } + } + return 0; +} + +int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) { + *slst = NULL; + if (!pSMgr || !pln) + return 0; + char** pl2; + int pl2n = analyze(&pl2, word); + int captype = 0; + int abbv = 0; + char cw[MAXWORDUTF8LEN]; + cleanword(cw, word, &captype, &abbv); + char result[MAXLNLEN]; + *result = '\0'; + + for (int i = 0; i < pln; i++) { + cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i])); + } + freelist(&pl2, pl2n); + + if (*result) { + // allcap + if (captype == ALLCAP) + mkallcap(result); + + // line split + int linenum = line_tok(result, slst, MSEP_REC); + + // capitalize + if (captype == INITCAP || captype == HUHINITCAP) { + for (int j = 0; j < linenum; j++) + mkinitcap((*slst)[j]); + } + + // temporary filtering of prefix related errors (eg. + // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks") + + int r = 0; + for (int j = 0; j < linenum; j++) { + if (!spell((*slst)[j])) { + free((*slst)[j]); + (*slst)[j] = NULL; + } else { + if (r < j) + (*slst)[r] = (*slst)[j]; + r++; + } + } + if (r > 0) + return r; + free(*slst); + *slst = NULL; + } + return 0; +} + +int Hunspell::generate(char*** slst, const char* word, const char* pattern) { + char** pl; + int pln = analyze(&pl, pattern); + int n = generate(slst, word, pl, pln); + freelist(&pl, pln); + return uniqlist(*slst, n); +} + +// minimal XML parser functions +int Hunspell::get_xml_par(char* dest, const char* par, int max) { + char* d = dest; + if (!par) + return 0; + char end = *par; + char* dmax = dest + max; + if (end == '>') + end = '<'; + else if (end != '\'' && end != '"') + return 0; // bad XML + for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) + *d = *par; + *d = '\0'; + mystrrep(dest, "<", "<"); + mystrrep(dest, "&", "&"); + return (int)(d - dest); +} + +int Hunspell::get_langnum() const { + return langnum; +} + +int Hunspell::input_conv(const char* word, char* dest, size_t destsize) { + RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + return (rl && rl->conv(word, dest, destsize) > 0); +} + +// return the beginning of the element (attr == NULL) or the attribute +const char* Hunspell::get_xml_pos(const char* s, const char* attr) { + const char* end = strchr(s, '>'); + const char* p = s; + if (attr == NULL) + return end; + do { + p = strstr(p, attr); + if (!p || p >= end) + return 0; + } while (*(p - 1) != ' ' && *(p - 1) != '\n'); + return p + strlen(attr); +} + +int Hunspell::check_xml_par(const char* q, + const char* attr, + const char* value) { + char cw[MAXWORDUTF8LEN]; + if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) && + strcmp(cw, value) == 0) + return 1; + return 0; +} + +int Hunspell::get_xml_list(char*** slst, char* list, const char* tag) { + int n = 0; + char* p; + if (!list) + return 0; + for (p = list; ((p = strstr(p, tag)) != NULL); p++) + n++; + if (n == 0) + return 0; + *slst = (char**)malloc(sizeof(char*) * n); + if (!*slst) + return 0; + for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) { + int l = strlen(p); + (*slst)[n] = (char*)malloc(l + 1); + if (!(*slst)[n]) + return n; + if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) { + free((*slst)[n]); + break; + } + } + return n; +} + +int Hunspell::spellml(char*** slst, const char* word) { + char *q, *q2; + char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN]; + q = (char*)strstr(word, "'); + if (!q2) + return 0; // bad XML input + q2 = strstr(q2, "'), MAXWORDUTF8LEN - 10)) + n = analyze(slst, cw); + if (n == 0) + return 0; + // convert the result to ana1ana2 format + std::string r; + r.append(""); + for (int i = 0; i < n; i++) { + r.append(""); + + std::string entry((*slst)[i]); + free((*slst)[i]); + mystrrep(entry, "\t", " "); + mystrrep(entry, "&", "&"); + mystrrep(entry, "<", "<"); + r.append(entry); + + r.append(""); + } + r.append(""); + (*slst)[0] = mystrdup(r.c_str()); + return 1; + } else if (check_xml_par(q, "type=", "stem")) { + if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) + return stem(slst, cw); + } else if (check_xml_par(q, "type=", "generate")) { + int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1); + if (n == 0) + return 0; + char* q3 = strstr(q2 + 1, "'), MAXWORDUTF8LEN - 1)) { + return generate(slst, cw, cw2); + } + } else { + if ((q2 = strstr(q2 + 1, "'), "")) != 0) { + int n2 = generate(slst, cw, slst2, n); + freelist(&slst2, n); + return uniqlist(*slst, n2); + } + freelist(&slst2, n); + } + } + } + return 0; +} + +#ifdef HUNSPELL_EXPERIMENTAL +// XXX is UTF-8 support OK? +char* Hunspell::morph_with_correction(const char* word) { + char cw[MAXWORDUTF8LEN]; + char wspace[MAXWORDUTF8LEN]; + if (!pSMgr || maxdic == 0) + return NULL; + w_char unicw[MAXWORDLEN]; + int nc = strlen(word); + if (utf8) { + if (nc >= MAXWORDUTF8LEN) + return NULL; + } else { + if (nc >= MAXWORDLEN) + return NULL; + } + int captype = 0; + int abbv = 0; + int wl = 0; + + // input conversion + RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + int convstatus = rl ? rl->conv(word, wspace) : 0; + if (convstatus < 0) + return 0; + else if (convstatus > 0) + wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); + else + wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); + + if (wl == 0) + return NULL; + + char result[MAXLNLEN]; + char* st = NULL; + + *result = '\0'; + + switch (captype) { + case NOCAP: { + st = pSMgr->suggest_morph_for_spelling_error(cw); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + if (abbv) { + memcpy(wspace, cw, wl); + *(wspace + wl) = '.'; + *(wspace + wl + 1) = '\0'; + st = pSMgr->suggest_morph_for_spelling_error(wspace); + if (st) { + if (*result) + mystrcat(result, "\n", MAXLNLEN); + mystrcat(result, st, MAXLNLEN); + free(st); + } + } + break; + } + case INITCAP: { + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + st = pSMgr->suggest_morph_for_spelling_error(wspace); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + st = pSMgr->suggest_morph_for_spelling_error(cw); + if (st) { + if (*result) + mystrcat(result, "\n", MAXLNLEN); + mystrcat(result, st, MAXLNLEN); + free(st); + } + if (abbv) { + memcpy(wspace, cw, wl); + *(wspace + wl) = '.'; + *(wspace + wl + 1) = '\0'; + mkallsmall2(wspace, unicw, nc); + st = pSMgr->suggest_morph_for_spelling_error(wspace); + if (st) { + if (*result) + mystrcat(result, "\n", MAXLNLEN); + mystrcat(result, st, MAXLNLEN); + free(st); + } + mkinitcap(wspace); + st = pSMgr->suggest_morph_for_spelling_error(wspace); + if (st) { + if (*result) + mystrcat(result, "\n", MAXLNLEN); + mystrcat(result, st, MAXLNLEN); + free(st); + } + } + break; + } + case HUHCAP: { + st = pSMgr->suggest_morph_for_spelling_error(cw); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + memcpy(wspace, cw, (wl + 1)); + mkallsmall2(wspace, unicw, nc); + st = pSMgr->suggest_morph_for_spelling_error(wspace); + if (st) { + if (*result) + mystrcat(result, "\n", MAXLNLEN); + mystrcat(result, st, MAXLNLEN); + free(st); + } + break; + } + case ALLCAP: { + memcpy(wspace, cw, (wl + 1)); + st = pSMgr->suggest_morph_for_spelling_error(wspace); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + mkallsmall2(wspace, unicw, nc); + st = pSMgr->suggest_morph_for_spelling_error(wspace); + if (st) { + if (*result) + mystrcat(result, "\n", MAXLNLEN); + mystrcat(result, st, MAXLNLEN); + free(st); + } + mkinitcap(wspace); + st = pSMgr->suggest_morph_for_spelling_error(wspace); + if (st) { + if (*result) + mystrcat(result, "\n", MAXLNLEN); + mystrcat(result, st, MAXLNLEN); + free(st); + } + if (abbv) { + memcpy(wspace, cw, (wl + 1)); + *(wspace + wl) = '.'; + *(wspace + wl + 1) = '\0'; + if (*result) + mystrcat(result, "\n", MAXLNLEN); + st = pSMgr->suggest_morph_for_spelling_error(wspace); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + mkallsmall2(wspace, unicw, nc); + st = pSMgr->suggest_morph_for_spelling_error(wspace); + if (st) { + if (*result) + mystrcat(result, "\n", MAXLNLEN); + mystrcat(result, st, MAXLNLEN); + free(st); + } + mkinitcap(wspace); + st = pSMgr->suggest_morph_for_spelling_error(wspace); + if (st) { + if (*result) + mystrcat(result, "\n", MAXLNLEN); + mystrcat(result, st, MAXLNLEN); + free(st); + } + } + break; + } + } + + if (*result) + return mystrdup(result); + return NULL; +} + +#endif // END OF HUNSPELL_EXPERIMENTAL CODE + +Hunhandle* Hunspell_create(const char* affpath, const char* dpath) { + return (Hunhandle*)(new Hunspell(affpath, dpath)); +} + +Hunhandle* Hunspell_create_key(const char* affpath, + const char* dpath, + const char* key) { + return (Hunhandle*)(new Hunspell(affpath, dpath, key)); +} + +void Hunspell_destroy(Hunhandle* pHunspell) { + delete (Hunspell*)(pHunspell); +} + +int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) { + return ((Hunspell*)pHunspell)->add_dic(dpath); +} + +int Hunspell_spell(Hunhandle* pHunspell, const char* word) { + return ((Hunspell*)pHunspell)->spell(word); +} + +char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) { + return ((Hunspell*)pHunspell)->get_dic_encoding(); +} + +int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) { + return ((Hunspell*)pHunspell)->suggest(slst, word); +} + +int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) { + return ((Hunspell*)pHunspell)->analyze(slst, word); +} + +int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) { + return ((Hunspell*)pHunspell)->stem(slst, word); +} + +int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) { + return ((Hunspell*)pHunspell)->stem(slst, desc, n); +} + +int Hunspell_generate(Hunhandle* pHunspell, + char*** slst, + const char* word, + const char* word2) { + return ((Hunspell*)pHunspell)->generate(slst, word, word2); +} + +int Hunspell_generate2(Hunhandle* pHunspell, + char*** slst, + const char* word, + char** desc, + int n) { + return ((Hunspell*)pHunspell)->generate(slst, word, desc, n); +} + +/* functions for run-time modification of the dictionary */ + +/* add word to the run-time dictionary */ + +int Hunspell_add(Hunhandle* pHunspell, const char* word) { + return ((Hunspell*)pHunspell)->add(word); +} + +/* add word to the run-time dictionary with affix flags of + * the example (a dictionary word): Hunspell will recognize + * affixed forms of the new word, too. + */ + +int Hunspell_add_with_affix(Hunhandle* pHunspell, + const char* word, + const char* example) { + return ((Hunspell*)pHunspell)->add_with_affix(word, example); +} + +/* remove word from the run-time dictionary */ + +int Hunspell_remove(Hunhandle* pHunspell, const char* word) { + return ((Hunspell*)pHunspell)->remove(word); +} + +void Hunspell_free_list(Hunhandle*, char*** slst, int n) { + freelist(slst, n); +} + +int Hunspell::suffix_suggest(char*** slst, const char* root_word) { + struct hentry* he = NULL; + int len; + std::string w2; + const char* word; + char* ignoredchars = pAMgr->get_ignore(); + if (ignoredchars != NULL) { + w2.assign(root_word); + if (utf8) { + int ignoredchars_utf16_len; + unsigned short* ignoredchars_utf16 = + pAMgr->get_ignore_utf16(&ignoredchars_utf16_len); + remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len); + } else { + remove_ignored_chars(w2, ignoredchars); + } + word = w2.c_str(); + } else + word = root_word; + + len = strlen(word); + + if (!len) + return 0; + + char** wlst = (char**)malloc(MAXSUGGESTION * sizeof(char*)); + if (wlst == NULL) + return -1; + *slst = wlst; + for (int i = 0; i < MAXSUGGESTION; i++) { + wlst[i] = NULL; + } + + for (int i = 0; (i < maxdic) && !he; i++) { + he = (pHMgr[i])->lookup(word); + } + if (he) { + return pAMgr->get_suffix_words(he->astr, he->alen, root_word, *slst); + } + return 0; +} diff --git a/libs/hunspell/src/hunspell.cxx b/libs/hunspell/src/hunspell.cxx deleted file mode 100644 index 726c72931a..0000000000 --- a/libs/hunspell/src/hunspell.cxx +++ /dev/null @@ -1,2367 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -/* - * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada - * And Contributors. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. All modifications to the source code must be clearly marked as - * such. Binary redistributions based on modified source code - * must be clearly marked as modified versions in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include - -#include "hunspell.hxx" -#include "hunspell.h" -#ifndef MOZILLA_CLIENT -#include "config.h" -#endif -#include "csutil.hxx" - -#include -#include - -Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key) { - encoding = NULL; - csconv = NULL; - utf8 = 0; - complexprefixes = 0; - affixpath = mystrdup(affpath); - maxdic = 0; - - /* first set up the hash manager */ - pHMgr[0] = new HashMgr(dpath, affpath, key); - if (pHMgr[0]) - maxdic = 1; - - /* next set up the affix manager */ - /* it needs access to the hash manager lookup methods */ - pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key); - - /* get the preferred try string and the dictionary */ - /* encoding from the Affix Manager for that dictionary */ - char* try_string = pAMgr->get_try_string(); - encoding = pAMgr->get_encoding(); - langnum = pAMgr->get_langnum(); - utf8 = pAMgr->get_utf8(); - if (!utf8) - csconv = get_current_cs(encoding); - complexprefixes = pAMgr->get_complexprefixes(); - wordbreak = pAMgr->get_breaktable(); - - /* and finally set up the suggestion manager */ - pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr); - if (try_string) - free(try_string); -} - -Hunspell::~Hunspell() { - delete pSMgr; - delete pAMgr; - for (int i = 0; i < maxdic; i++) - delete pHMgr[i]; - maxdic = 0; - pSMgr = NULL; - pAMgr = NULL; -#ifdef MOZILLA_CLIENT - delete[] csconv; -#endif - csconv = NULL; - if (encoding) - free(encoding); - encoding = NULL; - if (affixpath) - free(affixpath); - affixpath = NULL; -} - -// load extra dictionaries -int Hunspell::add_dic(const char* dpath, const char* key) { - if (maxdic == MAXDIC || !affixpath) - return 1; - pHMgr[maxdic] = new HashMgr(dpath, affixpath, key); - if (pHMgr[maxdic]) - maxdic++; - else - return 1; - return 0; -} - -// make a copy of src at destination while removing all leading -// blanks and removing any trailing periods after recording -// their presence with the abbreviation flag -// also since already going through character by character, -// set the capitalization type -// return the length of the "cleaned" (and UTF-8 encoded) word - -int Hunspell::cleanword2(char* dest, - const char* src, - w_char* dest_utf, - int* nc, - int* pcaptype, - int* pabbrev) { - unsigned char* p = (unsigned char*)dest; - const unsigned char* q = (const unsigned char*)src; - - // first skip over any leading blanks - while ((*q != '\0') && (*q == ' ')) - q++; - - // now strip off any trailing periods (recording their presence) - *pabbrev = 0; - int nl = strlen((const char*)q); - while ((nl > 0) && (*(q + nl - 1) == '.')) { - nl--; - (*pabbrev)++; - } - - // if no characters are left it can't be capitalized - if (nl <= 0) { - *pcaptype = NOCAP; - *p = '\0'; - return 0; - } - - strncpy(dest, (char*)q, nl); - *(dest + nl) = '\0'; - nl = strlen(dest); - if (utf8) { - *nc = u8_u16(dest_utf, MAXWORDLEN, dest); - // don't check too long words - if (*nc >= MAXWORDLEN) - return 0; - if (*nc == -1) { // big Unicode character (non BMP area) - *pcaptype = NOCAP; - return nl; - } - *pcaptype = get_captype_utf8(dest_utf, *nc, langnum); - } else { - *pcaptype = get_captype(dest, nl, csconv); - *nc = nl; - } - return nl; -} - -int Hunspell::cleanword(char* dest, - const char* src, - int* pcaptype, - int* pabbrev) { - unsigned char* p = (unsigned char*)dest; - const unsigned char* q = (const unsigned char*)src; - int firstcap = 0; - - // first skip over any leading blanks - while ((*q != '\0') && (*q == ' ')) - q++; - - // now strip off any trailing periods (recording their presence) - *pabbrev = 0; - int nl = strlen((const char*)q); - while ((nl > 0) && (*(q + nl - 1) == '.')) { - nl--; - (*pabbrev)++; - } - - // if no characters are left it can't be capitalized - if (nl <= 0) { - *pcaptype = NOCAP; - *p = '\0'; - return 0; - } - - // now determine the capitalization type of the first nl letters - int ncap = 0; - int nneutral = 0; - int nc = 0; - - if (!utf8) { - while (nl > 0) { - nc++; - if (csconv[(*q)].ccase) - ncap++; - if (csconv[(*q)].cupper == csconv[(*q)].clower) - nneutral++; - *p++ = *q++; - nl--; - } - // remember to terminate the destination string - *p = '\0'; - firstcap = csconv[(unsigned char)(*dest)].ccase; - } else { - unsigned short idx; - w_char t[MAXWORDLEN]; - nc = u8_u16(t, MAXWORDLEN, src); - for (int i = 0; i < nc; i++) { - idx = (t[i].h << 8) + t[i].l; - unsigned short low = unicodetolower(idx, langnum); - if (idx != low) - ncap++; - if (unicodetoupper(idx, langnum) == low) - nneutral++; - } - u16_u8(dest, MAXWORDUTF8LEN, t, nc); - if (ncap) { - idx = (t[0].h << 8) + t[0].l; - firstcap = (idx != unicodetolower(idx, langnum)); - } - } - - // now finally set the captype - if (ncap == 0) { - *pcaptype = NOCAP; - } else if ((ncap == 1) && firstcap) { - *pcaptype = INITCAP; - } else if ((ncap == nc) || ((ncap + nneutral) == nc)) { - *pcaptype = ALLCAP; - } else if ((ncap > 1) && firstcap) { - *pcaptype = HUHINITCAP; - } else { - *pcaptype = HUHCAP; - } - return strlen(dest); -} - -void Hunspell::mkallcap(char* p) { - if (utf8) { - w_char u[MAXWORDLEN]; - int nc = u8_u16(u, MAXWORDLEN, p); - unsigned short idx; - for (int i = 0; i < nc; i++) { - idx = (u[i].h << 8) + u[i].l; - if (idx != unicodetoupper(idx, langnum)) { - u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); - u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); - } - } - u16_u8(p, MAXWORDUTF8LEN, u, nc); - } else { - while (*p != '\0') { - *p = csconv[((unsigned char)*p)].cupper; - p++; - } - } -} - -int Hunspell::mkallcap2(char* p, w_char* u, int nc) { - if (utf8) { - unsigned short idx; - for (int i = 0; i < nc; i++) { - idx = (u[i].h << 8) + u[i].l; - unsigned short up = unicodetoupper(idx, langnum); - if (idx != up) { - u[i].h = (unsigned char)(up >> 8); - u[i].l = (unsigned char)(up & 0x00FF); - } - } - u16_u8(p, MAXWORDUTF8LEN, u, nc); - return strlen(p); - } else { - while (*p != '\0') { - *p = csconv[((unsigned char)*p)].cupper; - p++; - } - } - return nc; -} - -void Hunspell::mkallsmall(char* p) { - while (*p != '\0') { - *p = csconv[((unsigned char)*p)].clower; - p++; - } -} - -int Hunspell::mkallsmall2(char* p, w_char* u, int nc) { - if (utf8) { - unsigned short idx; - for (int i = 0; i < nc; i++) { - idx = (u[i].h << 8) + u[i].l; - unsigned short low = unicodetolower(idx, langnum); - if (idx != low) { - u[i].h = (unsigned char)(low >> 8); - u[i].l = (unsigned char)(low & 0x00FF); - } - } - u16_u8(p, MAXWORDUTF8LEN, u, nc); - return strlen(p); - } else { - while (*p != '\0') { - *p = csconv[((unsigned char)*p)].clower; - p++; - } - } - return nc; -} - -// convert UTF-8 sharp S codes to latin 1 -char* Hunspell::sharps_u8_l1(char* dest, char* source) { - char* p = dest; - *p = *source; - for (p++, source++; *(source - 1); p++, source++) { - *p = *source; - if (*source == '\x9F') - *--p = '\xDF'; - } - return dest; -} - -// recursive search for right ss - sharp s permutations -hentry* Hunspell::spellsharps(char* base, - char* pos, - int n, - int repnum, - char* tmp, - int* info, - char** root) { - pos = strstr(pos, "ss"); - if (pos && (n < MAXSHARPS)) { - *pos = '\xC3'; - *(pos + 1) = '\x9F'; - hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root); - if (h) - return h; - *pos = 's'; - *(pos + 1) = 's'; - h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root); - if (h) - return h; - } else if (repnum > 0) { - if (utf8) - return checkword(base, info, root); - return checkword(sharps_u8_l1(tmp, base), info, root); - } - return NULL; -} - -int Hunspell::is_keepcase(const hentry* rv) { - return pAMgr && rv->astr && pAMgr->get_keepcase() && - TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen); -} - -/* insert a word to the beginning of the suggestion array and return ns */ -int Hunspell::insert_sug(char*** slst, char* word, int ns) { - if (!*slst) - return ns; - char* dup = mystrdup(word); - if (!dup) - return ns; - if (ns == MAXSUGGESTION) { - ns--; - free((*slst)[ns]); - } - for (int k = ns; k > 0; k--) - (*slst)[k] = (*slst)[k - 1]; - (*slst)[0] = dup; - return ns + 1; -} - -int Hunspell::spell(const char* word, int* info, char** root) { - struct hentry* rv = NULL; - // need larger vector. For example, Turkish capital letter I converted a - // 2-byte UTF-8 character (dotless i) by mkallsmall. - char cw[MAXWORDUTF8LEN]; - char wspace[MAXWORDUTF8LEN]; - w_char unicw[MAXWORDLEN]; - - int info2 = 0; - if (!info) - info = &info2; - else - *info = 0; - - // Hunspell supports XML input of the simplified API (see manual) - if (strcmp(word, SPELL_XML) == 0) - return 1; - int nc = strlen(word); - int wl2 = 0; - if (utf8) { - if (nc >= MAXWORDUTF8LEN) - return 0; - } else { - if (nc >= MAXWORDLEN) - return 0; - } - int captype = 0; - int abbv = 0; - int wl = 0; - - // input conversion - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0; - if (convstatus < 0) - return 0; - else if (convstatus > 0) - wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); - else - wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - -#ifdef MOZILLA_CLIENT - // accept the abbreviated words without dots - // workaround for the incomplete tokenization of Mozilla - abbv = 1; -#endif - - if (wl == 0 || maxdic == 0) - return 1; - if (root) - *root = NULL; - - // allow numbers with dots, dashes and commas (but forbid double separators: - // "..", "--" etc.) - enum { NBEGIN, NNUM, NSEP }; - int nstate = NBEGIN; - int i; - - for (i = 0; (i < wl); i++) { - if ((cw[i] <= '9') && (cw[i] >= '0')) { - nstate = NNUM; - } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) { - if ((nstate == NSEP) || (i == 0)) - break; - nstate = NSEP; - } else - break; - } - if ((i == wl) && (nstate == NNUM)) - return 1; - - switch (captype) { - case HUHCAP: - /* FALLTHROUGH */ - case HUHINITCAP: - *info += SPELL_ORIGCAP; - /* FALLTHROUGH */ - case NOCAP: - rv = checkword(cw, info, root); - if ((abbv) && !(rv)) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - rv = checkword(wspace, info, root); - } - break; - case ALLCAP: { - *info += SPELL_ORIGCAP; - rv = checkword(cw, info, root); - if (rv) - break; - if (abbv) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - rv = checkword(wspace, info, root); - if (rv) - break; - } - // Spec. prefix handling for Catalan, French, Italian: - // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia). - if (pAMgr && strchr(cw, '\'')) { - mkallsmall2(cw, unicw, nc); - // There are no really sane circumstances where this could fail, - // but anyway... - if (char* apostrophe = strchr(cw, '\'')) { - if (utf8) { - w_char tmpword[MAXWORDLEN]; - *apostrophe = '\0'; - wl2 = u8_u16(tmpword, MAXWORDLEN, cw); - *apostrophe = '\''; - if (wl2 >= 0 && wl2 < nc) { - mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1); - rv = checkword(cw, info, root); - if (rv) - break; - } - } else { - mkinitcap2(apostrophe + 1, unicw, nc); - rv = checkword(cw, info, root); - if (rv) - break; - } - } - mkinitcap2(cw, unicw, nc); - rv = checkword(cw, info, root); - if (rv) - break; - } - if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) { - char tmpword[MAXWORDUTF8LEN]; - wl = mkallsmall2(cw, unicw, nc); - memcpy(wspace, cw, (wl + 1)); - rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); - if (!rv) { - wl2 = mkinitcap2(cw, unicw, nc); - rv = spellsharps(cw, cw, 0, 0, tmpword, info, root); - } - if ((abbv) && !(rv)) { - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); - if (!rv) { - memcpy(wspace, cw, wl2); - *(wspace + wl2) = '.'; - *(wspace + wl2 + 1) = '\0'; - rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); - } - } - if (rv) - break; - } - } - case INITCAP: { - *info += SPELL_ORIGCAP; - wl = mkallsmall2(cw, unicw, nc); - memcpy(wspace, cw, (wl + 1)); - wl2 = mkinitcap2(cw, unicw, nc); - if (captype == INITCAP) - *info += SPELL_INITCAP; - rv = checkword(cw, info, root); - if (captype == INITCAP) - *info -= SPELL_INITCAP; - // forbid bad capitalization - // (for example, ijs -> Ijs instead of IJs in Dutch) - // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag) - if (*info & SPELL_FORBIDDEN) { - rv = NULL; - break; - } - if (rv && is_keepcase(rv) && (captype == ALLCAP)) - rv = NULL; - if (rv) - break; - - rv = checkword(wspace, info, root); - if (abbv && !rv) { - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - rv = checkword(wspace, info, root); - if (!rv) { - memcpy(wspace, cw, wl2); - *(wspace + wl2) = '.'; - *(wspace + wl2 + 1) = '\0'; - if (captype == INITCAP) - *info += SPELL_INITCAP; - rv = checkword(wspace, info, root); - if (captype == INITCAP) - *info -= SPELL_INITCAP; - if (rv && is_keepcase(rv) && (captype == ALLCAP)) - rv = NULL; - break; - } - } - if (rv && is_keepcase(rv) && - ((captype == ALLCAP) || - // if CHECKSHARPS: KEEPCASE words with \xDF are allowed - // in INITCAP form, too. - !(pAMgr->get_checksharps() && - ((utf8 && strstr(wspace, "\xC3\x9F")) || - (!utf8 && strchr(wspace, '\xDF')))))) - rv = NULL; - break; - } - } - - if (rv) { - if (pAMgr && pAMgr->get_warn() && rv->astr && - TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) { - *info += SPELL_WARN; - if (pAMgr->get_forbidwarn()) - return 0; - return HUNSPELL_OK_WARN; - } - return HUNSPELL_OK; - } - - // recursive breaking at break points - if (wordbreak) { - char* s; - char r; - int nbr = 0; - wl = strlen(cw); - int numbreak = pAMgr ? pAMgr->get_numbreak() : 0; - - // calculate break points for recursion limit - for (int j = 0; j < numbreak; j++) { - s = cw; - do { - s = (char*)strstr(s, wordbreak[j]); - if (s) { - nbr++; - s++; - } - } while (s); - } - if (nbr >= 10) - return 0; - - // check boundary patterns (^begin and end$) - for (int j = 0; j < numbreak; j++) { - int plen = strlen(wordbreak[j]); - if (plen == 1 || plen > wl) - continue; - if (wordbreak[j][0] == '^' && - strncmp(cw, wordbreak[j] + 1, plen - 1) == 0 && spell(cw + plen - 1)) - return 1; - if (wordbreak[j][plen - 1] == '$' && - strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) { - r = cw[wl - plen + 1]; - cw[wl - plen + 1] = '\0'; - if (spell(cw)) - return 1; - cw[wl - plen + 1] = r; - } - } - - // other patterns - for (int j = 0; j < numbreak; j++) { - int plen = strlen(wordbreak[j]); - s = (char*)strstr(cw, wordbreak[j]); - if (s && (s > cw) && (s < cw + wl - plen)) { - if (!spell(s + plen)) - continue; - r = *s; - *s = '\0'; - // examine 2 sides of the break point - if (spell(cw)) - return 1; - *s = r; - - // LANG_hu: spec. dash rule - if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) { - r = s[1]; - s[1] = '\0'; - if (spell(cw)) - return 1; // check the first part with dash - s[1] = r; - } - // end of LANG speficic region - } - } - } - - return 0; -} - -struct hentry* Hunspell::checkword(const char* w, int* info, char** root) { - struct hentry* he = NULL; - bool usebuffer = false; - int len, i; - std::string w2; - const char* word; - - char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL; - if (ignoredchars != NULL) { - w2.assign(w); - if (utf8) { - int ignoredchars_utf16_len; - unsigned short* ignoredchars_utf16 = - pAMgr->get_ignore_utf16(&ignoredchars_utf16_len); - remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len); - } else { - remove_ignored_chars(w2, ignoredchars); - } - word = w2.c_str(); - usebuffer = true; - } else - word = w; - - len = strlen(word); - - if (!len) - return NULL; - - // word reversing wrapper for complex prefixes - if (complexprefixes) { - if (!usebuffer) { - w2.assign(word); - usebuffer = true; - } - if (utf8) - reverseword_utf(w2); - else - reverseword(w2); - } - - if (usebuffer) { - word = w2.c_str(); - } - - // look word in hash table - for (i = 0; (i < maxdic) && !he; i++) { - he = (pHMgr[i])->lookup(word); - - // check forbidden and onlyincompound words - if ((he) && (he->astr) && (pAMgr) && - TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { - if (info) - *info += SPELL_FORBIDDEN; - // LANG_hu section: set dash information for suggestions - if (langnum == LANG_hu) { - if (pAMgr->get_compoundflag() && - TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) { - if (info) - *info += SPELL_COMPOUND; - } - } - return NULL; - } - - // he = next not needaffix, onlyincompound homonym or onlyupcase word - while (he && (he->astr) && pAMgr && - ((pAMgr->get_needaffix() && - TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) || - (pAMgr->get_onlyincompound() && - TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) || - (info && (*info & SPELL_INITCAP) && - TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) - he = he->next_homonym; - } - - // check with affixes - if (!he && pAMgr) { - // try stripping off affixes */ - he = pAMgr->affix_check(word, len, 0); - - // check compound restriction and onlyupcase - if (he && he->astr && - ((pAMgr->get_onlyincompound() && - TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) || - (info && (*info & SPELL_INITCAP) && - TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) { - he = NULL; - } - - if (he) { - if ((he->astr) && (pAMgr) && - TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { - if (info) - *info += SPELL_FORBIDDEN; - return NULL; - } - if (root) { - *root = mystrdup(he->word); - if (*root && complexprefixes) { - if (utf8) - reverseword_utf(*root); - else - reverseword(*root); - } - } - // try check compound word - } else if (pAMgr->get_compound()) { - he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info); - // LANG_hu section: `moving rule' with last dash - if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) { - char* dup = mystrdup(word); - if (!dup) - return NULL; - dup[len - 1] = '\0'; - he = pAMgr->compound_check(dup, len - 1, -5, 0, 100, 0, NULL, 1, 0, - info); - free(dup); - } - // end of LANG speficic region - if (he) { - if (root) { - *root = mystrdup(he->word); - if (*root && complexprefixes) { - if (utf8) - reverseword_utf(*root); - else - reverseword(*root); - } - } - if (info) - *info += SPELL_COMPOUND; - } - } - } - - return he; -} - -int Hunspell::suggest(char*** slst, const char* word) { - int onlycmpdsug = 0; - char cw[MAXWORDUTF8LEN]; - char wspace[MAXWORDUTF8LEN]; - if (!pSMgr || maxdic == 0) - return 0; - w_char unicw[MAXWORDLEN]; - *slst = NULL; - // process XML input of the simplified API (see manual) - if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) { - return spellml(slst, word); - } - int nc = strlen(word); - if (utf8) { - if (nc >= MAXWORDUTF8LEN) - return 0; - } else { - if (nc >= MAXWORDLEN) - return 0; - } - int captype = 0; - int abbv = 0; - int wl = 0; - - // input conversion - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0; - if (convstatus < 0) - return 0; - else if (convstatus > 0) - wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); - else - wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - - if (wl == 0) - return 0; - int ns = 0; - int capwords = 0; - - // check capitalized form for FORCEUCASE - if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) { - int info = SPELL_ORIGCAP; - char** wlst; - if (checkword(cw, &info, NULL)) { - if (*slst) { - wlst = *slst; - } else { - wlst = (char**)malloc(MAXSUGGESTION * sizeof(char*)); - if (wlst == NULL) - return -1; - *slst = wlst; - for (int i = 0; i < MAXSUGGESTION; i++) { - wlst[i] = NULL; - } - } - wlst[0] = mystrdup(cw); - mkinitcap(wlst[0]); - return 1; - } - } - - switch (captype) { - case NOCAP: { - ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); - break; - } - - case INITCAP: { - capwords = 1; - ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); - if (ns == -1) - break; - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); - break; - } - case HUHINITCAP: - capwords = 1; - case HUHCAP: { - ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); - if (ns != -1) { - int prevns; - // something.The -> something. The - char* dot = strchr(cw, '.'); - if (dot && (dot > cw)) { - int captype_; - if (utf8) { - w_char w_[MAXWORDLEN]; - int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1); - captype_ = get_captype_utf8(w_, wl_, langnum); - } else - captype_ = get_captype(dot + 1, strlen(dot + 1), csconv); - if (captype_ == INITCAP) { - char* st = mystrdup(cw); - if (st) { - char* newst = (char*)realloc(st, wl + 2); - if (newst == NULL) - free(st); - st = newst; - } - if (st) { - st[(dot - cw) + 1] = ' '; - strcpy(st + (dot - cw) + 2, dot + 1); - ns = insert_sug(slst, st, ns); - free(st); - } - } - } - if (captype == HUHINITCAP) { - // TheOpenOffice.org -> The OpenOffice.org - memcpy(wspace, cw, (wl + 1)); - mkinitsmall2(wspace, unicw, nc); - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); - } - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - if (spell(wspace)) - ns = insert_sug(slst, wspace, ns); - prevns = ns; - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); - if (captype == HUHINITCAP) { - mkinitcap2(wspace, unicw, nc); - if (spell(wspace)) - ns = insert_sug(slst, wspace, ns); - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); - } - // aNew -> "a New" (instead of "a new") - for (int j = prevns; j < ns; j++) { - char* space = strchr((*slst)[j], ' '); - if (space) { - int slen = strlen(space + 1); - // different case after space (need capitalisation) - if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) { - w_char w[MAXWORDLEN]; - int wc = 0; - char* r = (*slst)[j]; - if (utf8) - wc = u8_u16(w, MAXWORDLEN, space + 1); - mkinitcap2(space + 1, w, wc); - // set as first suggestion - for (int k = j; k > 0; k--) - (*slst)[k] = (*slst)[k - 1]; - (*slst)[0] = r; - } - } - } - } - break; - } - - case ALLCAP: { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); - if (ns == -1) - break; - if (pAMgr && pAMgr->get_keepcase() && spell(wspace)) - ns = insert_sug(slst, wspace, ns); - mkinitcap2(wspace, unicw, nc); - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); - for (int j = 0; j < ns; j++) { - mkallcap((*slst)[j]); - if (pAMgr && pAMgr->get_checksharps()) { - char* pos; - if (utf8) { - pos = strstr((*slst)[j], "\xC3\x9F"); - while (pos) { - *pos = 'S'; - *(pos + 1) = 'S'; - pos = strstr(pos + 2, "\xC3\x9F"); - } - } else { - pos = strchr((*slst)[j], '\xDF'); - while (pos) { - (*slst)[j] = (char*)realloc((*slst)[j], strlen((*slst)[j]) + 2); - mystrrep((*slst)[j], "\xDF", "SS"); - pos = strchr((*slst)[j], '\xDF'); - } - } - } - } - break; - } - } - - // LANG_hu section: replace '-' with ' ' in Hungarian - if (langnum == LANG_hu) { - for (int j = 0; j < ns; j++) { - char* pos = strchr((*slst)[j], '-'); - if (pos) { - int info; - char w[MAXWORDUTF8LEN]; - *pos = '\0'; - strcpy(w, (*slst)[j]); - strcat(w, pos + 1); - (void)spell(w, &info, NULL); - if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { - *pos = ' '; - } else - *pos = '-'; - } - } - } - // END OF LANG_hu section - - // try ngram approach since found nothing or only compound words - if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && - (*slst)) { - switch (captype) { - case NOCAP: { - ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic); - break; - } - case HUHINITCAP: - capwords = 1; - case HUHCAP: { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); - break; - } - case INITCAP: { - capwords = 1; - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); - break; - } - case ALLCAP: { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - int oldns = ns; - ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); - for (int j = oldns; j < ns; j++) - mkallcap((*slst)[j]); - break; - } - } - } - - // try dash suggestion (Afo-American -> Afro-American) - if (char* pos = strchr(cw, '-')) { - char* ppos = cw; - int nodashsug = 1; - char** nlst = NULL; - int nn = 0; - int last = 0; - if (*slst) { - for (int j = 0; j < ns && nodashsug == 1; j++) { - if (strchr((*slst)[j], '-')) - nodashsug = 0; - } - } - while (nodashsug && !last) { - if (*pos == '\0') - last = 1; - else - *pos = '\0'; - if (!spell(ppos)) { - nn = suggest(&nlst, ppos); - for (int j = nn - 1; j >= 0; j--) { - strncpy(wspace, cw, ppos - cw); - strcpy(wspace + (ppos - cw), nlst[j]); - if (!last) { - strcat(wspace, "-"); - strcat(wspace, pos + 1); - } - ns = insert_sug(slst, wspace, ns); - free(nlst[j]); - } - if (nlst != NULL) - free(nlst); - nodashsug = 0; - } - if (!last) { - *pos = '-'; - ppos = pos + 1; - pos = strchr(ppos, '-'); - } - if (!pos) - pos = cw + strlen(cw); - } - } - - // word reversing wrapper for complex prefixes - if (complexprefixes) { - for (int j = 0; j < ns; j++) { - if (utf8) - reverseword_utf((*slst)[j]); - else - reverseword((*slst)[j]); - } - } - - // capitalize - if (capwords) - for (int j = 0; j < ns; j++) { - mkinitcap((*slst)[j]); - } - - // expand suggestions with dot(s) - if (abbv && pAMgr && pAMgr->get_sugswithdots()) { - for (int j = 0; j < ns; j++) { - (*slst)[j] = (char*)realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv); - strcat((*slst)[j], word + strlen(word) - abbv); - } - } - - // remove bad capitalized and forbidden forms - if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) { - switch (captype) { - case INITCAP: - case ALLCAP: { - int l = 0; - for (int j = 0; j < ns; j++) { - if (!strchr((*slst)[j], ' ') && !spell((*slst)[j])) { - char s[MAXSWUTF8L]; - w_char w[MAXSWL]; - int len; - if (utf8) { - len = u8_u16(w, MAXSWL, (*slst)[j]); - } else { - strcpy(s, (*slst)[j]); - len = strlen(s); - } - mkallsmall2(s, w, len); - free((*slst)[j]); - if (spell(s)) { - (*slst)[l] = mystrdup(s); - if ((*slst)[l]) - l++; - } else { - mkinitcap2(s, w, len); - if (spell(s)) { - (*slst)[l] = mystrdup(s); - if ((*slst)[l]) - l++; - } - } - } else { - (*slst)[l] = (*slst)[j]; - l++; - } - } - ns = l; - } - } - } - - // remove duplications - int l = 0; - for (int j = 0; j < ns; j++) { - (*slst)[l] = (*slst)[j]; - for (int k = 0; k < l; k++) { - if (strcmp((*slst)[k], (*slst)[j]) == 0) { - free((*slst)[j]); - l--; - break; - } - } - l++; - } - ns = l; - - // output conversion - rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; - for (int j = 0; rl && j < ns; j++) { - if (rl->conv((*slst)[j], wspace, MAXWORDUTF8LEN) > 0) { - free((*slst)[j]); - (*slst)[j] = mystrdup(wspace); - } - } - - // if suggestions removed by nosuggest, onlyincompound parameters - if (l == 0 && *slst) { - free(*slst); - *slst = NULL; - } - return l; -} - -void Hunspell::free_list(char*** slst, int n) { - freelist(slst, n); -} - -char* Hunspell::get_dic_encoding() { - return encoding; -} - -#ifdef HUNSPELL_EXPERIMENTAL -// XXX UTF-8 support is OK? -int Hunspell::suggest_auto(char*** slst, const char* word) { - char cw[MAXWORDUTF8LEN]; - char wspace[MAXWORDUTF8LEN]; - if (!pSMgr || maxdic == 0) - return 0; - w_char unicw[MAXWORDLEN]; - int nc = strlen(word); - if (utf8) { - if (nc >= MAXWORDUTF8LEN) - return 0; - } else { - if (nc >= MAXWORDLEN) - return 0; - } - int captype = 0; - int abbv = 0; - int wl = 0; - - // input conversion - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - int convstatus = rl ? rl->conv(word, wspace) : 0; - if (convstatus < 0) - return 0; - else if (convstatus > 0) - wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); - else - wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - - if (wl == 0) - return 0; - int ns = 0; - *slst = NULL; // HU, nsug in pSMgr->suggest - - switch (captype) { - case NOCAP: { - ns = pSMgr->suggest_auto(slst, cw, ns); - if (ns > 0) - break; - break; - } - - case INITCAP: { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest_auto(slst, wspace, ns); - for (int j = 0; j < ns; j++) - mkinitcap((*slst)[j]); - ns = pSMgr->suggest_auto(slst, cw, ns); - break; - } - - case HUHINITCAP: - case HUHCAP: { - ns = pSMgr->suggest_auto(slst, cw, ns); - if (ns == 0) { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest_auto(slst, wspace, ns); - } - break; - } - - case ALLCAP: { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest_auto(slst, wspace, ns); - - mkinitcap(wspace); - ns = pSMgr->suggest_auto(slst, wspace, ns); - - for (int j = 0; j < ns; j++) - mkallcap((*slst)[j]); - break; - } - } - - // word reversing wrapper for complex prefixes - if (complexprefixes) { - for (int j = 0; j < ns; j++) { - if (utf8) - reverseword_utf((*slst)[j]); - else - reverseword((*slst)[j]); - } - } - - // expand suggestions with dot(s) - if (abbv && pAMgr && pAMgr->get_sugswithdots()) { - for (int j = 0; j < ns; j++) { - (*slst)[j] = (char*)realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv); - strcat((*slst)[j], word + strlen(word) - abbv); - } - } - - // LANG_hu section: replace '-' with ' ' in Hungarian - if (langnum == LANG_hu) { - for (int j = 0; j < ns; j++) { - char* pos = strchr((*slst)[j], '-'); - if (pos) { - int info; - char w[MAXWORDUTF8LEN]; - *pos = '\0'; - strcpy(w, (*slst)[j]); - strcat(w, pos + 1); - spell(w, &info, NULL); - if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { - *pos = ' '; - } else - *pos = '-'; - } - } - } - - // output conversion - rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; - for (int j = 0; rl && j < ns; j++) { - if (rl->conv((*slst)[j], wspace) > 0) { - free((*slst)[j]); - (*slst)[j] = mystrdup(wspace); - } - } - - // END OF LANG_hu section - return ns; -} -#endif - -int Hunspell::stem(char*** slst, char** desc, int n) { - char result[MAXLNLEN]; - char result2[MAXLNLEN]; - *slst = NULL; - if (n == 0) - return 0; - *result2 = '\0'; - for (int i = 0; i < n; i++) { - *result = '\0'; - // add compound word parts (except the last one) - char* s = (char*)desc[i]; - char* part = strstr(s, MORPH_PART); - if (part) { - char* nextpart = strstr(part + 1, MORPH_PART); - while (nextpart) { - copy_field(result + strlen(result), part, MORPH_PART); - part = nextpart; - nextpart = strstr(part + 1, MORPH_PART); - } - s = part; - } - - char** pl; - std::string tok(s); - size_t alt = 0; - while ((alt = tok.find(" | ", alt)) != std::string::npos) { - tok[alt + 1] = MSEP_ALT; - } - int pln = line_tok(tok.c_str(), &pl, MSEP_ALT); - for (int k = 0; k < pln; k++) { - // add derivational suffixes - if (strstr(pl[k], MORPH_DERI_SFX)) { - // remove inflectional suffixes - char* is = strstr(pl[k], MORPH_INFL_SFX); - if (is) - *is = '\0'; - char* sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]); - if (sg) { - char** gen; - int genl = line_tok(sg, &gen, MSEP_REC); - free(sg); - for (int j = 0; j < genl; j++) { - sprintf(result2 + strlen(result2), "%c%s%s", MSEP_REC, result, - gen[j]); - } - freelist(&gen, genl); - } - } else { - sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result); - if (strstr(pl[k], MORPH_SURF_PFX)) { - copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX); - } - copy_field(result2 + strlen(result2), pl[k], MORPH_STEM); - } - } - freelist(&pl, pln); - } - int sln = line_tok(result2, slst, MSEP_REC); - return uniqlist(*slst, sln); -} - -int Hunspell::stem(char*** slst, const char* word) { - char** pl; - int pln = analyze(&pl, word); - int pln2 = stem(slst, pl, pln); - freelist(&pl, pln); - return pln2; -} - -#ifdef HUNSPELL_EXPERIMENTAL -int Hunspell::suggest_pos_stems(char*** slst, const char* word) { - char cw[MAXWORDUTF8LEN]; - char wspace[MAXWORDUTF8LEN]; - if (!pSMgr || maxdic == 0) - return 0; - w_char unicw[MAXWORDLEN]; - int nc = strlen(word); - if (utf8) { - if (nc >= MAXWORDUTF8LEN) - return 0; - } else { - if (nc >= MAXWORDLEN) - return 0; - } - int captype = 0; - int abbv = 0; - int wl = 0; - - // input conversion - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - int convstatus = rl ? rl->conv(word, wspace) : 0; - if (convstatus < 0) - return 0; - else if (convstatus > 0) - wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); - else - wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - - if (wl == 0) - return 0; - - int ns = 0; // ns=0 = normalized input - - *slst = NULL; // HU, nsug in pSMgr->suggest - - switch (captype) { - case HUHCAP: - case NOCAP: { - ns = pSMgr->suggest_pos_stems(slst, cw, ns); - - if ((abbv) && (ns == 0)) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - ns = pSMgr->suggest_pos_stems(slst, wspace, ns); - } - - break; - } - - case INITCAP: { - ns = pSMgr->suggest_pos_stems(slst, cw, ns); - - if (ns == 0 || ((*slst)[0][0] == '#')) { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest_pos_stems(slst, wspace, ns); - } - - break; - } - - case ALLCAP: { - ns = pSMgr->suggest_pos_stems(slst, cw, ns); - if (ns != 0) - break; - - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - ns = pSMgr->suggest_pos_stems(slst, wspace, ns); - - if (ns == 0) { - mkinitcap(wspace); - ns = pSMgr->suggest_pos_stems(slst, wspace, ns); - } - break; - } - } - - // output conversion - rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; - for (int j = 0; rl && j < ns; j++) { - if (rl->conv((*slst)[j], wspace) > 0) { - free((*slst)[j]); - (*slst)[j] = mystrdup(wspace); - } - } - - return ns; -} -#endif // END OF HUNSPELL_EXPERIMENTAL CODE - -const char* Hunspell::get_wordchars() { - return pAMgr->get_wordchars(); -} - -unsigned short* Hunspell::get_wordchars_utf16(int* len) { - return pAMgr->get_wordchars_utf16(len); -} - -void Hunspell::mkinitcap(char* p) { - if (!utf8) { - if (*p != '\0') - *p = csconv[((unsigned char)*p)].cupper; - } else { - int len; - w_char u[MAXWORDLEN]; - len = u8_u16(u, MAXWORDLEN, p); - unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum); - u[0].h = (unsigned char)(i >> 8); - u[0].l = (unsigned char)(i & 0x00FF); - u16_u8(p, MAXWORDUTF8LEN, u, len); - } -} - -int Hunspell::mkinitcap2(char* p, w_char* u, int nc) { - if (!utf8) { - if (*p != '\0') - *p = csconv[((unsigned char)*p)].cupper; - } else if (nc > 0) { - unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum); - u[0].h = (unsigned char)(i >> 8); - u[0].l = (unsigned char)(i & 0x00FF); - u16_u8(p, MAXWORDUTF8LEN, u, nc); - return strlen(p); - } - return nc; -} - -int Hunspell::mkinitsmall2(char* p, w_char* u, int nc) { - if (!utf8) { - if (*p != '\0') - *p = csconv[((unsigned char)*p)].clower; - } else if (nc > 0) { - unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum); - u[0].h = (unsigned char)(i >> 8); - u[0].l = (unsigned char)(i & 0x00FF); - u16_u8(p, MAXWORDUTF8LEN, u, nc); - return strlen(p); - } - return nc; -} - -int Hunspell::add(const char* word) { - if (pHMgr[0]) - return (pHMgr[0])->add(word); - return 0; -} - -int Hunspell::add_with_affix(const char* word, const char* example) { - if (pHMgr[0]) - return (pHMgr[0])->add_with_affix(word, example); - return 0; -} - -int Hunspell::remove(const char* word) { - if (pHMgr[0]) - return (pHMgr[0])->remove(word); - return 0; -} - -const char* Hunspell::get_version() { - return pAMgr->get_version(); -} - -struct cs_info* Hunspell::get_csconv() { - return csconv; -} - -void Hunspell::cat_result(char* result, char* st) { - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } -} - -int Hunspell::analyze(char*** slst, const char* word) { - char cw[MAXWORDUTF8LEN]; - char wspace[MAXWORDUTF8LEN]; - w_char unicw[MAXWORDLEN]; - int wl2 = 0; - *slst = NULL; - if (!pSMgr || maxdic == 0) - return 0; - int nc = strlen(word); - if (utf8) { - if (nc >= MAXWORDUTF8LEN) - return 0; - } else { - if (nc >= MAXWORDLEN) - return 0; - } - int captype = 0; - int abbv = 0; - int wl = 0; - - // input conversion - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - int convstatus = rl ? rl->conv(word, wspace, MAXWORDUTF8LEN) : 0; - if (convstatus < 0) - return 0; - else if (convstatus > 0) - wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); - else - wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - - if (wl == 0) { - if (abbv) { - for (wl = 0; wl < abbv; wl++) - cw[wl] = '.'; - cw[wl] = '\0'; - abbv = 0; - } else - return 0; - } - - char result[MAXLNLEN]; - char* st = NULL; - - *result = '\0'; - - int n = 0; - int n2 = 0; - int n3 = 0; - - // test numbers - // LANG_hu section: set dash information for suggestions - if (langnum == LANG_hu) { - while ((n < wl) && (((cw[n] <= '9') && (cw[n] >= '0')) || - (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) { - n++; - if ((cw[n] == '.') || (cw[n] == ',')) { - if (((n2 == 0) && (n > 3)) || - ((n2 > 0) && ((cw[n - 1] == '.') || (cw[n - 1] == ',')))) - break; - n2++; - n3 = n; - } - } - - if ((n == wl) && (n3 > 0) && (n - n3 > 3)) - return 0; - if ((n == wl) || ((n > 0) && ((cw[n] == '%') || (cw[n] == '\xB0')) && - checkword(cw + n, NULL, NULL))) { - mystrcat(result, cw, MAXLNLEN); - result[n - 1] = '\0'; - if (n == wl) - cat_result(result, pSMgr->suggest_morph(cw + n - 1)); - else { - char sign = cw[n]; - cw[n] = '\0'; - cat_result(result, pSMgr->suggest_morph(cw + n - 1)); - mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE - cw[n] = sign; - cat_result(result, pSMgr->suggest_morph(cw + n)); - } - return line_tok(result, slst, MSEP_REC); - } - } - // END OF LANG_hu section - - switch (captype) { - case HUHCAP: - case HUHINITCAP: - case NOCAP: { - cat_result(result, pSMgr->suggest_morph(cw)); - if (abbv) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - cat_result(result, pSMgr->suggest_morph(wspace)); - } - break; - } - case INITCAP: { - wl = mkallsmall2(cw, unicw, nc); - memcpy(wspace, cw, (wl + 1)); - wl2 = mkinitcap2(cw, unicw, nc); - cat_result(result, pSMgr->suggest_morph(wspace)); - cat_result(result, pSMgr->suggest_morph(cw)); - if (abbv) { - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - cat_result(result, pSMgr->suggest_morph(wspace)); - - memcpy(wspace, cw, wl2); - *(wspace + wl2) = '.'; - *(wspace + wl2 + 1) = '\0'; - - cat_result(result, pSMgr->suggest_morph(wspace)); - } - break; - } - case ALLCAP: { - cat_result(result, pSMgr->suggest_morph(cw)); - if (abbv) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - cat_result(result, pSMgr->suggest_morph(cw)); - } - wl = mkallsmall2(cw, unicw, nc); - memcpy(wspace, cw, (wl + 1)); - wl2 = mkinitcap2(cw, unicw, nc); - - cat_result(result, pSMgr->suggest_morph(wspace)); - cat_result(result, pSMgr->suggest_morph(cw)); - if (abbv) { - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - cat_result(result, pSMgr->suggest_morph(wspace)); - - memcpy(wspace, cw, wl2); - *(wspace + wl2) = '.'; - *(wspace + wl2 + 1) = '\0'; - - cat_result(result, pSMgr->suggest_morph(wspace)); - } - break; - } - } - - if (*result) { - // word reversing wrapper for complex prefixes - if (complexprefixes) { - if (utf8) - reverseword_utf(result); - else - reverseword(result); - } - return line_tok(result, slst, MSEP_REC); - } - - // compound word with dash (HU) I18n - char* dash = NULL; - int nresult = 0; - // LANG_hu section: set dash information for suggestions - if (langnum == LANG_hu) - dash = (char*)strchr(cw, '-'); - if ((langnum == LANG_hu) && dash) { - *dash = '\0'; - // examine 2 sides of the dash - if (dash[1] == '\0') { // base word ending with dash - if (spell(cw)) { - char* p = pSMgr->suggest_morph(cw); - if (p) { - int ret = line_tok(p, slst, MSEP_REC); - free(p); - return ret; - } - } - } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat. - if (spell(cw) && (spell("-e"))) { - st = pSMgr->suggest_morph(cw); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - mystrcat(result, "+", MAXLNLEN); // XXX spec. separator in MORPHCODE - st = pSMgr->suggest_morph("-e"); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - return line_tok(result, slst, MSEP_REC); - } - } else { - // first word ending with dash: word- XXX ??? - char r2 = *(dash + 1); - dash[0] = '-'; - dash[1] = '\0'; - nresult = spell(cw); - dash[1] = r2; - dash[0] = '\0'; - if (nresult && spell(dash + 1) && - ((strlen(dash + 1) > 1) || ((dash[1] > '0') && (dash[1] < '9')))) { - st = pSMgr->suggest_morph(cw); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - mystrcat(result, "+", MAXLNLEN); // XXX spec. separator in MORPHCODE - } - st = pSMgr->suggest_morph(dash + 1); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - return line_tok(result, slst, MSEP_REC); - } - } - // affixed number in correct word - if (nresult && (dash > cw) && - (((*(dash - 1) <= '9') && (*(dash - 1) >= '0')) || - (*(dash - 1) == '.'))) { - *dash = '-'; - n = 1; - if (*(dash - n) == '.') - n++; - // search first not a number character to left from dash - while (((dash - n) >= cw) && ((*(dash - n) == '0') || (n < 3)) && - (n < 6)) { - n++; - } - if ((dash - n) < cw) - n--; - // numbers: valami1000000-hoz - // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz, - // 56-hoz, 6-hoz - for (; n >= 1; n--) { - if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && - checkword(dash - n, NULL, NULL)) { - mystrcat(result, cw, MAXLNLEN); - result[dash - cw - n] = '\0'; - st = pSMgr->suggest_morph(dash - n); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - return line_tok(result, slst, MSEP_REC); - } - } - } - } - return 0; -} - -int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) { - *slst = NULL; - if (!pSMgr || !pln) - return 0; - char** pl2; - int pl2n = analyze(&pl2, word); - int captype = 0; - int abbv = 0; - char cw[MAXWORDUTF8LEN]; - cleanword(cw, word, &captype, &abbv); - char result[MAXLNLEN]; - *result = '\0'; - - for (int i = 0; i < pln; i++) { - cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i])); - } - freelist(&pl2, pl2n); - - if (*result) { - // allcap - if (captype == ALLCAP) - mkallcap(result); - - // line split - int linenum = line_tok(result, slst, MSEP_REC); - - // capitalize - if (captype == INITCAP || captype == HUHINITCAP) { - for (int j = 0; j < linenum; j++) - mkinitcap((*slst)[j]); - } - - // temporary filtering of prefix related errors (eg. - // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks") - - int r = 0; - for (int j = 0; j < linenum; j++) { - if (!spell((*slst)[j])) { - free((*slst)[j]); - (*slst)[j] = NULL; - } else { - if (r < j) - (*slst)[r] = (*slst)[j]; - r++; - } - } - if (r > 0) - return r; - free(*slst); - *slst = NULL; - } - return 0; -} - -int Hunspell::generate(char*** slst, const char* word, const char* pattern) { - char** pl; - int pln = analyze(&pl, pattern); - int n = generate(slst, word, pl, pln); - freelist(&pl, pln); - return uniqlist(*slst, n); -} - -// minimal XML parser functions -int Hunspell::get_xml_par(char* dest, const char* par, int max) { - char* d = dest; - if (!par) - return 0; - char end = *par; - char* dmax = dest + max; - if (end == '>') - end = '<'; - else if (end != '\'' && end != '"') - return 0; // bad XML - for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) - *d = *par; - *d = '\0'; - mystrrep(dest, "<", "<"); - mystrrep(dest, "&", "&"); - return (int)(d - dest); -} - -int Hunspell::get_langnum() const { - return langnum; -} - -int Hunspell::input_conv(const char* word, char* dest, size_t destsize) { - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - return (rl && rl->conv(word, dest, destsize) > 0); -} - -// return the beginning of the element (attr == NULL) or the attribute -const char* Hunspell::get_xml_pos(const char* s, const char* attr) { - const char* end = strchr(s, '>'); - const char* p = s; - if (attr == NULL) - return end; - do { - p = strstr(p, attr); - if (!p || p >= end) - return 0; - } while (*(p - 1) != ' ' && *(p - 1) != '\n'); - return p + strlen(attr); -} - -int Hunspell::check_xml_par(const char* q, - const char* attr, - const char* value) { - char cw[MAXWORDUTF8LEN]; - if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) && - strcmp(cw, value) == 0) - return 1; - return 0; -} - -int Hunspell::get_xml_list(char*** slst, char* list, const char* tag) { - int n = 0; - char* p; - if (!list) - return 0; - for (p = list; ((p = strstr(p, tag)) != NULL); p++) - n++; - if (n == 0) - return 0; - *slst = (char**)malloc(sizeof(char*) * n); - if (!*slst) - return 0; - for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) { - int l = strlen(p); - (*slst)[n] = (char*)malloc(l + 1); - if (!(*slst)[n]) - return n; - if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) { - free((*slst)[n]); - break; - } - } - return n; -} - -int Hunspell::spellml(char*** slst, const char* word) { - char *q, *q2; - char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN]; - q = (char*)strstr(word, "'); - if (!q2) - return 0; // bad XML input - q2 = strstr(q2, "'), MAXWORDUTF8LEN - 10)) - n = analyze(slst, cw); - if (n == 0) - return 0; - // convert the result to ana1ana2 format - std::string r; - r.append(""); - for (int i = 0; i < n; i++) { - r.append(""); - - std::string entry((*slst)[i]); - free((*slst)[i]); - mystrrep(entry, "\t", " "); - mystrrep(entry, "&", "&"); - mystrrep(entry, "<", "<"); - r.append(entry); - - r.append(""); - } - r.append(""); - (*slst)[0] = mystrdup(r.c_str()); - return 1; - } else if (check_xml_par(q, "type=", "stem")) { - if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) - return stem(slst, cw); - } else if (check_xml_par(q, "type=", "generate")) { - int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1); - if (n == 0) - return 0; - char* q3 = strstr(q2 + 1, "'), MAXWORDUTF8LEN - 1)) { - return generate(slst, cw, cw2); - } - } else { - if ((q2 = strstr(q2 + 1, "'), "")) != 0) { - int n2 = generate(slst, cw, slst2, n); - freelist(&slst2, n); - return uniqlist(*slst, n2); - } - freelist(&slst2, n); - } - } - } - return 0; -} - -#ifdef HUNSPELL_EXPERIMENTAL -// XXX is UTF-8 support OK? -char* Hunspell::morph_with_correction(const char* word) { - char cw[MAXWORDUTF8LEN]; - char wspace[MAXWORDUTF8LEN]; - if (!pSMgr || maxdic == 0) - return NULL; - w_char unicw[MAXWORDLEN]; - int nc = strlen(word); - if (utf8) { - if (nc >= MAXWORDUTF8LEN) - return NULL; - } else { - if (nc >= MAXWORDLEN) - return NULL; - } - int captype = 0; - int abbv = 0; - int wl = 0; - - // input conversion - RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; - int convstatus = rl ? rl->conv(word, wspace) : 0; - if (convstatus < 0) - return 0; - else if (convstatus > 0) - wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); - else - wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - - if (wl == 0) - return NULL; - - char result[MAXLNLEN]; - char* st = NULL; - - *result = '\0'; - - switch (captype) { - case NOCAP: { - st = pSMgr->suggest_morph_for_spelling_error(cw); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - if (abbv) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - } - break; - } - case INITCAP: { - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - st = pSMgr->suggest_morph_for_spelling_error(cw); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - if (abbv) { - memcpy(wspace, cw, wl); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - mkallsmall2(wspace, unicw, nc); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - mkinitcap(wspace); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - } - break; - } - case HUHCAP: { - st = pSMgr->suggest_morph_for_spelling_error(cw); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - memcpy(wspace, cw, (wl + 1)); - mkallsmall2(wspace, unicw, nc); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - break; - } - case ALLCAP: { - memcpy(wspace, cw, (wl + 1)); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - mkallsmall2(wspace, unicw, nc); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - mkinitcap(wspace); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - if (abbv) { - memcpy(wspace, cw, (wl + 1)); - *(wspace + wl) = '.'; - *(wspace + wl + 1) = '\0'; - if (*result) - mystrcat(result, "\n", MAXLNLEN); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - mkallsmall2(wspace, unicw, nc); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - mkinitcap(wspace); - st = pSMgr->suggest_morph_for_spelling_error(wspace); - if (st) { - if (*result) - mystrcat(result, "\n", MAXLNLEN); - mystrcat(result, st, MAXLNLEN); - free(st); - } - } - break; - } - } - - if (*result) - return mystrdup(result); - return NULL; -} - -#endif // END OF HUNSPELL_EXPERIMENTAL CODE - -Hunhandle* Hunspell_create(const char* affpath, const char* dpath) { - return (Hunhandle*)(new Hunspell(affpath, dpath)); -} - -Hunhandle* Hunspell_create_key(const char* affpath, - const char* dpath, - const char* key) { - return (Hunhandle*)(new Hunspell(affpath, dpath, key)); -} - -void Hunspell_destroy(Hunhandle* pHunspell) { - delete (Hunspell*)(pHunspell); -} - -int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) { - return ((Hunspell*)pHunspell)->add_dic(dpath); -} - -int Hunspell_spell(Hunhandle* pHunspell, const char* word) { - return ((Hunspell*)pHunspell)->spell(word); -} - -char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) { - return ((Hunspell*)pHunspell)->get_dic_encoding(); -} - -int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) { - return ((Hunspell*)pHunspell)->suggest(slst, word); -} - -int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) { - return ((Hunspell*)pHunspell)->analyze(slst, word); -} - -int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) { - return ((Hunspell*)pHunspell)->stem(slst, word); -} - -int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) { - return ((Hunspell*)pHunspell)->stem(slst, desc, n); -} - -int Hunspell_generate(Hunhandle* pHunspell, - char*** slst, - const char* word, - const char* word2) { - return ((Hunspell*)pHunspell)->generate(slst, word, word2); -} - -int Hunspell_generate2(Hunhandle* pHunspell, - char*** slst, - const char* word, - char** desc, - int n) { - return ((Hunspell*)pHunspell)->generate(slst, word, desc, n); -} - -/* functions for run-time modification of the dictionary */ - -/* add word to the run-time dictionary */ - -int Hunspell_add(Hunhandle* pHunspell, const char* word) { - return ((Hunspell*)pHunspell)->add(word); -} - -/* add word to the run-time dictionary with affix flags of - * the example (a dictionary word): Hunspell will recognize - * affixed forms of the new word, too. - */ - -int Hunspell_add_with_affix(Hunhandle* pHunspell, - const char* word, - const char* example) { - return ((Hunspell*)pHunspell)->add_with_affix(word, example); -} - -/* remove word from the run-time dictionary */ - -int Hunspell_remove(Hunhandle* pHunspell, const char* word) { - return ((Hunspell*)pHunspell)->remove(word); -} - -void Hunspell_free_list(Hunhandle*, char*** slst, int n) { - freelist(slst, n); -} - -int Hunspell::suffix_suggest(char*** slst, const char* root_word) { - struct hentry* he = NULL; - int len; - std::string w2; - const char* word; - char* ignoredchars = pAMgr->get_ignore(); - if (ignoredchars != NULL) { - w2.assign(root_word); - if (utf8) { - int ignoredchars_utf16_len; - unsigned short* ignoredchars_utf16 = - pAMgr->get_ignore_utf16(&ignoredchars_utf16_len); - remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len); - } else { - remove_ignored_chars(w2, ignoredchars); - } - word = w2.c_str(); - } else - word = root_word; - - len = strlen(word); - - if (!len) - return 0; - - char** wlst = (char**)malloc(MAXSUGGESTION * sizeof(char*)); - if (wlst == NULL) - return -1; - *slst = wlst; - for (int i = 0; i < MAXSUGGESTION; i++) { - wlst[i] = NULL; - } - - for (int i = 0; (i < maxdic) && !he; i++) { - he = (pHMgr[i])->lookup(word); - } - if (he) { - return pAMgr->get_suffix_words(he->astr, he->alen, root_word, *slst); - } - return 0; -} diff --git a/libs/hunspell/src/hunspelldll.c b/libs/hunspell/src/hunspelldll.c deleted file mode 100644 index fdd667ce8f..0000000000 --- a/libs/hunspell/src/hunspelldll.c +++ /dev/null @@ -1,57 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Copyright (C) 2006 - * Miha Vrhovnik (http://simail.sf.net, http://xcollect.sf.net) - * All Rights Reserved. - * - * Contributor(s): - * - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** **/ -#include "hunspelldll.h" -#include - - -BOOL APIENTRY DllMain(HINSTANCE hInst /* Library instance handle. */, - DWORD reason /* Reason this function is being called. */, - LPVOID reserved /* Not used. */) { - switch (reason) { - case DLL_PROCESS_ATTACH: - break; - - case DLL_PROCESS_DETACH: - break; - - case DLL_THREAD_ATTACH: - break; - - case DLL_THREAD_DETACH: - break; - } - - /* Returns TRUE on success, FALSE on failure */ - return TRUE; -} diff --git a/libs/hunspell/src/hunzip.c++ b/libs/hunspell/src/hunzip.c++ new file mode 100644 index 0000000000..b2788a1055 --- /dev/null +++ b/libs/hunspell/src/hunzip.c++ @@ -0,0 +1,263 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Hunspell, based on MySpell. + * + * The Initial Developers of the Original Code are + * Kevin Hendricks (MySpell) and Németh László (Hunspell). + * Portions created by the Initial Developers are Copyright (C) 2002-2005 + * the Initial Developers. All Rights Reserved. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include +#include +#include + +#include "hunzip.hxx" +#include "csutil.hxx" + +#define CODELEN 65536 +#define BASEBITREC 5000 + +#define UNCOMPRESSED '\002' +#define MAGIC "hz0" +#define MAGIC_ENCRYPT "hz1" +#define MAGICLEN (sizeof(MAGIC) - 1) + +int Hunzip::fail(const char* err, const char* par) { + fprintf(stderr, err, par); + return -1; +} + +Hunzip::Hunzip(const char* file, const char* key) + : fin(NULL), bufsiz(0), lastbit(0), inc(0), inbits(0), outc(0), dec(NULL) { + in[0] = out[0] = line[0] = '\0'; + filename = mystrdup(file); + if (getcode(key) == -1) + bufsiz = -1; + else + bufsiz = getbuf(); +} + +int Hunzip::getcode(const char* key) { + unsigned char c[2]; + int i, j, n, p; + int allocatedbit = BASEBITREC; + const char* enc = key; + + if (!filename) + return -1; + + fin = myfopen(filename, "rb"); + if (!fin) + return -1; + + // read magic number + if ((fread(in, 1, 3, fin) < MAGICLEN) || + !(strncmp(MAGIC, in, MAGICLEN) == 0 || + strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) { + return fail(MSG_FORMAT, filename); + } + + // check encryption + if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) { + unsigned char cs; + if (!key) + return fail(MSG_KEY, filename); + if (fread(&c, 1, 1, fin) < 1) + return fail(MSG_FORMAT, filename); + for (cs = 0; *enc; enc++) + cs ^= *enc; + if (cs != c[0]) + return fail(MSG_KEY, filename); + enc = key; + } else + key = NULL; + + // read record count + if (fread(&c, 1, 2, fin) < 2) + return fail(MSG_FORMAT, filename); + + if (key) { + c[0] ^= *enc; + if (*(++enc) == '\0') + enc = key; + c[1] ^= *enc; + } + + n = ((int)c[0] << 8) + c[1]; + dec = (struct bit*)malloc(BASEBITREC * sizeof(struct bit)); + if (!dec) + return fail(MSG_MEMORY, filename); + dec[0].v[0] = 0; + dec[0].v[1] = 0; + + // read codes + for (i = 0; i < n; i++) { + unsigned char l; + if (fread(c, 1, 2, fin) < 2) + return fail(MSG_FORMAT, filename); + if (key) { + if (*(++enc) == '\0') + enc = key; + c[0] ^= *enc; + if (*(++enc) == '\0') + enc = key; + c[1] ^= *enc; + } + if (fread(&l, 1, 1, fin) < 1) + return fail(MSG_FORMAT, filename); + if (key) { + if (*(++enc) == '\0') + enc = key; + l ^= *enc; + } + if (fread(in, 1, l / 8 + 1, fin) < (size_t)l / 8 + 1) + return fail(MSG_FORMAT, filename); + if (key) + for (j = 0; j <= l / 8; j++) { + if (*(++enc) == '\0') + enc = key; + in[j] ^= *enc; + } + p = 0; + for (j = 0; j < l; j++) { + int b = (in[j / 8] & (1 << (7 - (j % 8)))) ? 1 : 0; + int oldp = p; + p = dec[p].v[b]; + if (p == 0) { + lastbit++; + if (lastbit == allocatedbit) { + allocatedbit += BASEBITREC; + dec = (struct bit*)realloc(dec, allocatedbit * sizeof(struct bit)); + } + dec[lastbit].v[0] = 0; + dec[lastbit].v[1] = 0; + dec[oldp].v[b] = lastbit; + p = lastbit; + } + } + dec[p].c[0] = c[0]; + dec[p].c[1] = c[1]; + } + return 0; +} + +Hunzip::~Hunzip() { + if (dec) + free(dec); + if (fin) + fclose(fin); + if (filename) + free(filename); +} + +int Hunzip::getbuf() { + int p = 0; + int o = 0; + do { + if (inc == 0) + inbits = fread(in, 1, BUFSIZE, fin) * 8; + for (; inc < inbits; inc++) { + int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0; + int oldp = p; + p = dec[p].v[b]; + if (p == 0) { + if (oldp == lastbit) { + fclose(fin); + fin = NULL; + // add last odd byte + if (dec[lastbit].c[0]) + out[o++] = dec[lastbit].c[1]; + return o; + } + out[o++] = dec[oldp].c[0]; + out[o++] = dec[oldp].c[1]; + if (o == BUFSIZE) + return o; + p = dec[p].v[b]; + } + } + inc = 0; + } while (inbits == BUFSIZE * 8); + return fail(MSG_FORMAT, filename); +} + +const char* Hunzip::getline() { + char linebuf[BUFSIZE]; + int l = 0, eol = 0, left = 0, right = 0; + if (bufsiz == -1) + return NULL; + while (l < bufsiz && !eol) { + linebuf[l++] = out[outc]; + switch (out[outc]) { + case '\t': + break; + case 31: { // escape + if (++outc == bufsiz) { + bufsiz = getbuf(); + outc = 0; + } + linebuf[l - 1] = out[outc]; + break; + } + case ' ': + break; + default: + if (((unsigned char)out[outc]) < 47) { + if (out[outc] > 32) { + right = out[outc] - 31; + if (++outc == bufsiz) { + bufsiz = getbuf(); + outc = 0; + } + } + if (out[outc] == 30) + left = 9; + else + left = out[outc]; + linebuf[l - 1] = '\n'; + eol = 1; + } + } + if (++outc == bufsiz) { + outc = 0; + bufsiz = fin ? getbuf() : -1; + } + } + if (right) + strcpy(linebuf + l - 1, line + strlen(line) - right - 1); + else + linebuf[l] = '\0'; + strcpy(line + left, linebuf); + return line; +} diff --git a/libs/hunspell/src/hunzip.cxx b/libs/hunspell/src/hunzip.cxx deleted file mode 100644 index b2788a1055..0000000000 --- a/libs/hunspell/src/hunzip.cxx +++ /dev/null @@ -1,263 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include -#include -#include - -#include "hunzip.hxx" -#include "csutil.hxx" - -#define CODELEN 65536 -#define BASEBITREC 5000 - -#define UNCOMPRESSED '\002' -#define MAGIC "hz0" -#define MAGIC_ENCRYPT "hz1" -#define MAGICLEN (sizeof(MAGIC) - 1) - -int Hunzip::fail(const char* err, const char* par) { - fprintf(stderr, err, par); - return -1; -} - -Hunzip::Hunzip(const char* file, const char* key) - : fin(NULL), bufsiz(0), lastbit(0), inc(0), inbits(0), outc(0), dec(NULL) { - in[0] = out[0] = line[0] = '\0'; - filename = mystrdup(file); - if (getcode(key) == -1) - bufsiz = -1; - else - bufsiz = getbuf(); -} - -int Hunzip::getcode(const char* key) { - unsigned char c[2]; - int i, j, n, p; - int allocatedbit = BASEBITREC; - const char* enc = key; - - if (!filename) - return -1; - - fin = myfopen(filename, "rb"); - if (!fin) - return -1; - - // read magic number - if ((fread(in, 1, 3, fin) < MAGICLEN) || - !(strncmp(MAGIC, in, MAGICLEN) == 0 || - strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) { - return fail(MSG_FORMAT, filename); - } - - // check encryption - if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) { - unsigned char cs; - if (!key) - return fail(MSG_KEY, filename); - if (fread(&c, 1, 1, fin) < 1) - return fail(MSG_FORMAT, filename); - for (cs = 0; *enc; enc++) - cs ^= *enc; - if (cs != c[0]) - return fail(MSG_KEY, filename); - enc = key; - } else - key = NULL; - - // read record count - if (fread(&c, 1, 2, fin) < 2) - return fail(MSG_FORMAT, filename); - - if (key) { - c[0] ^= *enc; - if (*(++enc) == '\0') - enc = key; - c[1] ^= *enc; - } - - n = ((int)c[0] << 8) + c[1]; - dec = (struct bit*)malloc(BASEBITREC * sizeof(struct bit)); - if (!dec) - return fail(MSG_MEMORY, filename); - dec[0].v[0] = 0; - dec[0].v[1] = 0; - - // read codes - for (i = 0; i < n; i++) { - unsigned char l; - if (fread(c, 1, 2, fin) < 2) - return fail(MSG_FORMAT, filename); - if (key) { - if (*(++enc) == '\0') - enc = key; - c[0] ^= *enc; - if (*(++enc) == '\0') - enc = key; - c[1] ^= *enc; - } - if (fread(&l, 1, 1, fin) < 1) - return fail(MSG_FORMAT, filename); - if (key) { - if (*(++enc) == '\0') - enc = key; - l ^= *enc; - } - if (fread(in, 1, l / 8 + 1, fin) < (size_t)l / 8 + 1) - return fail(MSG_FORMAT, filename); - if (key) - for (j = 0; j <= l / 8; j++) { - if (*(++enc) == '\0') - enc = key; - in[j] ^= *enc; - } - p = 0; - for (j = 0; j < l; j++) { - int b = (in[j / 8] & (1 << (7 - (j % 8)))) ? 1 : 0; - int oldp = p; - p = dec[p].v[b]; - if (p == 0) { - lastbit++; - if (lastbit == allocatedbit) { - allocatedbit += BASEBITREC; - dec = (struct bit*)realloc(dec, allocatedbit * sizeof(struct bit)); - } - dec[lastbit].v[0] = 0; - dec[lastbit].v[1] = 0; - dec[oldp].v[b] = lastbit; - p = lastbit; - } - } - dec[p].c[0] = c[0]; - dec[p].c[1] = c[1]; - } - return 0; -} - -Hunzip::~Hunzip() { - if (dec) - free(dec); - if (fin) - fclose(fin); - if (filename) - free(filename); -} - -int Hunzip::getbuf() { - int p = 0; - int o = 0; - do { - if (inc == 0) - inbits = fread(in, 1, BUFSIZE, fin) * 8; - for (; inc < inbits; inc++) { - int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0; - int oldp = p; - p = dec[p].v[b]; - if (p == 0) { - if (oldp == lastbit) { - fclose(fin); - fin = NULL; - // add last odd byte - if (dec[lastbit].c[0]) - out[o++] = dec[lastbit].c[1]; - return o; - } - out[o++] = dec[oldp].c[0]; - out[o++] = dec[oldp].c[1]; - if (o == BUFSIZE) - return o; - p = dec[p].v[b]; - } - } - inc = 0; - } while (inbits == BUFSIZE * 8); - return fail(MSG_FORMAT, filename); -} - -const char* Hunzip::getline() { - char linebuf[BUFSIZE]; - int l = 0, eol = 0, left = 0, right = 0; - if (bufsiz == -1) - return NULL; - while (l < bufsiz && !eol) { - linebuf[l++] = out[outc]; - switch (out[outc]) { - case '\t': - break; - case 31: { // escape - if (++outc == bufsiz) { - bufsiz = getbuf(); - outc = 0; - } - linebuf[l - 1] = out[outc]; - break; - } - case ' ': - break; - default: - if (((unsigned char)out[outc]) < 47) { - if (out[outc] > 32) { - right = out[outc] - 31; - if (++outc == bufsiz) { - bufsiz = getbuf(); - outc = 0; - } - } - if (out[outc] == 30) - left = 9; - else - left = out[outc]; - linebuf[l - 1] = '\n'; - eol = 1; - } - } - if (++outc == bufsiz) { - outc = 0; - bufsiz = fin ? getbuf() : -1; - } - } - if (right) - strcpy(linebuf + l - 1, line + strlen(line) - right - 1); - else - linebuf[l] = '\0'; - strcpy(line + left, linebuf); - return line; -} diff --git a/libs/hunspell/src/phonet.c++ b/libs/hunspell/src/phonet.c++ new file mode 100644 index 0000000000..2b4d2ae504 --- /dev/null +++ b/libs/hunspell/src/phonet.c++ @@ -0,0 +1,285 @@ +/* phonetic.c - generic replacement aglogithms for phonetic transformation + Copyright (C) 2000 Bjoern Jacke + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation; + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; If not, see + . + + Changelog: + + 2000-01-05 Bjoern Jacke + Initial Release insprired by the article about phonetic + transformations out of c't 25/1999 + + 2007-07-26 Bjoern Jacke + Released under MPL/GPL/LGPL tri-license for Hunspell + + 2007-08-23 Laszlo Nemeth + Porting from Aspell to Hunspell using C-like structs +*/ + +#include +#include +#include +#include + +#include "csutil.hxx" +#include "phonet.hxx" + +void init_phonet_hash(phonetable& parms) { + int i, k; + + for (i = 0; i < HASHSIZE; i++) { + parms.hash[i] = -1; + } + + for (i = 0; parms.rules[i][0] != '\0'; i += 2) { + /** set hash value **/ + k = (unsigned char)parms.rules[i][0]; + + if (parms.hash[k] < 0) { + parms.hash[k] = i; + } + } +} + +// like strcpy but safe if the strings overlap +// but only if dest < src +static inline void strmove(char* dest, char* src) { + while (*src) + *dest++ = *src++; + *dest = '\0'; +} + +static int myisalpha(char ch) { + if ((unsigned char)ch < 128) + return isalpha(ch); + return 1; +} + +/* phonetic transcription algorithm */ +/* see: http://aspell.net/man-html/Phonetic-Code.html */ +/* convert string to uppercase before this call */ +int phonet(const char* inword, char* target, int len, phonetable& parms) { + /** Do phonetic transformation. **/ + /** "len" = length of "inword" incl. '\0'. **/ + + /** result: >= 0: length of "target" **/ + /** otherwise: error **/ + + int i, j, k = 0, n, p, z; + int k0, n0, p0 = -333, z0; + char c, c0; + const char* s; + typedef unsigned char uchar; + char word[MAXPHONETUTF8LEN + 1]; + if (len == -1) + len = strlen(inword); + if (len > MAXPHONETUTF8LEN) + return 0; + strncpy(word, inword, MAXPHONETUTF8LEN); + word[MAXPHONETUTF8LEN] = '\0'; + + /** check word **/ + i = j = z = 0; + while ((c = word[i]) != '\0') { + n = parms.hash[(uchar)c]; + z0 = 0; + + if (n >= 0) { + /** check all rules for the same letter **/ + while (parms.rules[n][0] == c) { + /** check whole string **/ + k = 1; /** number of found letters **/ + p = 5; /** default priority **/ + s = parms.rules[n]; + s++; /** important for (see below) "*(s-1)" **/ + + while (*s != '\0' && word[i + k] == *s && !isdigit((unsigned char)*s) && + strchr("(-<^$", *s) == NULL) { + k++; + s++; + } + if (*s == '(') { + /** check letters in "(..)" **/ + if (myisalpha(word[i + k]) // ...could be implied? + && strchr(s + 1, word[i + k]) != NULL) { + k++; + while (*s != ')') + s++; + s++; + } + } + p0 = (int)*s; + k0 = k; + while (*s == '-' && k > 1) { + k--; + s++; + } + if (*s == '<') + s++; + if (isdigit((unsigned char)*s)) { + /** determine priority **/ + p = *s - '0'; + s++; + } + if (*s == '^' && *(s + 1) == '^') + s++; + + if (*s == '\0' || (*s == '^' && (i == 0 || !myisalpha(word[i - 1])) && + (*(s + 1) != '$' || (!myisalpha(word[i + k0])))) || + (*s == '$' && i > 0 && myisalpha(word[i - 1]) && + (!myisalpha(word[i + k0])))) { + /** search for followup rules, if: **/ + /** parms.followup and k > 1 and NO '-' in searchstring **/ + c0 = word[i + k - 1]; + n0 = parms.hash[(uchar)c0]; + + // if (parms.followup && k > 1 && n0 >= 0 + if (k > 1 && n0 >= 0 && p0 != (int)'-' && word[i + k] != '\0') { + /** test follow-up rule for "word[i+k]" **/ + while (parms.rules[n0][0] == c0) { + /** check whole string **/ + k0 = k; + p0 = 5; + s = parms.rules[n0]; + s++; + while (*s != '\0' && word[i + k0] == *s && + !isdigit((unsigned char)*s) && + strchr("(-<^$", *s) == NULL) { + k0++; + s++; + } + if (*s == '(') { + /** check letters **/ + if (myisalpha(word[i + k0]) && + strchr(s + 1, word[i + k0]) != NULL) { + k0++; + while (*s != ')' && *s != '\0') + s++; + if (*s == ')') + s++; + } + } + while (*s == '-') { + /** "k0" gets NOT reduced **/ + /** because "if (k0 == k)" **/ + s++; + } + if (*s == '<') + s++; + if (isdigit((unsigned char)*s)) { + p0 = *s - '0'; + s++; + } + + if (*s == '\0' + /** *s == '^' cuts **/ + || (*s == '$' && !myisalpha(word[i + k0]))) { + if (k0 == k) { + /** this is just a piece of the string **/ + n0 += 2; + continue; + } + + if (p0 < p) { + /** priority too low **/ + n0 += 2; + continue; + } + /** rule fits; stop search **/ + break; + } + n0 += 2; + } /** End of "while (parms.rules[n0][0] == c0)" **/ + + if (p0 >= p && parms.rules[n0][0] == c0) { + n += 2; + continue; + } + } /** end of follow-up stuff **/ + + /** replace string **/ + s = parms.rules[n + 1]; + p0 = (parms.rules[n][0] != '\0' && + strchr(parms.rules[n] + 1, '<') != NULL) + ? 1 + : 0; + if (p0 == 1 && z == 0) { + /** rule with '<' is used **/ + if (j > 0 && *s != '\0' && + (target[j - 1] == c || target[j - 1] == *s)) { + j--; + } + z0 = 1; + z = 1; + k0 = 0; + while (*s != '\0' && word[i + k0] != '\0') { + word[i + k0] = *s; + k0++; + s++; + } + if (k > k0) + strmove(&word[0] + i + k0, &word[0] + i + k); + + /** new "actual letter" **/ + c = word[i]; + } else { /** no '<' rule used **/ + i += k - 1; + z = 0; + while (*s != '\0' && *(s + 1) != '\0' && j < len) { + if (j == 0 || target[j - 1] != *s) { + target[j] = *s; + j++; + } + s++; + } + /** new "actual letter" **/ + c = *s; + if (parms.rules[n][0] != '\0' && + strstr(parms.rules[n] + 1, "^^") != NULL) { + if (c != '\0') { + target[j] = c; + j++; + } + strmove(&word[0], &word[0] + i + 1); + i = 0; + z0 = 1; + } + } + break; + } /** end of follow-up stuff **/ + n += 2; + } /** end of while (parms.rules[n][0] == c) **/ + } /** end of if (n >= 0) **/ + if (z0 == 0) { + // if (k && (assert(p0!=-333),!p0) && j < len && c != '\0' + // && (!parms.collapse_result || j == 0 || target[j-1] != + // c)){ + if (k && !p0 && j < len && c != '\0' && + (1 || j == 0 || target[j - 1] != c)) { + /** condense only double letters **/ + target[j] = c; + /// printf("\n setting \n"); + j++; + } + + i++; + z = 0; + k = 0; + } + } /** end of while ((c = word[i]) != '\0') **/ + + target[j] = '\0'; + return (j); + +} /** end of function "phonet" **/ diff --git a/libs/hunspell/src/phonet.cxx b/libs/hunspell/src/phonet.cxx deleted file mode 100644 index 2b4d2ae504..0000000000 --- a/libs/hunspell/src/phonet.cxx +++ /dev/null @@ -1,285 +0,0 @@ -/* phonetic.c - generic replacement aglogithms for phonetic transformation - Copyright (C) 2000 Bjoern Jacke - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License version 2.1 as published by the Free Software Foundation; - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; If not, see - . - - Changelog: - - 2000-01-05 Bjoern Jacke - Initial Release insprired by the article about phonetic - transformations out of c't 25/1999 - - 2007-07-26 Bjoern Jacke - Released under MPL/GPL/LGPL tri-license for Hunspell - - 2007-08-23 Laszlo Nemeth - Porting from Aspell to Hunspell using C-like structs -*/ - -#include -#include -#include -#include - -#include "csutil.hxx" -#include "phonet.hxx" - -void init_phonet_hash(phonetable& parms) { - int i, k; - - for (i = 0; i < HASHSIZE; i++) { - parms.hash[i] = -1; - } - - for (i = 0; parms.rules[i][0] != '\0'; i += 2) { - /** set hash value **/ - k = (unsigned char)parms.rules[i][0]; - - if (parms.hash[k] < 0) { - parms.hash[k] = i; - } - } -} - -// like strcpy but safe if the strings overlap -// but only if dest < src -static inline void strmove(char* dest, char* src) { - while (*src) - *dest++ = *src++; - *dest = '\0'; -} - -static int myisalpha(char ch) { - if ((unsigned char)ch < 128) - return isalpha(ch); - return 1; -} - -/* phonetic transcription algorithm */ -/* see: http://aspell.net/man-html/Phonetic-Code.html */ -/* convert string to uppercase before this call */ -int phonet(const char* inword, char* target, int len, phonetable& parms) { - /** Do phonetic transformation. **/ - /** "len" = length of "inword" incl. '\0'. **/ - - /** result: >= 0: length of "target" **/ - /** otherwise: error **/ - - int i, j, k = 0, n, p, z; - int k0, n0, p0 = -333, z0; - char c, c0; - const char* s; - typedef unsigned char uchar; - char word[MAXPHONETUTF8LEN + 1]; - if (len == -1) - len = strlen(inword); - if (len > MAXPHONETUTF8LEN) - return 0; - strncpy(word, inword, MAXPHONETUTF8LEN); - word[MAXPHONETUTF8LEN] = '\0'; - - /** check word **/ - i = j = z = 0; - while ((c = word[i]) != '\0') { - n = parms.hash[(uchar)c]; - z0 = 0; - - if (n >= 0) { - /** check all rules for the same letter **/ - while (parms.rules[n][0] == c) { - /** check whole string **/ - k = 1; /** number of found letters **/ - p = 5; /** default priority **/ - s = parms.rules[n]; - s++; /** important for (see below) "*(s-1)" **/ - - while (*s != '\0' && word[i + k] == *s && !isdigit((unsigned char)*s) && - strchr("(-<^$", *s) == NULL) { - k++; - s++; - } - if (*s == '(') { - /** check letters in "(..)" **/ - if (myisalpha(word[i + k]) // ...could be implied? - && strchr(s + 1, word[i + k]) != NULL) { - k++; - while (*s != ')') - s++; - s++; - } - } - p0 = (int)*s; - k0 = k; - while (*s == '-' && k > 1) { - k--; - s++; - } - if (*s == '<') - s++; - if (isdigit((unsigned char)*s)) { - /** determine priority **/ - p = *s - '0'; - s++; - } - if (*s == '^' && *(s + 1) == '^') - s++; - - if (*s == '\0' || (*s == '^' && (i == 0 || !myisalpha(word[i - 1])) && - (*(s + 1) != '$' || (!myisalpha(word[i + k0])))) || - (*s == '$' && i > 0 && myisalpha(word[i - 1]) && - (!myisalpha(word[i + k0])))) { - /** search for followup rules, if: **/ - /** parms.followup and k > 1 and NO '-' in searchstring **/ - c0 = word[i + k - 1]; - n0 = parms.hash[(uchar)c0]; - - // if (parms.followup && k > 1 && n0 >= 0 - if (k > 1 && n0 >= 0 && p0 != (int)'-' && word[i + k] != '\0') { - /** test follow-up rule for "word[i+k]" **/ - while (parms.rules[n0][0] == c0) { - /** check whole string **/ - k0 = k; - p0 = 5; - s = parms.rules[n0]; - s++; - while (*s != '\0' && word[i + k0] == *s && - !isdigit((unsigned char)*s) && - strchr("(-<^$", *s) == NULL) { - k0++; - s++; - } - if (*s == '(') { - /** check letters **/ - if (myisalpha(word[i + k0]) && - strchr(s + 1, word[i + k0]) != NULL) { - k0++; - while (*s != ')' && *s != '\0') - s++; - if (*s == ')') - s++; - } - } - while (*s == '-') { - /** "k0" gets NOT reduced **/ - /** because "if (k0 == k)" **/ - s++; - } - if (*s == '<') - s++; - if (isdigit((unsigned char)*s)) { - p0 = *s - '0'; - s++; - } - - if (*s == '\0' - /** *s == '^' cuts **/ - || (*s == '$' && !myisalpha(word[i + k0]))) { - if (k0 == k) { - /** this is just a piece of the string **/ - n0 += 2; - continue; - } - - if (p0 < p) { - /** priority too low **/ - n0 += 2; - continue; - } - /** rule fits; stop search **/ - break; - } - n0 += 2; - } /** End of "while (parms.rules[n0][0] == c0)" **/ - - if (p0 >= p && parms.rules[n0][0] == c0) { - n += 2; - continue; - } - } /** end of follow-up stuff **/ - - /** replace string **/ - s = parms.rules[n + 1]; - p0 = (parms.rules[n][0] != '\0' && - strchr(parms.rules[n] + 1, '<') != NULL) - ? 1 - : 0; - if (p0 == 1 && z == 0) { - /** rule with '<' is used **/ - if (j > 0 && *s != '\0' && - (target[j - 1] == c || target[j - 1] == *s)) { - j--; - } - z0 = 1; - z = 1; - k0 = 0; - while (*s != '\0' && word[i + k0] != '\0') { - word[i + k0] = *s; - k0++; - s++; - } - if (k > k0) - strmove(&word[0] + i + k0, &word[0] + i + k); - - /** new "actual letter" **/ - c = word[i]; - } else { /** no '<' rule used **/ - i += k - 1; - z = 0; - while (*s != '\0' && *(s + 1) != '\0' && j < len) { - if (j == 0 || target[j - 1] != *s) { - target[j] = *s; - j++; - } - s++; - } - /** new "actual letter" **/ - c = *s; - if (parms.rules[n][0] != '\0' && - strstr(parms.rules[n] + 1, "^^") != NULL) { - if (c != '\0') { - target[j] = c; - j++; - } - strmove(&word[0], &word[0] + i + 1); - i = 0; - z0 = 1; - } - } - break; - } /** end of follow-up stuff **/ - n += 2; - } /** end of while (parms.rules[n][0] == c) **/ - } /** end of if (n >= 0) **/ - if (z0 == 0) { - // if (k && (assert(p0!=-333),!p0) && j < len && c != '\0' - // && (!parms.collapse_result || j == 0 || target[j-1] != - // c)){ - if (k && !p0 && j < len && c != '\0' && - (1 || j == 0 || target[j - 1] != c)) { - /** condense only double letters **/ - target[j] = c; - /// printf("\n setting \n"); - j++; - } - - i++; - z = 0; - k = 0; - } - } /** end of while ((c = word[i]) != '\0') **/ - - target[j] = '\0'; - return (j); - -} /** end of function "phonet" **/ diff --git a/libs/hunspell/src/replist.c++ b/libs/hunspell/src/replist.c++ new file mode 100644 index 0000000000..ace6c4aaf8 --- /dev/null +++ b/libs/hunspell/src/replist.c++ @@ -0,0 +1,175 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Hunspell, based on MySpell. + * + * The Initial Developers of the Original Code are + * Kevin Hendricks (MySpell) and Németh László (Hunspell). + * Portions created by the Initial Developers are Copyright (C) 2002-2005 + * the Initial Developers. All Rights Reserved. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include "replist.hxx" +#include "csutil.hxx" + +RepList::RepList(int n) { + dat = (replentry**)malloc(sizeof(replentry*) * n); + if (dat == 0) + size = 0; + else + size = n; + pos = 0; +} + +RepList::~RepList() { + for (int i = 0; i < pos; i++) { + free(dat[i]->pattern); + free(dat[i]->pattern2); + free(dat[i]); + } + free(dat); +} + +int RepList::get_pos() { + return pos; +} + +replentry* RepList::item(int n) { + return dat[n]; +} + +int RepList::near(const char* word) { + int p1 = 0; + int p2 = pos; + while ((p2 - p1) > 1) { + int m = (p1 + p2) / 2; + int c = strcmp(word, dat[m]->pattern); + if (c <= 0) { + if (c < 0) + p2 = m; + else + p1 = p2 = m; + } else + p1 = m; + } + return p1; +} + +int RepList::match(const char* word, int n) { + if (strncmp(word, dat[n]->pattern, strlen(dat[n]->pattern)) == 0) + return strlen(dat[n]->pattern); + return 0; +} + +int RepList::add(char* pat1, char* pat2) { + if (pos >= size || pat1 == NULL || pat2 == NULL) + return 1; + replentry* r = (replentry*)malloc(sizeof(replentry)); + if (r == NULL) + return 1; + r->pattern = mystrrep(pat1, "_", " "); + r->pattern2 = mystrrep(pat2, "_", " "); + r->start = false; + r->end = false; + dat[pos++] = r; + for (int i = pos - 1; i > 0; i--) { + r = dat[i]; + if (strcmp(r->pattern, dat[i - 1]->pattern) < 0) { + dat[i] = dat[i - 1]; + dat[i - 1] = r; + } else + break; + } + return 0; +} + +int RepList::conv(const char* word, char* dest, size_t destsize) { + int stl = 0; + int change = 0; + for (size_t i = 0; i < strlen(word); i++) { + int n = near(word + i); + int l = match(word + i, n); + if (l) { + size_t replen = strlen(dat[n]->pattern2); + if (stl + replen >= destsize) + return -1; + strcpy(dest + stl, dat[n]->pattern2); + stl += replen; + i += l - 1; + change = 1; + } else { + if (stl + 1 >= destsize) + return -1; + dest[stl++] = word[i]; + } + } + dest[stl] = '\0'; + return change; +} diff --git a/libs/hunspell/src/replist.cxx b/libs/hunspell/src/replist.cxx deleted file mode 100644 index ace6c4aaf8..0000000000 --- a/libs/hunspell/src/replist.cxx +++ /dev/null @@ -1,175 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -/* - * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada - * And Contributors. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. All modifications to the source code must be clearly marked as - * such. Binary redistributions based on modified source code - * must be clearly marked as modified versions in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include - -#include "replist.hxx" -#include "csutil.hxx" - -RepList::RepList(int n) { - dat = (replentry**)malloc(sizeof(replentry*) * n); - if (dat == 0) - size = 0; - else - size = n; - pos = 0; -} - -RepList::~RepList() { - for (int i = 0; i < pos; i++) { - free(dat[i]->pattern); - free(dat[i]->pattern2); - free(dat[i]); - } - free(dat); -} - -int RepList::get_pos() { - return pos; -} - -replentry* RepList::item(int n) { - return dat[n]; -} - -int RepList::near(const char* word) { - int p1 = 0; - int p2 = pos; - while ((p2 - p1) > 1) { - int m = (p1 + p2) / 2; - int c = strcmp(word, dat[m]->pattern); - if (c <= 0) { - if (c < 0) - p2 = m; - else - p1 = p2 = m; - } else - p1 = m; - } - return p1; -} - -int RepList::match(const char* word, int n) { - if (strncmp(word, dat[n]->pattern, strlen(dat[n]->pattern)) == 0) - return strlen(dat[n]->pattern); - return 0; -} - -int RepList::add(char* pat1, char* pat2) { - if (pos >= size || pat1 == NULL || pat2 == NULL) - return 1; - replentry* r = (replentry*)malloc(sizeof(replentry)); - if (r == NULL) - return 1; - r->pattern = mystrrep(pat1, "_", " "); - r->pattern2 = mystrrep(pat2, "_", " "); - r->start = false; - r->end = false; - dat[pos++] = r; - for (int i = pos - 1; i > 0; i--) { - r = dat[i]; - if (strcmp(r->pattern, dat[i - 1]->pattern) < 0) { - dat[i] = dat[i - 1]; - dat[i - 1] = r; - } else - break; - } - return 0; -} - -int RepList::conv(const char* word, char* dest, size_t destsize) { - int stl = 0; - int change = 0; - for (size_t i = 0; i < strlen(word); i++) { - int n = near(word + i); - int l = match(word + i, n); - if (l) { - size_t replen = strlen(dat[n]->pattern2); - if (stl + replen >= destsize) - return -1; - strcpy(dest + stl, dat[n]->pattern2); - stl += replen; - i += l - 1; - change = 1; - } else { - if (stl + 1 >= destsize) - return -1; - dest[stl++] = word[i]; - } - } - dest[stl] = '\0'; - return change; -} diff --git a/libs/hunspell/src/suggestmgr.c++ b/libs/hunspell/src/suggestmgr.c++ new file mode 100644 index 0000000000..4269a1181a --- /dev/null +++ b/libs/hunspell/src/suggestmgr.c++ @@ -0,0 +1,2376 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Hunspell, based on MySpell. + * + * The Initial Developers of the Original Code are + * Kevin Hendricks (MySpell) and Németh László (Hunspell). + * Portions created by the Initial Developers are Copyright (C) 2002-2005 + * the Initial Developers. All Rights Reserved. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include "suggestmgr.hxx" +#include "htypes.hxx" +#include "csutil.hxx" + +const w_char W_VLINE = {'\0', '|'}; + +SuggestMgr::SuggestMgr(const char* tryme, int maxn, AffixMgr* aptr) { + // register affix manager and check in string of chars to + // try when building candidate suggestions + pAMgr = aptr; + + csconv = NULL; + + ckeyl = 0; + ckey = NULL; + ckey_utf = NULL; + + ctryl = 0; + ctry = NULL; + ctry_utf = NULL; + + utf8 = 0; + langnum = 0; + complexprefixes = 0; + + maxSug = maxn; + nosplitsugs = 0; + maxngramsugs = MAXNGRAMSUGS; + maxcpdsugs = MAXCOMPOUNDSUGS; + + if (pAMgr) { + langnum = pAMgr->get_langnum(); + ckey = pAMgr->get_key_string(); + nosplitsugs = pAMgr->get_nosplitsugs(); + if (pAMgr->get_maxngramsugs() >= 0) + maxngramsugs = pAMgr->get_maxngramsugs(); + utf8 = pAMgr->get_utf8(); + if (pAMgr->get_maxcpdsugs() >= 0) + maxcpdsugs = pAMgr->get_maxcpdsugs(); + if (!utf8) { + char* enc = pAMgr->get_encoding(); + csconv = get_current_cs(enc); + free(enc); + } + complexprefixes = pAMgr->get_complexprefixes(); + } + + if (ckey) { + if (utf8) { + w_char t[MAXSWL]; + ckeyl = u8_u16(t, MAXSWL, ckey); + ckey_utf = (w_char*)malloc(ckeyl * sizeof(w_char)); + if (ckey_utf) + memcpy(ckey_utf, t, ckeyl * sizeof(w_char)); + else + ckeyl = 0; + } else { + ckeyl = strlen(ckey); + } + } + + if (tryme) { + ctry = mystrdup(tryme); + if (ctry) + ctryl = strlen(ctry); + if (ctry && utf8) { + w_char t[MAXSWL]; + ctryl = u8_u16(t, MAXSWL, tryme); + ctry_utf = (w_char*)malloc(ctryl * sizeof(w_char)); + if (ctry_utf) + memcpy(ctry_utf, t, ctryl * sizeof(w_char)); + else + ctryl = 0; + } + } +} + +SuggestMgr::~SuggestMgr() { + pAMgr = NULL; + if (ckey) + free(ckey); + ckey = NULL; + if (ckey_utf) + free(ckey_utf); + ckey_utf = NULL; + ckeyl = 0; + if (ctry) + free(ctry); + ctry = NULL; + if (ctry_utf) + free(ctry_utf); + ctry_utf = NULL; + ctryl = 0; + maxSug = 0; +#ifdef MOZILLA_CLIENT + delete[] csconv; +#endif +} + +int SuggestMgr::testsug(char** wlst, + const char* candidate, + int wl, + int ns, + int cpdsuggest, + int* timer, + clock_t* timelimit) { + int cwrd = 1; + if (ns == maxSug) + return maxSug; + for (int k = 0; k < ns; k++) { + if (strcmp(candidate, wlst[k]) == 0) { + cwrd = 0; + break; + } + } + if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { + wlst[ns] = mystrdup(candidate); + if (wlst[ns] == NULL) { + for (int j = 0; j < ns; j++) + free(wlst[j]); + return -1; + } + ns++; + } + return ns; +} + +// generate suggestions for a misspelled word +// pass in address of array of char * pointers +// onlycompoundsug: probably bad suggestions (need for ngram sugs, too) + +int SuggestMgr::suggest(char*** slst, + const char* w, + int nsug, + int* onlycompoundsug) { + int nocompoundtwowords = 0; + char** wlst; + w_char word_utf[MAXSWL]; + int wl = 0; + int nsugorig = nsug; + std::string w2; + const char* word = w; + int oldSug = 0; + + // word reversing wrapper for complex prefixes + if (complexprefixes) { + w2.assign(w); + if (utf8) + reverseword_utf(w2); + else + reverseword(w2); + word = w2.c_str(); + } + + if (*slst) { + wlst = *slst; + } else { + wlst = (char**)malloc(maxSug * sizeof(char*)); + if (wlst == NULL) + return -1; + for (int i = 0; i < maxSug; i++) { + wlst[i] = NULL; + } + } + + if (utf8) { + wl = u8_u16(word_utf, MAXSWL, word); + if (wl == -1) { + *slst = wlst; + return nsug; + } + } + + for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0); + cpdsuggest++) { + // limit compound suggestion + if (cpdsuggest > 0) + oldSug = nsug; + + // suggestions for an uppercase word (html -> HTML) + if ((nsug < maxSug) && (nsug > -1)) { + nsug = (utf8) ? capchars_utf(wlst, word_utf, wl, nsug, cpdsuggest) + : capchars(wlst, word, nsug, cpdsuggest); + } + + // perhaps we made a typical fault of spelling + if ((nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { + nsug = replchars(wlst, word, nsug, cpdsuggest); + } + + // perhaps we made chose the wrong char from a related set + if ((nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { + nsug = mapchars(wlst, word, nsug, cpdsuggest); + } + + // only suggest compound words when no other suggestion + if ((cpdsuggest == 0) && (nsug > nsugorig)) + nocompoundtwowords = 1; + + // did we swap the order of chars by mistake + if ((nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { + nsug = (utf8) ? swapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) + : swapchar(wlst, word, nsug, cpdsuggest); + } + + // did we swap the order of non adjacent chars by mistake + if ((nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { + nsug = (utf8) ? longswapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) + : longswapchar(wlst, word, nsug, cpdsuggest); + } + + // did we just hit the wrong key in place of a good char (case and keyboard) + if ((nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { + nsug = (utf8) ? badcharkey_utf(wlst, word_utf, wl, nsug, cpdsuggest) + : badcharkey(wlst, word, nsug, cpdsuggest); + } + + // did we add a char that should not be there + if ((nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { + nsug = (utf8) ? extrachar_utf(wlst, word_utf, wl, nsug, cpdsuggest) + : extrachar(wlst, word, nsug, cpdsuggest); + } + + // did we forgot a char + if ((nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { + nsug = (utf8) ? forgotchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) + : forgotchar(wlst, word, nsug, cpdsuggest); + } + + // did we move a char + if ((nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { + nsug = (utf8) ? movechar_utf(wlst, word_utf, wl, nsug, cpdsuggest) + : movechar(wlst, word, nsug, cpdsuggest); + } + + // did we just hit the wrong key in place of a good char + if ((nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { + nsug = (utf8) ? badchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) + : badchar(wlst, word, nsug, cpdsuggest); + } + + // did we double two characters + if ((nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { + nsug = (utf8) ? doubletwochars_utf(wlst, word_utf, wl, nsug, cpdsuggest) + : doubletwochars(wlst, word, nsug, cpdsuggest); + } + + // perhaps we forgot to hit space and two words ran together + if (!nosplitsugs && (nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { + nsug = twowords(wlst, word, nsug, cpdsuggest); + } + + } // repeating ``for'' statement compounding support + + if (nsug < 0) { + // we ran out of memory - we should free up as much as possible + for (int i = 0; i < maxSug; i++) + if (wlst[i] != NULL) + free(wlst[i]); + free(wlst); + wlst = NULL; + } + + if (!nocompoundtwowords && (nsug > 0) && onlycompoundsug) + *onlycompoundsug = 1; + + *slst = wlst; + return nsug; +} + +// generate suggestions for a word with typical mistake +// pass in address of array of char * pointers +#ifdef HUNSPELL_EXPERIMENTAL +int SuggestMgr::suggest_auto(char*** slst, const char* w, int nsug) { + int nocompoundtwowords = 0; + char** wlst; + int oldSug; + + char w2[MAXWORDUTF8LEN]; + const char* word = w; + + // word reversing wrapper for complex prefixes + if (complexprefixes) { + strcpy(w2, w); + if (utf8) + reverseword_utf(w2); + else + reverseword(w2); + word = w2; + } + + if (*slst) { + wlst = *slst; + } else { + wlst = (char**)malloc(maxSug * sizeof(char*)); + if (wlst == NULL) + return -1; + } + + for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0); + cpdsuggest++) { + // limit compound suggestion + if (cpdsuggest > 0) + oldSug = nsug; + + // perhaps we made a typical fault of spelling + if ((nsug < maxSug) && (nsug > -1)) + nsug = replchars(wlst, word, nsug, cpdsuggest); + + // perhaps we made chose the wrong char from a related set + if ((nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) + nsug = mapchars(wlst, word, nsug, cpdsuggest); + + if ((cpdsuggest == 0) && (nsug > 0)) + nocompoundtwowords = 1; + + // perhaps we forgot to hit space and two words ran together + + if ((nsug < maxSug) && (nsug > -1) && + (!cpdsuggest || (nsug < oldSug + maxcpdsugs)) && + check_forbidden(word, strlen(word))) { + nsug = twowords(wlst, word, nsug, cpdsuggest); + } + + } // repeating ``for'' statement compounding support + + if (nsug < 0) { + for (int i = 0; i < maxSug; i++) + if (wlst[i] != NULL) + free(wlst[i]); + free(wlst); + return -1; + } + + *slst = wlst; + return nsug; +} +#endif // END OF HUNSPELL_EXPERIMENTAL CODE + +// suggestions for an uppercase word (html -> HTML) +int SuggestMgr::capchars_utf(char** wlst, + const w_char* word, + int wl, + int ns, + int cpdsuggest) { + char candidate[MAXSWUTF8L]; + w_char candidate_utf[MAXSWL]; + memcpy(candidate_utf, word, wl * sizeof(w_char)); + mkallcap_utf(candidate_utf, wl, langnum); + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); + return testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, + NULL); +} + +// suggestions for an uppercase word (html -> HTML) +int SuggestMgr::capchars(char** wlst, + const char* word, + int ns, + int cpdsuggest) { + std::string candidate(word); + mkallcap(candidate, csconv); + return testsug(wlst, candidate.data(), candidate.size(), ns, cpdsuggest, NULL, + NULL); +} + +// suggestions for when chose the wrong char out of a related set +int SuggestMgr::mapchars(char** wlst, + const char* word, + int ns, + int cpdsuggest) { + char candidate[MAXSWUTF8L]; + clock_t timelimit; + int timer; + candidate[0] = '\0'; + + int wl = strlen(word); + if (wl < 2 || !pAMgr) + return ns; + + int nummap = pAMgr->get_nummap(); + struct mapentry* maptable = pAMgr->get_maptable(); + if (maptable == NULL) + return ns; + + timelimit = clock(); + timer = MINTIMER; + return map_related(word, (char*)&candidate, 0, 0, wlst, cpdsuggest, ns, + maptable, nummap, &timer, &timelimit); +} + +int SuggestMgr::map_related(const char* word, + char* candidate, + int wn, + int cn, + char** wlst, + int cpdsuggest, + int ns, + const mapentry* maptable, + int nummap, + int* timer, + clock_t* timelimit) { + if (*(word + wn) == '\0') { + int cwrd = 1; + *(candidate + cn) = '\0'; + int wl = strlen(candidate); + for (int m = 0; m < ns; m++) { + if (strcmp(candidate, wlst[m]) == 0) { + cwrd = 0; + break; + } + } + if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { + if (ns < maxSug) { + wlst[ns] = mystrdup(candidate); + if (wlst[ns] == NULL) + return -1; + ns++; + } + } + return ns; + } + int in_map = 0; + for (int j = 0; j < nummap; j++) { + for (int k = 0; k < maptable[j].len; k++) { + int len = strlen(maptable[j].set[k]); + if (strncmp(maptable[j].set[k], word + wn, len) == 0) { + in_map = 1; + for (int l = 0; l < maptable[j].len; l++) { + strcpy(candidate + cn, maptable[j].set[l]); + ns = map_related(word, candidate, wn + len, strlen(candidate), wlst, + cpdsuggest, ns, maptable, nummap, timer, timelimit); + if (!(*timer)) + return ns; + } + } + } + } + if (!in_map) { + *(candidate + cn) = *(word + wn); + ns = map_related(word, candidate, wn + 1, cn + 1, wlst, cpdsuggest, ns, + maptable, nummap, timer, timelimit); + } + return ns; +} + +// suggestions for a typical fault of spelling, that +// differs with more, than 1 letter from the right form. +int SuggestMgr::replchars(char** wlst, + const char* word, + int ns, + int cpdsuggest) { + char candidate[MAXSWUTF8L]; + const char* r; + int lenr, lenp; + int wl = strlen(word); + if (wl < 2 || !pAMgr) + return ns; + int numrep = pAMgr->get_numrep(); + struct replentry* reptable = pAMgr->get_reptable(); + if (reptable == NULL) + return ns; + for (int i = 0; i < numrep; i++) { + r = word; + lenr = strlen(reptable[i].pattern2); + lenp = strlen(reptable[i].pattern); + // search every occurence of the pattern in the word + while ((r = strstr(r, reptable[i].pattern)) != NULL && + (!reptable[i].end || strlen(r) == strlen(reptable[i].pattern)) && + (!reptable[i].start || r == word)) { + strcpy(candidate, word); + if (r - word + lenr + strlen(r + lenp) >= MAXSWUTF8L) + break; + strcpy(candidate + (r - word), reptable[i].pattern2); + strcpy(candidate + (r - word) + lenr, r + lenp); + ns = testsug(wlst, candidate, wl - lenp + lenr, ns, cpdsuggest, NULL, + NULL); + if (ns == -1) + return -1; + // check REP suggestions with space + char* sp = strchr(candidate, ' '); + if (sp) { + char* prev = candidate; + while (sp) { + *sp = '\0'; + if (checkword(prev, strlen(prev), 0, NULL, NULL)) { + int oldns = ns; + *sp = ' '; + ns = testsug(wlst, sp + 1, strlen(sp + 1), ns, cpdsuggest, NULL, + NULL); + if (ns == -1) + return -1; + if (oldns < ns) { + free(wlst[ns - 1]); + wlst[ns - 1] = mystrdup(candidate); + if (!wlst[ns - 1]) + return -1; + } + } + *sp = ' '; + prev = sp + 1; + sp = strchr(prev, ' '); + } + } + r++; // search for the next letter + } + } + return ns; +} + +// perhaps we doubled two characters (pattern aba -> ababa, for example vacation +// -> vacacation) +int SuggestMgr::doubletwochars(char** wlst, + const char* word, + int ns, + int cpdsuggest) { + char candidate[MAXSWUTF8L]; + int state = 0; + int wl = strlen(word); + if (wl < 5 || !pAMgr) + return ns; + for (int i = 2; i < wl; i++) { + if (word[i] == word[i - 2]) { + state++; + if (state == 3) { + strcpy(candidate, word); + strcpy(candidate + i - 1, word + i + 1); + ns = testsug(wlst, candidate, wl - 2, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + state = 0; + } + } else { + state = 0; + } + } + return ns; +} + +// perhaps we doubled two characters (pattern aba -> ababa, for example vacation +// -> vacacation) +int SuggestMgr::doubletwochars_utf(char** wlst, + const w_char* word, + int wl, + int ns, + int cpdsuggest) { + w_char candidate_utf[MAXSWL]; + char candidate[MAXSWUTF8L]; + int state = 0; + if (wl < 5 || !pAMgr) + return ns; + for (int i = 2; i < wl; i++) { + if (w_char_eq(word[i], word[i - 2])) { + state++; + if (state == 3) { + memcpy(candidate_utf, word, (i - 1) * sizeof(w_char)); + memcpy(candidate_utf + i - 1, word + i + 1, + (wl - i - 1) * sizeof(w_char)); + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl - 2); + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, + NULL); + if (ns == -1) + return -1; + state = 0; + } + } else { + state = 0; + } + } + return ns; +} + +// error is wrong char in place of correct one (case and keyboard related +// version) +int SuggestMgr::badcharkey(char** wlst, + const char* word, + int ns, + int cpdsuggest) { + char tmpc; + char candidate[MAXSWUTF8L]; + int wl = strlen(word); + strcpy(candidate, word); + // swap out each char one by one and try uppercase and neighbor + // keyboard chars in its place to see if that makes a good word + + for (int i = 0; i < wl; i++) { + tmpc = candidate[i]; + // check with uppercase letters + candidate[i] = csconv[((unsigned char)tmpc)].cupper; + if (tmpc != candidate[i]) { + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + candidate[i] = tmpc; + } + // check neighbor characters in keyboard string + if (!ckey) + continue; + char* loc = strchr(ckey, tmpc); + while (loc) { + if ((loc > ckey) && (*(loc - 1) != '|')) { + candidate[i] = *(loc - 1); + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + } + if ((*(loc + 1) != '|') && (*(loc + 1) != '\0')) { + candidate[i] = *(loc + 1); + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + } + loc = strchr(loc + 1, tmpc); + } + candidate[i] = tmpc; + } + return ns; +} + +// error is wrong char in place of correct one (case and keyboard related +// version) +int SuggestMgr::badcharkey_utf(char** wlst, + const w_char* word, + int wl, + int ns, + int cpdsuggest) { + w_char tmpc; + w_char candidate_utf[MAXSWL]; + char candidate[MAXSWUTF8L]; + memcpy(candidate_utf, word, wl * sizeof(w_char)); + // swap out each char one by one and try all the tryme + // chars in its place to see if that makes a good word + for (int i = 0; i < wl; i++) { + tmpc = candidate_utf[i]; + // check with uppercase letters + mkallcap_utf(candidate_utf + i, 1, langnum); + if (!w_char_eq(tmpc, candidate_utf[i])) { + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, + NULL); + if (ns == -1) + return -1; + candidate_utf[i] = tmpc; + } + // check neighbor characters in keyboard string + if (!ckey) + continue; + w_char* loc = ckey_utf; + while ((loc < (ckey_utf + ckeyl)) && !w_char_eq(*loc, tmpc)) + loc++; + while (loc < (ckey_utf + ckeyl)) { + if ((loc > ckey_utf) && !w_char_eq(*(loc - 1), W_VLINE)) { + candidate_utf[i] = *(loc - 1); + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, + NULL); + if (ns == -1) + return -1; + } + if (((loc + 1) < (ckey_utf + ckeyl)) && !w_char_eq(*(loc + 1), W_VLINE)) { + candidate_utf[i] = *(loc + 1); + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, + NULL); + if (ns == -1) + return -1; + } + do { + loc++; + } while ((loc < (ckey_utf + ckeyl)) && !w_char_eq(*loc, tmpc)); + } + candidate_utf[i] = tmpc; + } + return ns; +} + +// error is wrong char in place of correct one +int SuggestMgr::badchar(char** wlst, const char* word, int ns, int cpdsuggest) { + char tmpc; + char candidate[MAXSWUTF8L]; + clock_t timelimit = clock(); + int timer = MINTIMER; + int wl = strlen(word); + strcpy(candidate, word); + // swap out each char one by one and try all the tryme + // chars in its place to see if that makes a good word + for (int j = 0; j < ctryl; j++) { + for (int i = wl - 1; i >= 0; i--) { + tmpc = candidate[i]; + if (ctry[j] == tmpc) + continue; + candidate[i] = ctry[j]; + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, &timer, &timelimit); + if (ns == -1) + return -1; + if (!timer) + return ns; + candidate[i] = tmpc; + } + } + return ns; +} + +// error is wrong char in place of correct one +int SuggestMgr::badchar_utf(char** wlst, + const w_char* word, + int wl, + int ns, + int cpdsuggest) { + w_char tmpc; + w_char candidate_utf[MAXSWL]; + char candidate[MAXSWUTF8L]; + clock_t timelimit = clock(); + int timer = MINTIMER; + memcpy(candidate_utf, word, wl * sizeof(w_char)); + // swap out each char one by one and try all the tryme + // chars in its place to see if that makes a good word + for (int j = 0; j < ctryl; j++) { + for (int i = wl - 1; i >= 0; i--) { + tmpc = candidate_utf[i]; + if (w_char_eq(tmpc, ctry_utf[j])) + continue; + candidate_utf[i] = ctry_utf[j]; + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, &timer, + &timelimit); + if (ns == -1) + return -1; + if (!timer) + return ns; + candidate_utf[i] = tmpc; + } + } + return ns; +} + +// error is word has an extra letter it does not need +int SuggestMgr::extrachar_utf(char** wlst, + const w_char* word, + int wl, + int ns, + int cpdsuggest) { + char candidate[MAXSWUTF8L]; + w_char candidate_utf[MAXSWL]; + w_char* p; + w_char tmpc = W_VLINE; // not used value, only for VCC warning message + if (wl < 2) + return ns; + // try omitting one char of word at a time + memcpy(candidate_utf, word, wl * sizeof(w_char)); + for (p = candidate_utf + wl - 1; p >= candidate_utf; p--) { + w_char tmpc2 = *p; + if (p < candidate_utf + wl - 1) + *p = tmpc; + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl - 1); + ns = + testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + tmpc = tmpc2; + } + return ns; +} + +// error is word has an extra letter it does not need +int SuggestMgr::extrachar(char** wlst, + const char* word, + int ns, + int cpdsuggest) { + char tmpc = '\0'; + char candidate[MAXSWUTF8L]; + char* p; + int wl = strlen(word); + if (wl < 2) + return ns; + // try omitting one char of word at a time + strcpy(candidate, word); + for (p = candidate + wl - 1; p >= candidate; p--) { + char tmpc2 = *p; + *p = tmpc; + ns = testsug(wlst, candidate, wl - 1, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + tmpc = tmpc2; + } + return ns; +} + +// error is missing a letter it needs +int SuggestMgr::forgotchar(char** wlst, + const char* word, + int ns, + int cpdsuggest) { + char candidate[MAXSWUTF8L + 4]; + char* p; + clock_t timelimit = clock(); + int timer = MINTIMER; + int wl = strlen(word); + // try inserting a tryme character before every letter (and the null + // terminator) + for (int i = 0; i < ctryl; i++) { + strcpy(candidate, word); + for (p = candidate + wl; p >= candidate; p--) { + *(p + 1) = *p; + *p = ctry[i]; + ns = testsug(wlst, candidate, wl + 1, ns, cpdsuggest, &timer, &timelimit); + if (ns == -1) + return -1; + if (!timer) + return ns; + } + } + return ns; +} + +// error is missing a letter it needs +int SuggestMgr::forgotchar_utf(char** wlst, + const w_char* word, + int wl, + int ns, + int cpdsuggest) { + w_char candidate_utf[MAXSWL + 1]; + char candidate[MAXSWUTF8L + 4]; + w_char* p; + clock_t timelimit = clock(); + int timer = MINTIMER; + // try inserting a tryme character at the end of the word and before every + // letter + for (int i = 0; i < ctryl; i++) { + memcpy(candidate_utf, word, wl * sizeof(w_char)); + for (p = candidate_utf + wl; p >= candidate_utf; p--) { + *(p + 1) = *p; + *p = ctry_utf[i]; + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl + 1); + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, &timer, + &timelimit); + if (ns == -1) + return -1; + if (!timer) + return ns; + } + } + return ns; +} + +/* error is should have been two words */ +int SuggestMgr::twowords(char** wlst, + const char* word, + int ns, + int cpdsuggest) { + char candidate[MAXSWUTF8L]; + char* p; + int c1, c2; + int forbidden = 0; + int cwrd; + + int wl = strlen(word); + if (wl < 3) + return ns; + + if (langnum == LANG_hu) + forbidden = check_forbidden(word, wl); + + strcpy(candidate + 1, word); + // split the string into two pieces after every char + // if both pieces are good words make them a suggestion + for (p = candidate + 1; p[1] != '\0'; p++) { + p[-1] = *p; + // go to end of the UTF-8 character + while (utf8 && ((p[1] & 0xc0) == 0x80)) { + *p = p[1]; + p++; + } + if (utf8 && p[1] == '\0') + break; // last UTF-8 character + *p = '\0'; + c1 = checkword(candidate, strlen(candidate), cpdsuggest, NULL, NULL); + if (c1) { + c2 = checkword((p + 1), strlen(p + 1), cpdsuggest, NULL, NULL); + if (c2) { + *p = ' '; + + // spec. Hungarian code (need a better compound word support) + if ((langnum == LANG_hu) && !forbidden && + // if 3 repeating letter, use - instead of space + (((p[-1] == p[1]) && + (((p > candidate + 1) && (p[-1] == p[-2])) || (p[-1] == p[2]))) || + // or multiple compounding, with more, than 6 syllables + ((c1 == 3) && (c2 >= 2)))) + *p = '-'; + + cwrd = 1; + for (int k = 0; k < ns; k++) { + if (strcmp(candidate, wlst[k]) == 0) { + cwrd = 0; + break; + } + } + if (ns < maxSug) { + if (cwrd) { + wlst[ns] = mystrdup(candidate); + if (wlst[ns] == NULL) + return -1; + ns++; + } + } else + return ns; + // add two word suggestion with dash, if TRY string contains + // "a" or "-" + // NOTE: cwrd doesn't modified for REP twoword sugg. + if (ctry && (strchr(ctry, 'a') || strchr(ctry, '-')) && + mystrlen(p + 1) > 1 && mystrlen(candidate) - mystrlen(p) > 1) { + *p = '-'; + for (int k = 0; k < ns; k++) { + if (strcmp(candidate, wlst[k]) == 0) { + cwrd = 0; + break; + } + } + if (ns < maxSug) { + if (cwrd) { + wlst[ns] = mystrdup(candidate); + if (wlst[ns] == NULL) + return -1; + ns++; + } + } else + return ns; + } + } + } + } + return ns; +} + +// error is adjacent letter were swapped +int SuggestMgr::swapchar(char** wlst, + const char* word, + int ns, + int cpdsuggest) { + char candidate[MAXSWUTF8L]; + char* p; + char tmpc; + int wl = strlen(word); + // try swapping adjacent chars one by one + strcpy(candidate, word); + for (p = candidate; p[1] != 0; p++) { + tmpc = *p; + *p = p[1]; + p[1] = tmpc; + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + p[1] = *p; + *p = tmpc; + } + // try double swaps for short words + // ahev -> have, owudl -> would + if (wl == 4 || wl == 5) { + candidate[0] = word[1]; + candidate[1] = word[0]; + candidate[2] = word[2]; + candidate[wl - 2] = word[wl - 1]; + candidate[wl - 1] = word[wl - 2]; + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + if (wl == 5) { + candidate[0] = word[0]; + candidate[1] = word[2]; + candidate[2] = word[1]; + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + } + } + return ns; +} + +// error is adjacent letter were swapped +int SuggestMgr::swapchar_utf(char** wlst, + const w_char* word, + int wl, + int ns, + int cpdsuggest) { + w_char candidate_utf[MAXSWL]; + char candidate[MAXSWUTF8L]; + w_char* p; + w_char tmpc; + int len = 0; + // try swapping adjacent chars one by one + memcpy(candidate_utf, word, wl * sizeof(w_char)); + for (p = candidate_utf; p < (candidate_utf + wl - 1); p++) { + tmpc = *p; + *p = p[1]; + p[1] = tmpc; + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); + if (len == 0) + len = strlen(candidate); + ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + p[1] = *p; + *p = tmpc; + } + // try double swaps for short words + // ahev -> have, owudl -> would, suodn -> sound + if (wl == 4 || wl == 5) { + candidate_utf[0] = word[1]; + candidate_utf[1] = word[0]; + candidate_utf[2] = word[2]; + candidate_utf[wl - 2] = word[wl - 1]; + candidate_utf[wl - 1] = word[wl - 2]; + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); + ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + if (wl == 5) { + candidate_utf[0] = word[0]; + candidate_utf[1] = word[2]; + candidate_utf[2] = word[1]; + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); + ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + } + } + return ns; +} + +// error is not adjacent letter were swapped +int SuggestMgr::longswapchar(char** wlst, + const char* word, + int ns, + int cpdsuggest) { + char candidate[MAXSWUTF8L]; + char* p; + char* q; + char tmpc; + int wl = strlen(word); + // try swapping not adjacent chars one by one + strcpy(candidate, word); + for (p = candidate; *p != 0; p++) { + for (q = candidate; *q != 0; q++) { + if (abs((int)(p - q)) > 1) { + tmpc = *p; + *p = *q; + *q = tmpc; + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + *q = *p; + *p = tmpc; + } + } + } + return ns; +} + +// error is adjacent letter were swapped +int SuggestMgr::longswapchar_utf(char** wlst, + const w_char* word, + int wl, + int ns, + int cpdsuggest) { + w_char candidate_utf[MAXSWL]; + char candidate[MAXSWUTF8L]; + w_char* p; + w_char* q; + w_char tmpc; + // try swapping not adjacent chars + memcpy(candidate_utf, word, wl * sizeof(w_char)); + for (p = candidate_utf; p < (candidate_utf + wl); p++) { + for (q = candidate_utf; q < (candidate_utf + wl); q++) { + if (abs((int)(p - q)) > 1) { + tmpc = *p; + *p = *q; + *q = tmpc; + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, + NULL); + if (ns == -1) + return -1; + *q = *p; + *p = tmpc; + } + } + } + return ns; +} + +// error is a letter was moved +int SuggestMgr::movechar(char** wlst, + const char* word, + int ns, + int cpdsuggest) { + char candidate[MAXSWUTF8L]; + char* p; + char* q; + char tmpc; + + int wl = strlen(word); + // try moving a char + strcpy(candidate, word); + for (p = candidate; *p != 0; p++) { + for (q = p + 1; (*q != 0) && ((q - p) < 10); q++) { + tmpc = *(q - 1); + *(q - 1) = *q; + *q = tmpc; + if ((q - p) < 2) + continue; // omit swap char + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + } + strcpy(candidate, word); + } + for (p = candidate + wl - 1; p > candidate; p--) { + for (q = p - 1; (q >= candidate) && ((p - q) < 10); q--) { + tmpc = *(q + 1); + *(q + 1) = *q; + *q = tmpc; + if ((p - q) < 2) + continue; // omit swap char + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); + if (ns == -1) + return -1; + } + strcpy(candidate, word); + } + return ns; +} + +// error is a letter was moved +int SuggestMgr::movechar_utf(char** wlst, + const w_char* word, + int wl, + int ns, + int cpdsuggest) { + w_char candidate_utf[MAXSWL]; + char candidate[MAXSWUTF8L]; + w_char* p; + w_char* q; + w_char tmpc; + // try moving a char + memcpy(candidate_utf, word, wl * sizeof(w_char)); + for (p = candidate_utf; p < (candidate_utf + wl); p++) { + for (q = p + 1; (q < (candidate_utf + wl)) && ((q - p) < 10); q++) { + tmpc = *(q - 1); + *(q - 1) = *q; + *q = tmpc; + if ((q - p) < 2) + continue; // omit swap char + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, + NULL); + if (ns == -1) + return -1; + } + memcpy(candidate_utf, word, wl * sizeof(w_char)); + } + for (p = candidate_utf + wl - 1; p > candidate_utf; p--) { + for (q = p - 1; (q >= candidate_utf) && ((p - q) < 10); q--) { + tmpc = *(q + 1); + *(q + 1) = *q; + *q = tmpc; + if ((p - q) < 2) + continue; // omit swap char + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, + NULL); + if (ns == -1) + return -1; + } + memcpy(candidate_utf, word, wl * sizeof(w_char)); + } + return ns; +} + +// generate a set of suggestions for very poorly spelled words +int SuggestMgr::ngsuggest(char** wlst, + char* w, + int ns, + HashMgr** pHMgr, + int md) { + int i, j; + int lval; + int sc; + int lp, lpphon; + int nonbmp = 0; + + // exhaustively search through all root words + // keeping track of the MAX_ROOTS most similar root words + struct hentry* roots[MAX_ROOTS]; + char* rootsphon[MAX_ROOTS]; + int scores[MAX_ROOTS]; + int scoresphon[MAX_ROOTS]; + for (i = 0; i < MAX_ROOTS; i++) { + roots[i] = NULL; + scores[i] = -100 * i; + rootsphon[i] = NULL; + scoresphon[i] = -100 * i; + } + lp = MAX_ROOTS - 1; + lpphon = MAX_ROOTS - 1; + int low = NGRAM_LOWERING; + + std::string w2; + char f[MAXSWUTF8L]; + const char* word = w; + + // word reversing wrapper for complex prefixes + if (complexprefixes) { + w2.assign(w); + if (utf8) + reverseword_utf(w2); + else + reverseword(w2); + word = w2.c_str(); + } + + char mw[MAXSWUTF8L]; + w_char u8[MAXSWL]; + int nc = strlen(word); + int n = (utf8) ? u8_u16(u8, MAXSWL, word) : nc; + + // set character based ngram suggestion for words with non-BMP Unicode + // characters + if (n == -1) { + utf8 = 0; // XXX not state-free + n = nc; + nonbmp = 1; + low = 0; + } + + struct hentry* hp = NULL; + int col = -1; + phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL; + char target[MAXSWUTF8L]; + std::string candidate; + if (ph) { + if (utf8) { + std::vector _w; + int _wl = u8_u16(_w, word); + mkallcap_utf(_w, _wl, langnum); + u16_u8(candidate, _w); + } else { + candidate.assign(word); + if (!nonbmp) + mkallcap(candidate, csconv); + } + phonet(candidate.c_str(), target, nc, + *ph); // XXX phonet() is 8-bit (nc, not n) + } + + FLAG forbiddenword = pAMgr ? pAMgr->get_forbiddenword() : FLAG_NULL; + FLAG nosuggest = pAMgr ? pAMgr->get_nosuggest() : FLAG_NULL; + FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL; + FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL; + + for (i = 0; i < md; i++) { + while (0 != (hp = (pHMgr[i])->walk_hashtable(col, hp))) { + if ((hp->astr) && (pAMgr) && + (TESTAFF(hp->astr, forbiddenword, hp->alen) || + TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) || + TESTAFF(hp->astr, nosuggest, hp->alen) || + TESTAFF(hp->astr, nongramsuggest, hp->alen) || + TESTAFF(hp->astr, onlyincompound, hp->alen))) + continue; + + sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) + + leftcommonsubstring(word, HENTRY_WORD(hp)); + + // check special pronounciation + if ((hp->var & H_OPT_PHON) && + copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) { + int sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) + + +leftcommonsubstring(word, f); + if (sc2 > sc) + sc = sc2; + } + + int scphon = -20000; + if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) { + char target2[MAXSWUTF8L]; + if (utf8) { + std::vector _w; + int _wl = u8_u16(_w, HENTRY_WORD(hp)); + mkallcap_utf(_w, _wl, langnum); + u16_u8(candidate, _w); + } else { + candidate.assign(HENTRY_WORD(hp)); + mkallcap(candidate, csconv); + } + phonet(candidate.c_str(), target2, -1, *ph); + scphon = 2 * ngram(3, target, target2, NGRAM_LONGER_WORSE); + } + + if (sc > scores[lp]) { + scores[lp] = sc; + roots[lp] = hp; + lval = sc; + for (j = 0; j < MAX_ROOTS; j++) + if (scores[j] < lval) { + lp = j; + lval = scores[j]; + } + } + + if (scphon > scoresphon[lpphon]) { + scoresphon[lpphon] = scphon; + rootsphon[lpphon] = HENTRY_WORD(hp); + lval = scphon; + for (j = 0; j < MAX_ROOTS; j++) + if (scoresphon[j] < lval) { + lpphon = j; + lval = scoresphon[j]; + } + } + } + } + + // find minimum threshold for a passable suggestion + // mangle original word three differnt ways + // and score them to generate a minimum acceptable score + int thresh = 0; + for (int sp = 1; sp < 4; sp++) { + if (utf8) { + for (int k = sp; k < n; k += 4) + *((unsigned short*)u8 + k) = '*'; + u16_u8(mw, MAXSWUTF8L, u8, n); + thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low); + } else { + strcpy(mw, word); + for (int k = sp; k < n; k += 4) + *(mw + k) = '*'; + thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low); + } + } + thresh = thresh / 3; + thresh--; + + // now expand affixes on each of these root words and + // and use length adjusted ngram scores to select + // possible suggestions + char* guess[MAX_GUESS]; + char* guessorig[MAX_GUESS]; + int gscore[MAX_GUESS]; + for (i = 0; i < MAX_GUESS; i++) { + guess[i] = NULL; + guessorig[i] = NULL; + gscore[i] = -100 * i; + } + + lp = MAX_GUESS - 1; + + struct guessword* glst; + glst = (struct guessword*)calloc(MAX_WORDS, sizeof(struct guessword)); + if (!glst) { + if (nonbmp) + utf8 = 1; + return ns; + } + + for (i = 0; i < MAX_ROOTS; i++) { + if (roots[i]) { + struct hentry* rp = roots[i]; + int nw = pAMgr->expand_rootword( + glst, MAX_WORDS, HENTRY_WORD(rp), rp->blen, rp->astr, rp->alen, word, + nc, + ((rp->var & H_OPT_PHON) ? copy_field(f, HENTRY_DATA(rp), MORPH_PHON) + : NULL)); + + for (int k = 0; k < nw; k++) { + sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH + low) + + leftcommonsubstring(word, glst[k].word); + + if (sc > thresh) { + if (sc > gscore[lp]) { + if (guess[lp]) { + free(guess[lp]); + if (guessorig[lp]) { + free(guessorig[lp]); + guessorig[lp] = NULL; + } + } + gscore[lp] = sc; + guess[lp] = glst[k].word; + guessorig[lp] = glst[k].orig; + lval = sc; + for (j = 0; j < MAX_GUESS; j++) + if (gscore[j] < lval) { + lp = j; + lval = gscore[j]; + } + } else { + free(glst[k].word); + if (glst[k].orig) + free(glst[k].orig); + } + } else { + free(glst[k].word); + if (glst[k].orig) + free(glst[k].orig); + } + } + } + } + free(glst); + + // now we are done generating guesses + // sort in order of decreasing score + + bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS); + if (ph) + bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS); + + // weight suggestions with a similarity index, based on + // the longest common subsequent algorithm and resort + + int is_swap = 0; + int re = 0; + double fact = 1.0; + if (pAMgr) { + int maxd = pAMgr->get_maxdiff(); + if (maxd >= 0) + fact = (10.0 - maxd) / 5.0; + } + + for (i = 0; i < MAX_GUESS; i++) { + if (guess[i]) { + // lowering guess[i] + std::string gl; + int len; + if (utf8) { + std::vector _w; + len = u8_u16(_w, guess[i]); + mkallsmall_utf(_w, len, langnum); + u16_u8(gl, _w); + } else { + gl.assign(guess[i]); + if (!nonbmp) + mkallsmall(gl, csconv); + len = strlen(guess[i]); + } + + int _lcs = lcslen(word, gl.c_str()); + + // same characters with different casing + if ((n == len) && (n == _lcs)) { + gscore[i] += 2000; + break; + } + // using 2-gram instead of 3, and other weightening + + re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) + + ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED); + + gscore[i] = + // length of longest common subsequent minus length difference + 2 * _lcs - abs((int)(n - len)) + + // weight length of the left common substring + leftcommonsubstring(word, gl.c_str()) + + // weight equal character positions + (!nonbmp && commoncharacterpositions(word, gl.c_str(), &is_swap) + ? 1 + : 0) + + // swap character (not neighboring) + ((is_swap) ? 10 : 0) + + // ngram + ngram(4, word, gl, NGRAM_ANY_MISMATCH + low) + + // weighted ngrams + re + + // different limit for dictionaries with PHONE rules + (ph ? (re < len * fact ? -1000 : 0) + : (re < (n + len) * fact ? -1000 : 0)); + } + } + + bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS); + + // phonetic version + if (ph) + for (i = 0; i < MAX_ROOTS; i++) { + if (rootsphon[i]) { + // lowering rootphon[i] + std::string gl; + int len; + if (utf8) { + std::vector _w; + len = u8_u16(_w, rootsphon[i]); + mkallsmall_utf(_w, len, langnum); + u16_u8(gl, _w); + } else { + gl.assign(rootsphon[i]); + if (!nonbmp) + mkallsmall(gl, csconv); + len = strlen(rootsphon[i]); + } + + // heuristic weigthing of ngram scores + scoresphon[i] += 2 * lcslen(word, gl) - abs((int)(n - len)) + + // weight length of the left common substring + leftcommonsubstring(word, gl.c_str()); + } + } + + if (ph) + bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS); + + // copy over + int oldns = ns; + + int same = 0; + for (i = 0; i < MAX_GUESS; i++) { + if (guess[i]) { + if ((ns < oldns + maxngramsugs) && (ns < maxSug) && + (!same || (gscore[i] > 1000))) { + int unique = 1; + // leave only excellent suggestions, if exists + if (gscore[i] > 1000) + same = 1; + else if (gscore[i] < -100) { + same = 1; + // keep the best ngram suggestions, unless in ONLYMAXDIFF mode + if (ns > oldns || (pAMgr && pAMgr->get_onlymaxdiff())) { + free(guess[i]); + if (guessorig[i]) + free(guessorig[i]); + continue; + } + } + for (j = 0; j < ns; j++) { + // don't suggest previous suggestions or a previous suggestion with + // prefixes or affixes + if ((!guessorig[i] && strstr(guess[i], wlst[j])) || + (guessorig[i] && strstr(guessorig[i], wlst[j])) || + // check forbidden words + !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) { + unique = 0; + break; + } + } + if (unique) { + wlst[ns++] = guess[i]; + if (guessorig[i]) { + free(guess[i]); + wlst[ns - 1] = guessorig[i]; + } + } else { + free(guess[i]); + if (guessorig[i]) + free(guessorig[i]); + } + } else { + free(guess[i]); + if (guessorig[i]) + free(guessorig[i]); + } + } + } + + oldns = ns; + if (ph) + for (i = 0; i < MAX_ROOTS; i++) { + if (rootsphon[i]) { + if ((ns < oldns + MAXPHONSUGS) && (ns < maxSug)) { + int unique = 1; + for (j = 0; j < ns; j++) { + // don't suggest previous suggestions or a previous suggestion with + // prefixes or affixes + if (strstr(rootsphon[i], wlst[j]) || + // check forbidden words + !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) { + unique = 0; + break; + } + } + if (unique) { + wlst[ns++] = mystrdup(rootsphon[i]); + if (!wlst[ns - 1]) + return ns - 1; + } + } + } + } + + if (nonbmp) + utf8 = 1; + return ns; +} + +// see if a candidate suggestion is spelled correctly +// needs to check both root words and words with affixes + +// obsolote MySpell-HU modifications: +// return value 2 and 3 marks compounding with hyphen (-) +// `3' marks roots without suffix +int SuggestMgr::checkword(const char* word, + int len, + int cpdsuggest, + int* timer, + clock_t* timelimit) { + struct hentry* rv = NULL; + struct hentry* rv2 = NULL; + int nosuffix = 0; + + // check time limit + if (timer) { + (*timer)--; + if (!(*timer) && timelimit) { + if ((clock() - *timelimit) > TIMELIMIT) + return 0; + *timer = MAXPLUSTIMER; + } + } + + if (pAMgr) { + if (cpdsuggest == 1) { + if (pAMgr->get_compound()) { + rv = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 1, + 0); // EXT + if (rv && + (!(rv2 = pAMgr->lookup(word)) || !rv2->astr || + !(TESTAFF(rv2->astr, pAMgr->get_forbiddenword(), rv2->alen) || + TESTAFF(rv2->astr, pAMgr->get_nosuggest(), rv2->alen)))) + return 3; // XXX obsolote categorisation + only ICONV needs affix + // flag check? + } + return 0; + } + + rv = pAMgr->lookup(word); + + if (rv) { + if ((rv->astr) && + (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen))) + return 0; + while (rv) { + if (rv->astr && + (TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) { + rv = rv->next_homonym; + } else + break; + } + } else + rv = pAMgr->prefix_check(word, len, + 0); // only prefix, and prefix + suffix XXX + + if (rv) { + nosuffix = 1; + } else { + rv = pAMgr->suffix_check(word, len, 0, NULL, NULL, 0, + NULL); // only suffix + } + + if (!rv && pAMgr->have_contclass()) { + rv = pAMgr->suffix_check_twosfx(word, len, 0, NULL, FLAG_NULL); + if (!rv) + rv = pAMgr->prefix_check_twosfx(word, len, 1, FLAG_NULL); + } + + // check forbidden words + if ((rv) && (rv->astr) && + (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) + return 0; + + if (rv) { // XXX obsolote + if ((pAMgr->get_compoundflag()) && + TESTAFF(rv->astr, pAMgr->get_compoundflag(), rv->alen)) + return 2 + nosuffix; + return 1; + } + } + return 0; +} + +int SuggestMgr::check_forbidden(const char* word, int len) { + struct hentry* rv = NULL; + + if (pAMgr) { + rv = pAMgr->lookup(word); + if (rv && rv->astr && + (TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) + rv = NULL; + if (!(pAMgr->prefix_check(word, len, 1))) + rv = pAMgr->suffix_check(word, len, 0, NULL, NULL, 0, + NULL); // prefix+suffix, suffix + // check forbidden words + if ((rv) && (rv->astr) && + TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen)) + return 1; + } + return 0; +} + +#ifdef HUNSPELL_EXPERIMENTAL +// suggest possible stems +int SuggestMgr::suggest_pos_stems(char*** slst, const char* w, int nsug) { + char** wlst; + + struct hentry* rv = NULL; + + char w2[MAXSWUTF8L]; + const char* word = w; + + // word reversing wrapper for complex prefixes + if (complexprefixes) { + strcpy(w2, w); + if (utf8) + reverseword_utf(w2); + else + reverseword(w2); + word = w2; + } + + int wl = strlen(word); + + if (*slst) { + wlst = *slst; + } else { + wlst = (char**)calloc(maxSug, sizeof(char*)); + if (wlst == NULL) + return -1; + } + + rv = pAMgr->suffix_check(word, wl, 0, NULL, wlst, maxSug, &nsug); + + // delete dash from end of word + if (nsug > 0) { + for (int j = 0; j < nsug; j++) { + if (wlst[j][strlen(wlst[j]) - 1] == '-') + wlst[j][strlen(wlst[j]) - 1] = '\0'; + } + } + + *slst = wlst; + return nsug; +} +#endif // END OF HUNSPELL_EXPERIMENTAL CODE + +char* SuggestMgr::suggest_morph(const char* w) { + char result[MAXLNLEN]; + char* r = (char*)result; + char* st; + + struct hentry* rv = NULL; + + *result = '\0'; + + if (!pAMgr) + return NULL; + + std::string w2; + const char* word = w; + + // word reversing wrapper for complex prefixes + if (complexprefixes) { + w2.assign(w); + if (utf8) + reverseword_utf(w2); + else + reverseword(w2); + word = w2.c_str(); + } + + rv = pAMgr->lookup(word); + + while (rv) { + if ((!rv->astr) || + !(TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) { + if (!HENTRY_FIND(rv, MORPH_STEM)) { + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, MORPH_STEM, MAXLNLEN); + mystrcat(result, word, MAXLNLEN); + } + if (HENTRY_DATA(rv)) { + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN); + } + mystrcat(result, "\n", MAXLNLEN); + } + rv = rv->next_homonym; + } + + st = pAMgr->affix_check_morph(word, strlen(word)); + if (st) { + mystrcat(result, st, MAXLNLEN); + free(st); + } + + if (pAMgr->get_compound() && (*result == '\0')) + pAMgr->compound_check_morph(word, strlen(word), 0, 0, 100, 0, NULL, 0, &r, + NULL); + + return (*result) ? mystrdup(line_uniq(result, MSEP_REC)) : NULL; +} + +#ifdef HUNSPELL_EXPERIMENTAL +char* SuggestMgr::suggest_morph_for_spelling_error(const char* word) { + char* p = NULL; + char** wlst = (char**)calloc(maxSug, sizeof(char*)); + if (!**wlst) + return NULL; + // we will use only the first suggestion + for (int i = 0; i < maxSug - 1; i++) + wlst[i] = ""; + int ns = suggest(&wlst, word, maxSug - 1, NULL); + if (ns == maxSug) { + p = suggest_morph(wlst[maxSug - 1]); + free(wlst[maxSug - 1]); + } + if (wlst) + free(wlst); + return p; +} +#endif // END OF HUNSPELL_EXPERIMENTAL CODE + +/* affixation */ +char* SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) { + char result[MAXLNLEN]; + *result = '\0'; + int sfxcount = get_sfxcount(pattern); + + if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount) + return NULL; + + if (HENTRY_DATA(rv)) { + char* aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->alen, + HENTRY_DATA(rv), pattern, 0); + if (aff) { + mystrcat(result, aff, MAXLNLEN); + mystrcat(result, "\n", MAXLNLEN); + free(aff); + } + } + + // check all allomorphs + char allomorph[MAXLNLEN]; + char* p = NULL; + if (HENTRY_DATA(rv)) + p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH); + while (p) { + struct hentry* rv2 = NULL; + p += MORPH_TAG_LEN; + int plen = fieldlen(p); + strncpy(allomorph, p, plen); + allomorph[plen] = '\0'; + rv2 = pAMgr->lookup(allomorph); + while (rv2) { + // if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <= + // sfxcount) { + if (HENTRY_DATA(rv2)) { + char* st = (char*)strstr(HENTRY_DATA2(rv2), MORPH_STEM); + if (st && (strncmp(st + MORPH_TAG_LEN, HENTRY_WORD(rv), + fieldlen(st + MORPH_TAG_LEN)) == 0)) { + char* aff = pAMgr->morphgen(HENTRY_WORD(rv2), rv2->blen, rv2->astr, + rv2->alen, HENTRY_DATA(rv2), pattern, 0); + if (aff) { + mystrcat(result, aff, MAXLNLEN); + mystrcat(result, "\n", MAXLNLEN); + free(aff); + } + } + } + rv2 = rv2->next_homonym; + } + p = strstr(p + plen, MORPH_ALLOMORPH); + } + + return (*result) ? mystrdup(result) : NULL; +} + +char* SuggestMgr::suggest_gen(char** desc, int n, const char* pattern) { + if (n == 0 || !pAMgr) + return NULL; + + char result[MAXLNLEN]; + char result2[MAXLNLEN]; + std::string newpattern; + *result2 = '\0'; + struct hentry* rv = NULL; + + // search affixed forms with and without derivational suffixes + while (1) { + for (int k = 0; k < n; k++) { + *result = '\0'; + // add compound word parts (except the last one) + char* s = (char*)desc[k]; + char* part = strstr(s, MORPH_PART); + if (part) { + char* nextpart = strstr(part + 1, MORPH_PART); + while (nextpart) { + copy_field(result + strlen(result), part, MORPH_PART); + part = nextpart; + nextpart = strstr(part + 1, MORPH_PART); + } + s = part; + } + + char** pl; + std::string tok(s); + size_t pos = tok.find(" | "); + while (pos != std::string::npos) { + tok[pos + 1] = MSEP_ALT; + pos = tok.find(" | ", pos); + } + int pln = line_tok(tok.c_str(), &pl, MSEP_ALT); + for (int i = 0; i < pln; i++) { + // remove inflectional and terminal suffixes + char* is = strstr(pl[i], MORPH_INFL_SFX); + if (is) + *is = '\0'; + char* ts = strstr(pl[i], MORPH_TERM_SFX); + while (ts) { + *ts = '_'; + ts = strstr(pl[i], MORPH_TERM_SFX); + } + char* st = strstr(s, MORPH_STEM); + if (st) { + copy_field(tok, st, MORPH_STEM); + rv = pAMgr->lookup(tok.c_str()); + while (rv) { + std::string newpat(pl[i]); + newpat.append(pattern); + char* sg = suggest_hentry_gen(rv, newpat.c_str()); + if (!sg) + sg = suggest_hentry_gen(rv, pattern); + if (sg) { + char** gen; + int genl = line_tok(sg, &gen, MSEP_REC); + free(sg); + sg = NULL; + for (int j = 0; j < genl; j++) { + if (strstr(pl[i], MORPH_SURF_PFX)) { + int r2l = strlen(result2); + result2[r2l] = MSEP_REC; + strcpy(result2 + r2l + 1, result); + copy_field(result2 + strlen(result2), pl[i], MORPH_SURF_PFX); + mystrcat(result2, gen[j], MAXLNLEN); + } else { + sprintf(result2 + strlen(result2), "%c%s%s", MSEP_REC, result, + gen[j]); + } + } + freelist(&gen, genl); + } + rv = rv->next_homonym; + } + } + } + freelist(&pl, pln); + } + + if (*result2 || !strstr(pattern, MORPH_DERI_SFX)) + break; + + newpattern.assign(pattern); + mystrrep(newpattern, MORPH_DERI_SFX, MORPH_TERM_SFX); + pattern = newpattern.c_str(); + } + return (*result2 ? mystrdup(result2) : NULL); +} + +// generate an n-gram score comparing s1 and s2 +int SuggestMgr::ngram(int n, + const std::string& s1, + const std::string& s2, + int opt) { + int nscore = 0; + int ns; + int l1; + int l2; + int test = 0; + + if (utf8) { + std::vector su1; + std::vector su2; + l1 = u8_u16(su1, s1); + l2 = u8_u16(su2, s2); + if ((l2 <= 0) || (l1 == -1)) + return 0; + // lowering dictionary word + if (opt & NGRAM_LOWERING) + mkallsmall_utf(su2, l2, langnum); + for (int j = 1; j <= n; j++) { + ns = 0; + for (int i = 0; i <= (l1 - j); i++) { + int k = 0; + for (int l = 0; l <= (l2 - j); l++) { + for (k = 0; k < j; k++) { + w_char& c1 = su1[i + k]; + w_char& c2 = su2[l + k]; + if ((c1.l != c2.l) || (c1.h != c2.h)) + break; + } + if (k == j) { + ns++; + break; + } + } + if (k != j && opt & NGRAM_WEIGHTED) { + ns--; + test++; + if (i == 0 || i == l1 - j) + ns--; // side weight + } + } + nscore = nscore + ns; + if (ns < 2 && !(opt & NGRAM_WEIGHTED)) + break; + } + } else { + l2 = s2.size(); + if (l2 == 0) + return 0; + l1 = s1.size(); + std::string t(s2); + if (opt & NGRAM_LOWERING) + mkallsmall(t, csconv); + for (int j = 1; j <= n; j++) { + ns = 0; + for (int i = 0; i <= (l1 - j); i++) { + std::string temp(s1.substr(i, j)); + if (t.find(temp) != std::string::npos) { + ns++; + } else if (opt & NGRAM_WEIGHTED) { + ns--; + test++; + if (i == 0 || i == l1 - j) + ns--; // side weight + } + } + nscore = nscore + ns; + if (ns < 2 && !(opt & NGRAM_WEIGHTED)) + break; + } + } + + ns = 0; + if (opt & NGRAM_LONGER_WORSE) + ns = (l2 - l1) - 2; + if (opt & NGRAM_ANY_MISMATCH) + ns = abs(l2 - l1) - 2; + ns = (nscore - ((ns > 0) ? ns : 0)); + return ns; +} + +// length of the left common substring of s1 and (decapitalised) s2 +int SuggestMgr::leftcommonsubstring(const char* s1, const char* s2) { + if (utf8) { + w_char su1[MAXSWL]; + w_char su2[MAXSWL]; + su1[0].l = su2[0].l = su1[0].h = su2[0].h = 0; + // decapitalize dictionary word + if (complexprefixes) { + int l1 = u8_u16(su1, MAXSWL, s1); + int l2 = u8_u16(su2, MAXSWL, s2); + if (*((short*)su1 + l1 - 1) == *((short*)su2 + l2 - 1)) + return 1; + } else { + int i; + u8_u16(su1, 1, s1); + u8_u16(su2, 1, s2); + unsigned short idx = (su2->h << 8) + su2->l; + unsigned short otheridx = (su1->h << 8) + su1->l; + if (otheridx != idx && (otheridx != unicodetolower(idx, langnum))) + return 0; + int l1 = u8_u16(su1, MAXSWL, s1); + int l2 = u8_u16(su2, MAXSWL, s2); + for (i = 1; (i < l1) && (i < l2) && (su1[i].l == su2[i].l) && + (su1[i].h == su2[i].h); + i++) + ; + return i; + } + } else { + if (complexprefixes) { + int l1 = strlen(s1); + int l2 = strlen(s2); + if (*(s2 + l1 - 1) == *(s2 + l2 - 1)) + return 1; + } else { + const char* olds = s1; + // decapitalise dictionary word + if ((*s1 != *s2) && (*s1 != csconv[((unsigned char)*s2)].clower)) + return 0; + do { + s1++; + s2++; + } while ((*s1 == *s2) && (*s1 != '\0')); + return (int)(s1 - olds); + } + } + return 0; +} + +int SuggestMgr::commoncharacterpositions(const char* s1, + const char* s2, + int* is_swap) { + int num = 0; + int diff = 0; + int diffpos[2]; + *is_swap = 0; + if (utf8) { + w_char su1[MAXSWL]; + w_char su2[MAXSWL]; + int l1 = u8_u16(su1, MAXSWL, s1); + int l2 = u8_u16(su2, MAXSWL, s2); + + if (l1 <= 0 || l2 <= 0) + return 0; + + // decapitalize dictionary word + if (complexprefixes) { + mkallsmall_utf(su2 + l2 - 1, 1, langnum); + } else { + mkallsmall_utf(su2, 1, langnum); + } + for (int i = 0; (i < l1) && (i < l2); i++) { + if (((short*)su1)[i] == ((short*)su2)[i]) { + num++; + } else { + if (diff < 2) + diffpos[diff] = i; + diff++; + } + } + if ((diff == 2) && (l1 == l2) && + (((short*)su1)[diffpos[0]] == ((short*)su2)[diffpos[1]]) && + (((short*)su1)[diffpos[1]] == ((short*)su2)[diffpos[0]])) + *is_swap = 1; + } else { + size_t i; + std::string t(s2); + // decapitalize dictionary word + if (complexprefixes) { + size_t l2 = t.size(); + t[l2 - 1] = csconv[(unsigned char)t[l2 - 1]].clower; + } else { + mkallsmall(t, csconv); + } + for (i = 0; (*(s1 + i) != 0) && i < t.size(); i++) { + if (*(s1 + i) == t[i]) { + num++; + } else { + if (diff < 2) + diffpos[diff] = i; + diff++; + } + } + if ((diff == 2) && (*(s1 + i) == 0) && i == t.size() && + (*(s1 + diffpos[0]) == t[diffpos[1]]) && + (*(s1 + diffpos[1]) == t[diffpos[0]])) + *is_swap = 1; + } + return num; +} + +int SuggestMgr::mystrlen(const char* word) { + if (utf8) { + w_char w[MAXSWL]; + return u8_u16(w, MAXSWL, word); + } else + return strlen(word); +} + +// sort in decreasing order of score +void SuggestMgr::bubblesort(char** rword, char** rword2, int* rsc, int n) { + int m = 1; + while (m < n) { + int j = m; + while (j > 0) { + if (rsc[j - 1] < rsc[j]) { + int sctmp = rsc[j - 1]; + char* wdtmp = rword[j - 1]; + rsc[j - 1] = rsc[j]; + rword[j - 1] = rword[j]; + rsc[j] = sctmp; + rword[j] = wdtmp; + if (rword2) { + wdtmp = rword2[j - 1]; + rword2[j - 1] = rword2[j]; + rword2[j] = wdtmp; + } + j--; + } else + break; + } + m++; + } + return; +} + +// longest common subsequence +void SuggestMgr::lcs(const char* s, + const char* s2, + int* l1, + int* l2, + char** result) { + int n, m; + w_char su[MAXSWL]; + w_char su2[MAXSWL]; + char* b; + char* c; + int i; + int j; + if (utf8) { + m = u8_u16(su, MAXSWL, s); + n = u8_u16(su2, MAXSWL, s2); + } else { + m = strlen(s); + n = strlen(s2); + } + c = (char*)malloc((m + 1) * (n + 1)); + b = (char*)malloc((m + 1) * (n + 1)); + if (!c || !b) { + if (c) + free(c); + if (b) + free(b); + *result = NULL; + return; + } + for (i = 1; i <= m; i++) + c[i * (n + 1)] = 0; + for (j = 0; j <= n; j++) + c[j] = 0; + for (i = 1; i <= m; i++) { + for (j = 1; j <= n; j++) { + if (((utf8) && (*((short*)su + i - 1) == *((short*)su2 + j - 1))) || + ((!utf8) && ((*(s + i - 1)) == (*(s2 + j - 1))))) { + c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j - 1] + 1; + b[i * (n + 1) + j] = LCS_UPLEFT; + } else if (c[(i - 1) * (n + 1) + j] >= c[i * (n + 1) + j - 1]) { + c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j]; + b[i * (n + 1) + j] = LCS_UP; + } else { + c[i * (n + 1) + j] = c[i * (n + 1) + j - 1]; + b[i * (n + 1) + j] = LCS_LEFT; + } + } + } + *result = b; + free(c); + *l1 = m; + *l2 = n; +} + +int SuggestMgr::lcslen(const char* s, const char* s2) { + int m; + int n; + int i; + int j; + char* result; + int len = 0; + lcs(s, s2, &m, &n, &result); + if (!result) + return 0; + i = m; + j = n; + while ((i != 0) && (j != 0)) { + if (result[i * (n + 1) + j] == LCS_UPLEFT) { + len++; + i--; + j--; + } else if (result[i * (n + 1) + j] == LCS_UP) { + i--; + } else + j--; + } + free(result); + return len; +} + +int SuggestMgr::lcslen(const std::string& s, const std::string& s2) { + return lcslen(s.c_str(), s2.c_str()); +} diff --git a/libs/hunspell/src/suggestmgr.cxx b/libs/hunspell/src/suggestmgr.cxx deleted file mode 100644 index 4269a1181a..0000000000 --- a/libs/hunspell/src/suggestmgr.cxx +++ /dev/null @@ -1,2376 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -/* - * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada - * And Contributors. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. All modifications to the source code must be clearly marked as - * such. Binary redistributions based on modified source code - * must be clearly marked as modified versions in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include - -#include "suggestmgr.hxx" -#include "htypes.hxx" -#include "csutil.hxx" - -const w_char W_VLINE = {'\0', '|'}; - -SuggestMgr::SuggestMgr(const char* tryme, int maxn, AffixMgr* aptr) { - // register affix manager and check in string of chars to - // try when building candidate suggestions - pAMgr = aptr; - - csconv = NULL; - - ckeyl = 0; - ckey = NULL; - ckey_utf = NULL; - - ctryl = 0; - ctry = NULL; - ctry_utf = NULL; - - utf8 = 0; - langnum = 0; - complexprefixes = 0; - - maxSug = maxn; - nosplitsugs = 0; - maxngramsugs = MAXNGRAMSUGS; - maxcpdsugs = MAXCOMPOUNDSUGS; - - if (pAMgr) { - langnum = pAMgr->get_langnum(); - ckey = pAMgr->get_key_string(); - nosplitsugs = pAMgr->get_nosplitsugs(); - if (pAMgr->get_maxngramsugs() >= 0) - maxngramsugs = pAMgr->get_maxngramsugs(); - utf8 = pAMgr->get_utf8(); - if (pAMgr->get_maxcpdsugs() >= 0) - maxcpdsugs = pAMgr->get_maxcpdsugs(); - if (!utf8) { - char* enc = pAMgr->get_encoding(); - csconv = get_current_cs(enc); - free(enc); - } - complexprefixes = pAMgr->get_complexprefixes(); - } - - if (ckey) { - if (utf8) { - w_char t[MAXSWL]; - ckeyl = u8_u16(t, MAXSWL, ckey); - ckey_utf = (w_char*)malloc(ckeyl * sizeof(w_char)); - if (ckey_utf) - memcpy(ckey_utf, t, ckeyl * sizeof(w_char)); - else - ckeyl = 0; - } else { - ckeyl = strlen(ckey); - } - } - - if (tryme) { - ctry = mystrdup(tryme); - if (ctry) - ctryl = strlen(ctry); - if (ctry && utf8) { - w_char t[MAXSWL]; - ctryl = u8_u16(t, MAXSWL, tryme); - ctry_utf = (w_char*)malloc(ctryl * sizeof(w_char)); - if (ctry_utf) - memcpy(ctry_utf, t, ctryl * sizeof(w_char)); - else - ctryl = 0; - } - } -} - -SuggestMgr::~SuggestMgr() { - pAMgr = NULL; - if (ckey) - free(ckey); - ckey = NULL; - if (ckey_utf) - free(ckey_utf); - ckey_utf = NULL; - ckeyl = 0; - if (ctry) - free(ctry); - ctry = NULL; - if (ctry_utf) - free(ctry_utf); - ctry_utf = NULL; - ctryl = 0; - maxSug = 0; -#ifdef MOZILLA_CLIENT - delete[] csconv; -#endif -} - -int SuggestMgr::testsug(char** wlst, - const char* candidate, - int wl, - int ns, - int cpdsuggest, - int* timer, - clock_t* timelimit) { - int cwrd = 1; - if (ns == maxSug) - return maxSug; - for (int k = 0; k < ns; k++) { - if (strcmp(candidate, wlst[k]) == 0) { - cwrd = 0; - break; - } - } - if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { - wlst[ns] = mystrdup(candidate); - if (wlst[ns] == NULL) { - for (int j = 0; j < ns; j++) - free(wlst[j]); - return -1; - } - ns++; - } - return ns; -} - -// generate suggestions for a misspelled word -// pass in address of array of char * pointers -// onlycompoundsug: probably bad suggestions (need for ngram sugs, too) - -int SuggestMgr::suggest(char*** slst, - const char* w, - int nsug, - int* onlycompoundsug) { - int nocompoundtwowords = 0; - char** wlst; - w_char word_utf[MAXSWL]; - int wl = 0; - int nsugorig = nsug; - std::string w2; - const char* word = w; - int oldSug = 0; - - // word reversing wrapper for complex prefixes - if (complexprefixes) { - w2.assign(w); - if (utf8) - reverseword_utf(w2); - else - reverseword(w2); - word = w2.c_str(); - } - - if (*slst) { - wlst = *slst; - } else { - wlst = (char**)malloc(maxSug * sizeof(char*)); - if (wlst == NULL) - return -1; - for (int i = 0; i < maxSug; i++) { - wlst[i] = NULL; - } - } - - if (utf8) { - wl = u8_u16(word_utf, MAXSWL, word); - if (wl == -1) { - *slst = wlst; - return nsug; - } - } - - for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0); - cpdsuggest++) { - // limit compound suggestion - if (cpdsuggest > 0) - oldSug = nsug; - - // suggestions for an uppercase word (html -> HTML) - if ((nsug < maxSug) && (nsug > -1)) { - nsug = (utf8) ? capchars_utf(wlst, word_utf, wl, nsug, cpdsuggest) - : capchars(wlst, word, nsug, cpdsuggest); - } - - // perhaps we made a typical fault of spelling - if ((nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { - nsug = replchars(wlst, word, nsug, cpdsuggest); - } - - // perhaps we made chose the wrong char from a related set - if ((nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { - nsug = mapchars(wlst, word, nsug, cpdsuggest); - } - - // only suggest compound words when no other suggestion - if ((cpdsuggest == 0) && (nsug > nsugorig)) - nocompoundtwowords = 1; - - // did we swap the order of chars by mistake - if ((nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { - nsug = (utf8) ? swapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) - : swapchar(wlst, word, nsug, cpdsuggest); - } - - // did we swap the order of non adjacent chars by mistake - if ((nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { - nsug = (utf8) ? longswapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) - : longswapchar(wlst, word, nsug, cpdsuggest); - } - - // did we just hit the wrong key in place of a good char (case and keyboard) - if ((nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { - nsug = (utf8) ? badcharkey_utf(wlst, word_utf, wl, nsug, cpdsuggest) - : badcharkey(wlst, word, nsug, cpdsuggest); - } - - // did we add a char that should not be there - if ((nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { - nsug = (utf8) ? extrachar_utf(wlst, word_utf, wl, nsug, cpdsuggest) - : extrachar(wlst, word, nsug, cpdsuggest); - } - - // did we forgot a char - if ((nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { - nsug = (utf8) ? forgotchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) - : forgotchar(wlst, word, nsug, cpdsuggest); - } - - // did we move a char - if ((nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { - nsug = (utf8) ? movechar_utf(wlst, word_utf, wl, nsug, cpdsuggest) - : movechar(wlst, word, nsug, cpdsuggest); - } - - // did we just hit the wrong key in place of a good char - if ((nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { - nsug = (utf8) ? badchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) - : badchar(wlst, word, nsug, cpdsuggest); - } - - // did we double two characters - if ((nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { - nsug = (utf8) ? doubletwochars_utf(wlst, word_utf, wl, nsug, cpdsuggest) - : doubletwochars(wlst, word, nsug, cpdsuggest); - } - - // perhaps we forgot to hit space and two words ran together - if (!nosplitsugs && (nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { - nsug = twowords(wlst, word, nsug, cpdsuggest); - } - - } // repeating ``for'' statement compounding support - - if (nsug < 0) { - // we ran out of memory - we should free up as much as possible - for (int i = 0; i < maxSug; i++) - if (wlst[i] != NULL) - free(wlst[i]); - free(wlst); - wlst = NULL; - } - - if (!nocompoundtwowords && (nsug > 0) && onlycompoundsug) - *onlycompoundsug = 1; - - *slst = wlst; - return nsug; -} - -// generate suggestions for a word with typical mistake -// pass in address of array of char * pointers -#ifdef HUNSPELL_EXPERIMENTAL -int SuggestMgr::suggest_auto(char*** slst, const char* w, int nsug) { - int nocompoundtwowords = 0; - char** wlst; - int oldSug; - - char w2[MAXWORDUTF8LEN]; - const char* word = w; - - // word reversing wrapper for complex prefixes - if (complexprefixes) { - strcpy(w2, w); - if (utf8) - reverseword_utf(w2); - else - reverseword(w2); - word = w2; - } - - if (*slst) { - wlst = *slst; - } else { - wlst = (char**)malloc(maxSug * sizeof(char*)); - if (wlst == NULL) - return -1; - } - - for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0); - cpdsuggest++) { - // limit compound suggestion - if (cpdsuggest > 0) - oldSug = nsug; - - // perhaps we made a typical fault of spelling - if ((nsug < maxSug) && (nsug > -1)) - nsug = replchars(wlst, word, nsug, cpdsuggest); - - // perhaps we made chose the wrong char from a related set - if ((nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) - nsug = mapchars(wlst, word, nsug, cpdsuggest); - - if ((cpdsuggest == 0) && (nsug > 0)) - nocompoundtwowords = 1; - - // perhaps we forgot to hit space and two words ran together - - if ((nsug < maxSug) && (nsug > -1) && - (!cpdsuggest || (nsug < oldSug + maxcpdsugs)) && - check_forbidden(word, strlen(word))) { - nsug = twowords(wlst, word, nsug, cpdsuggest); - } - - } // repeating ``for'' statement compounding support - - if (nsug < 0) { - for (int i = 0; i < maxSug; i++) - if (wlst[i] != NULL) - free(wlst[i]); - free(wlst); - return -1; - } - - *slst = wlst; - return nsug; -} -#endif // END OF HUNSPELL_EXPERIMENTAL CODE - -// suggestions for an uppercase word (html -> HTML) -int SuggestMgr::capchars_utf(char** wlst, - const w_char* word, - int wl, - int ns, - int cpdsuggest) { - char candidate[MAXSWUTF8L]; - w_char candidate_utf[MAXSWL]; - memcpy(candidate_utf, word, wl * sizeof(w_char)); - mkallcap_utf(candidate_utf, wl, langnum); - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); - return testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, - NULL); -} - -// suggestions for an uppercase word (html -> HTML) -int SuggestMgr::capchars(char** wlst, - const char* word, - int ns, - int cpdsuggest) { - std::string candidate(word); - mkallcap(candidate, csconv); - return testsug(wlst, candidate.data(), candidate.size(), ns, cpdsuggest, NULL, - NULL); -} - -// suggestions for when chose the wrong char out of a related set -int SuggestMgr::mapchars(char** wlst, - const char* word, - int ns, - int cpdsuggest) { - char candidate[MAXSWUTF8L]; - clock_t timelimit; - int timer; - candidate[0] = '\0'; - - int wl = strlen(word); - if (wl < 2 || !pAMgr) - return ns; - - int nummap = pAMgr->get_nummap(); - struct mapentry* maptable = pAMgr->get_maptable(); - if (maptable == NULL) - return ns; - - timelimit = clock(); - timer = MINTIMER; - return map_related(word, (char*)&candidate, 0, 0, wlst, cpdsuggest, ns, - maptable, nummap, &timer, &timelimit); -} - -int SuggestMgr::map_related(const char* word, - char* candidate, - int wn, - int cn, - char** wlst, - int cpdsuggest, - int ns, - const mapentry* maptable, - int nummap, - int* timer, - clock_t* timelimit) { - if (*(word + wn) == '\0') { - int cwrd = 1; - *(candidate + cn) = '\0'; - int wl = strlen(candidate); - for (int m = 0; m < ns; m++) { - if (strcmp(candidate, wlst[m]) == 0) { - cwrd = 0; - break; - } - } - if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { - if (ns < maxSug) { - wlst[ns] = mystrdup(candidate); - if (wlst[ns] == NULL) - return -1; - ns++; - } - } - return ns; - } - int in_map = 0; - for (int j = 0; j < nummap; j++) { - for (int k = 0; k < maptable[j].len; k++) { - int len = strlen(maptable[j].set[k]); - if (strncmp(maptable[j].set[k], word + wn, len) == 0) { - in_map = 1; - for (int l = 0; l < maptable[j].len; l++) { - strcpy(candidate + cn, maptable[j].set[l]); - ns = map_related(word, candidate, wn + len, strlen(candidate), wlst, - cpdsuggest, ns, maptable, nummap, timer, timelimit); - if (!(*timer)) - return ns; - } - } - } - } - if (!in_map) { - *(candidate + cn) = *(word + wn); - ns = map_related(word, candidate, wn + 1, cn + 1, wlst, cpdsuggest, ns, - maptable, nummap, timer, timelimit); - } - return ns; -} - -// suggestions for a typical fault of spelling, that -// differs with more, than 1 letter from the right form. -int SuggestMgr::replchars(char** wlst, - const char* word, - int ns, - int cpdsuggest) { - char candidate[MAXSWUTF8L]; - const char* r; - int lenr, lenp; - int wl = strlen(word); - if (wl < 2 || !pAMgr) - return ns; - int numrep = pAMgr->get_numrep(); - struct replentry* reptable = pAMgr->get_reptable(); - if (reptable == NULL) - return ns; - for (int i = 0; i < numrep; i++) { - r = word; - lenr = strlen(reptable[i].pattern2); - lenp = strlen(reptable[i].pattern); - // search every occurence of the pattern in the word - while ((r = strstr(r, reptable[i].pattern)) != NULL && - (!reptable[i].end || strlen(r) == strlen(reptable[i].pattern)) && - (!reptable[i].start || r == word)) { - strcpy(candidate, word); - if (r - word + lenr + strlen(r + lenp) >= MAXSWUTF8L) - break; - strcpy(candidate + (r - word), reptable[i].pattern2); - strcpy(candidate + (r - word) + lenr, r + lenp); - ns = testsug(wlst, candidate, wl - lenp + lenr, ns, cpdsuggest, NULL, - NULL); - if (ns == -1) - return -1; - // check REP suggestions with space - char* sp = strchr(candidate, ' '); - if (sp) { - char* prev = candidate; - while (sp) { - *sp = '\0'; - if (checkword(prev, strlen(prev), 0, NULL, NULL)) { - int oldns = ns; - *sp = ' '; - ns = testsug(wlst, sp + 1, strlen(sp + 1), ns, cpdsuggest, NULL, - NULL); - if (ns == -1) - return -1; - if (oldns < ns) { - free(wlst[ns - 1]); - wlst[ns - 1] = mystrdup(candidate); - if (!wlst[ns - 1]) - return -1; - } - } - *sp = ' '; - prev = sp + 1; - sp = strchr(prev, ' '); - } - } - r++; // search for the next letter - } - } - return ns; -} - -// perhaps we doubled two characters (pattern aba -> ababa, for example vacation -// -> vacacation) -int SuggestMgr::doubletwochars(char** wlst, - const char* word, - int ns, - int cpdsuggest) { - char candidate[MAXSWUTF8L]; - int state = 0; - int wl = strlen(word); - if (wl < 5 || !pAMgr) - return ns; - for (int i = 2; i < wl; i++) { - if (word[i] == word[i - 2]) { - state++; - if (state == 3) { - strcpy(candidate, word); - strcpy(candidate + i - 1, word + i + 1); - ns = testsug(wlst, candidate, wl - 2, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - state = 0; - } - } else { - state = 0; - } - } - return ns; -} - -// perhaps we doubled two characters (pattern aba -> ababa, for example vacation -// -> vacacation) -int SuggestMgr::doubletwochars_utf(char** wlst, - const w_char* word, - int wl, - int ns, - int cpdsuggest) { - w_char candidate_utf[MAXSWL]; - char candidate[MAXSWUTF8L]; - int state = 0; - if (wl < 5 || !pAMgr) - return ns; - for (int i = 2; i < wl; i++) { - if (w_char_eq(word[i], word[i - 2])) { - state++; - if (state == 3) { - memcpy(candidate_utf, word, (i - 1) * sizeof(w_char)); - memcpy(candidate_utf + i - 1, word + i + 1, - (wl - i - 1) * sizeof(w_char)); - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl - 2); - ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, - NULL); - if (ns == -1) - return -1; - state = 0; - } - } else { - state = 0; - } - } - return ns; -} - -// error is wrong char in place of correct one (case and keyboard related -// version) -int SuggestMgr::badcharkey(char** wlst, - const char* word, - int ns, - int cpdsuggest) { - char tmpc; - char candidate[MAXSWUTF8L]; - int wl = strlen(word); - strcpy(candidate, word); - // swap out each char one by one and try uppercase and neighbor - // keyboard chars in its place to see if that makes a good word - - for (int i = 0; i < wl; i++) { - tmpc = candidate[i]; - // check with uppercase letters - candidate[i] = csconv[((unsigned char)tmpc)].cupper; - if (tmpc != candidate[i]) { - ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - candidate[i] = tmpc; - } - // check neighbor characters in keyboard string - if (!ckey) - continue; - char* loc = strchr(ckey, tmpc); - while (loc) { - if ((loc > ckey) && (*(loc - 1) != '|')) { - candidate[i] = *(loc - 1); - ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - } - if ((*(loc + 1) != '|') && (*(loc + 1) != '\0')) { - candidate[i] = *(loc + 1); - ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - } - loc = strchr(loc + 1, tmpc); - } - candidate[i] = tmpc; - } - return ns; -} - -// error is wrong char in place of correct one (case and keyboard related -// version) -int SuggestMgr::badcharkey_utf(char** wlst, - const w_char* word, - int wl, - int ns, - int cpdsuggest) { - w_char tmpc; - w_char candidate_utf[MAXSWL]; - char candidate[MAXSWUTF8L]; - memcpy(candidate_utf, word, wl * sizeof(w_char)); - // swap out each char one by one and try all the tryme - // chars in its place to see if that makes a good word - for (int i = 0; i < wl; i++) { - tmpc = candidate_utf[i]; - // check with uppercase letters - mkallcap_utf(candidate_utf + i, 1, langnum); - if (!w_char_eq(tmpc, candidate_utf[i])) { - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); - ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, - NULL); - if (ns == -1) - return -1; - candidate_utf[i] = tmpc; - } - // check neighbor characters in keyboard string - if (!ckey) - continue; - w_char* loc = ckey_utf; - while ((loc < (ckey_utf + ckeyl)) && !w_char_eq(*loc, tmpc)) - loc++; - while (loc < (ckey_utf + ckeyl)) { - if ((loc > ckey_utf) && !w_char_eq(*(loc - 1), W_VLINE)) { - candidate_utf[i] = *(loc - 1); - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); - ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, - NULL); - if (ns == -1) - return -1; - } - if (((loc + 1) < (ckey_utf + ckeyl)) && !w_char_eq(*(loc + 1), W_VLINE)) { - candidate_utf[i] = *(loc + 1); - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); - ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, - NULL); - if (ns == -1) - return -1; - } - do { - loc++; - } while ((loc < (ckey_utf + ckeyl)) && !w_char_eq(*loc, tmpc)); - } - candidate_utf[i] = tmpc; - } - return ns; -} - -// error is wrong char in place of correct one -int SuggestMgr::badchar(char** wlst, const char* word, int ns, int cpdsuggest) { - char tmpc; - char candidate[MAXSWUTF8L]; - clock_t timelimit = clock(); - int timer = MINTIMER; - int wl = strlen(word); - strcpy(candidate, word); - // swap out each char one by one and try all the tryme - // chars in its place to see if that makes a good word - for (int j = 0; j < ctryl; j++) { - for (int i = wl - 1; i >= 0; i--) { - tmpc = candidate[i]; - if (ctry[j] == tmpc) - continue; - candidate[i] = ctry[j]; - ns = testsug(wlst, candidate, wl, ns, cpdsuggest, &timer, &timelimit); - if (ns == -1) - return -1; - if (!timer) - return ns; - candidate[i] = tmpc; - } - } - return ns; -} - -// error is wrong char in place of correct one -int SuggestMgr::badchar_utf(char** wlst, - const w_char* word, - int wl, - int ns, - int cpdsuggest) { - w_char tmpc; - w_char candidate_utf[MAXSWL]; - char candidate[MAXSWUTF8L]; - clock_t timelimit = clock(); - int timer = MINTIMER; - memcpy(candidate_utf, word, wl * sizeof(w_char)); - // swap out each char one by one and try all the tryme - // chars in its place to see if that makes a good word - for (int j = 0; j < ctryl; j++) { - for (int i = wl - 1; i >= 0; i--) { - tmpc = candidate_utf[i]; - if (w_char_eq(tmpc, ctry_utf[j])) - continue; - candidate_utf[i] = ctry_utf[j]; - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); - ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, &timer, - &timelimit); - if (ns == -1) - return -1; - if (!timer) - return ns; - candidate_utf[i] = tmpc; - } - } - return ns; -} - -// error is word has an extra letter it does not need -int SuggestMgr::extrachar_utf(char** wlst, - const w_char* word, - int wl, - int ns, - int cpdsuggest) { - char candidate[MAXSWUTF8L]; - w_char candidate_utf[MAXSWL]; - w_char* p; - w_char tmpc = W_VLINE; // not used value, only for VCC warning message - if (wl < 2) - return ns; - // try omitting one char of word at a time - memcpy(candidate_utf, word, wl * sizeof(w_char)); - for (p = candidate_utf + wl - 1; p >= candidate_utf; p--) { - w_char tmpc2 = *p; - if (p < candidate_utf + wl - 1) - *p = tmpc; - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl - 1); - ns = - testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - tmpc = tmpc2; - } - return ns; -} - -// error is word has an extra letter it does not need -int SuggestMgr::extrachar(char** wlst, - const char* word, - int ns, - int cpdsuggest) { - char tmpc = '\0'; - char candidate[MAXSWUTF8L]; - char* p; - int wl = strlen(word); - if (wl < 2) - return ns; - // try omitting one char of word at a time - strcpy(candidate, word); - for (p = candidate + wl - 1; p >= candidate; p--) { - char tmpc2 = *p; - *p = tmpc; - ns = testsug(wlst, candidate, wl - 1, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - tmpc = tmpc2; - } - return ns; -} - -// error is missing a letter it needs -int SuggestMgr::forgotchar(char** wlst, - const char* word, - int ns, - int cpdsuggest) { - char candidate[MAXSWUTF8L + 4]; - char* p; - clock_t timelimit = clock(); - int timer = MINTIMER; - int wl = strlen(word); - // try inserting a tryme character before every letter (and the null - // terminator) - for (int i = 0; i < ctryl; i++) { - strcpy(candidate, word); - for (p = candidate + wl; p >= candidate; p--) { - *(p + 1) = *p; - *p = ctry[i]; - ns = testsug(wlst, candidate, wl + 1, ns, cpdsuggest, &timer, &timelimit); - if (ns == -1) - return -1; - if (!timer) - return ns; - } - } - return ns; -} - -// error is missing a letter it needs -int SuggestMgr::forgotchar_utf(char** wlst, - const w_char* word, - int wl, - int ns, - int cpdsuggest) { - w_char candidate_utf[MAXSWL + 1]; - char candidate[MAXSWUTF8L + 4]; - w_char* p; - clock_t timelimit = clock(); - int timer = MINTIMER; - // try inserting a tryme character at the end of the word and before every - // letter - for (int i = 0; i < ctryl; i++) { - memcpy(candidate_utf, word, wl * sizeof(w_char)); - for (p = candidate_utf + wl; p >= candidate_utf; p--) { - *(p + 1) = *p; - *p = ctry_utf[i]; - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl + 1); - ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, &timer, - &timelimit); - if (ns == -1) - return -1; - if (!timer) - return ns; - } - } - return ns; -} - -/* error is should have been two words */ -int SuggestMgr::twowords(char** wlst, - const char* word, - int ns, - int cpdsuggest) { - char candidate[MAXSWUTF8L]; - char* p; - int c1, c2; - int forbidden = 0; - int cwrd; - - int wl = strlen(word); - if (wl < 3) - return ns; - - if (langnum == LANG_hu) - forbidden = check_forbidden(word, wl); - - strcpy(candidate + 1, word); - // split the string into two pieces after every char - // if both pieces are good words make them a suggestion - for (p = candidate + 1; p[1] != '\0'; p++) { - p[-1] = *p; - // go to end of the UTF-8 character - while (utf8 && ((p[1] & 0xc0) == 0x80)) { - *p = p[1]; - p++; - } - if (utf8 && p[1] == '\0') - break; // last UTF-8 character - *p = '\0'; - c1 = checkword(candidate, strlen(candidate), cpdsuggest, NULL, NULL); - if (c1) { - c2 = checkword((p + 1), strlen(p + 1), cpdsuggest, NULL, NULL); - if (c2) { - *p = ' '; - - // spec. Hungarian code (need a better compound word support) - if ((langnum == LANG_hu) && !forbidden && - // if 3 repeating letter, use - instead of space - (((p[-1] == p[1]) && - (((p > candidate + 1) && (p[-1] == p[-2])) || (p[-1] == p[2]))) || - // or multiple compounding, with more, than 6 syllables - ((c1 == 3) && (c2 >= 2)))) - *p = '-'; - - cwrd = 1; - for (int k = 0; k < ns; k++) { - if (strcmp(candidate, wlst[k]) == 0) { - cwrd = 0; - break; - } - } - if (ns < maxSug) { - if (cwrd) { - wlst[ns] = mystrdup(candidate); - if (wlst[ns] == NULL) - return -1; - ns++; - } - } else - return ns; - // add two word suggestion with dash, if TRY string contains - // "a" or "-" - // NOTE: cwrd doesn't modified for REP twoword sugg. - if (ctry && (strchr(ctry, 'a') || strchr(ctry, '-')) && - mystrlen(p + 1) > 1 && mystrlen(candidate) - mystrlen(p) > 1) { - *p = '-'; - for (int k = 0; k < ns; k++) { - if (strcmp(candidate, wlst[k]) == 0) { - cwrd = 0; - break; - } - } - if (ns < maxSug) { - if (cwrd) { - wlst[ns] = mystrdup(candidate); - if (wlst[ns] == NULL) - return -1; - ns++; - } - } else - return ns; - } - } - } - } - return ns; -} - -// error is adjacent letter were swapped -int SuggestMgr::swapchar(char** wlst, - const char* word, - int ns, - int cpdsuggest) { - char candidate[MAXSWUTF8L]; - char* p; - char tmpc; - int wl = strlen(word); - // try swapping adjacent chars one by one - strcpy(candidate, word); - for (p = candidate; p[1] != 0; p++) { - tmpc = *p; - *p = p[1]; - p[1] = tmpc; - ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - p[1] = *p; - *p = tmpc; - } - // try double swaps for short words - // ahev -> have, owudl -> would - if (wl == 4 || wl == 5) { - candidate[0] = word[1]; - candidate[1] = word[0]; - candidate[2] = word[2]; - candidate[wl - 2] = word[wl - 1]; - candidate[wl - 1] = word[wl - 2]; - ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - if (wl == 5) { - candidate[0] = word[0]; - candidate[1] = word[2]; - candidate[2] = word[1]; - ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - } - } - return ns; -} - -// error is adjacent letter were swapped -int SuggestMgr::swapchar_utf(char** wlst, - const w_char* word, - int wl, - int ns, - int cpdsuggest) { - w_char candidate_utf[MAXSWL]; - char candidate[MAXSWUTF8L]; - w_char* p; - w_char tmpc; - int len = 0; - // try swapping adjacent chars one by one - memcpy(candidate_utf, word, wl * sizeof(w_char)); - for (p = candidate_utf; p < (candidate_utf + wl - 1); p++) { - tmpc = *p; - *p = p[1]; - p[1] = tmpc; - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); - if (len == 0) - len = strlen(candidate); - ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - p[1] = *p; - *p = tmpc; - } - // try double swaps for short words - // ahev -> have, owudl -> would, suodn -> sound - if (wl == 4 || wl == 5) { - candidate_utf[0] = word[1]; - candidate_utf[1] = word[0]; - candidate_utf[2] = word[2]; - candidate_utf[wl - 2] = word[wl - 1]; - candidate_utf[wl - 1] = word[wl - 2]; - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); - ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - if (wl == 5) { - candidate_utf[0] = word[0]; - candidate_utf[1] = word[2]; - candidate_utf[2] = word[1]; - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); - ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - } - } - return ns; -} - -// error is not adjacent letter were swapped -int SuggestMgr::longswapchar(char** wlst, - const char* word, - int ns, - int cpdsuggest) { - char candidate[MAXSWUTF8L]; - char* p; - char* q; - char tmpc; - int wl = strlen(word); - // try swapping not adjacent chars one by one - strcpy(candidate, word); - for (p = candidate; *p != 0; p++) { - for (q = candidate; *q != 0; q++) { - if (abs((int)(p - q)) > 1) { - tmpc = *p; - *p = *q; - *q = tmpc; - ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - *q = *p; - *p = tmpc; - } - } - } - return ns; -} - -// error is adjacent letter were swapped -int SuggestMgr::longswapchar_utf(char** wlst, - const w_char* word, - int wl, - int ns, - int cpdsuggest) { - w_char candidate_utf[MAXSWL]; - char candidate[MAXSWUTF8L]; - w_char* p; - w_char* q; - w_char tmpc; - // try swapping not adjacent chars - memcpy(candidate_utf, word, wl * sizeof(w_char)); - for (p = candidate_utf; p < (candidate_utf + wl); p++) { - for (q = candidate_utf; q < (candidate_utf + wl); q++) { - if (abs((int)(p - q)) > 1) { - tmpc = *p; - *p = *q; - *q = tmpc; - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); - ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, - NULL); - if (ns == -1) - return -1; - *q = *p; - *p = tmpc; - } - } - } - return ns; -} - -// error is a letter was moved -int SuggestMgr::movechar(char** wlst, - const char* word, - int ns, - int cpdsuggest) { - char candidate[MAXSWUTF8L]; - char* p; - char* q; - char tmpc; - - int wl = strlen(word); - // try moving a char - strcpy(candidate, word); - for (p = candidate; *p != 0; p++) { - for (q = p + 1; (*q != 0) && ((q - p) < 10); q++) { - tmpc = *(q - 1); - *(q - 1) = *q; - *q = tmpc; - if ((q - p) < 2) - continue; // omit swap char - ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - } - strcpy(candidate, word); - } - for (p = candidate + wl - 1; p > candidate; p--) { - for (q = p - 1; (q >= candidate) && ((p - q) < 10); q--) { - tmpc = *(q + 1); - *(q + 1) = *q; - *q = tmpc; - if ((p - q) < 2) - continue; // omit swap char - ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); - if (ns == -1) - return -1; - } - strcpy(candidate, word); - } - return ns; -} - -// error is a letter was moved -int SuggestMgr::movechar_utf(char** wlst, - const w_char* word, - int wl, - int ns, - int cpdsuggest) { - w_char candidate_utf[MAXSWL]; - char candidate[MAXSWUTF8L]; - w_char* p; - w_char* q; - w_char tmpc; - // try moving a char - memcpy(candidate_utf, word, wl * sizeof(w_char)); - for (p = candidate_utf; p < (candidate_utf + wl); p++) { - for (q = p + 1; (q < (candidate_utf + wl)) && ((q - p) < 10); q++) { - tmpc = *(q - 1); - *(q - 1) = *q; - *q = tmpc; - if ((q - p) < 2) - continue; // omit swap char - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); - ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, - NULL); - if (ns == -1) - return -1; - } - memcpy(candidate_utf, word, wl * sizeof(w_char)); - } - for (p = candidate_utf + wl - 1; p > candidate_utf; p--) { - for (q = p - 1; (q >= candidate_utf) && ((p - q) < 10); q--) { - tmpc = *(q + 1); - *(q + 1) = *q; - *q = tmpc; - if ((p - q) < 2) - continue; // omit swap char - u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); - ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, - NULL); - if (ns == -1) - return -1; - } - memcpy(candidate_utf, word, wl * sizeof(w_char)); - } - return ns; -} - -// generate a set of suggestions for very poorly spelled words -int SuggestMgr::ngsuggest(char** wlst, - char* w, - int ns, - HashMgr** pHMgr, - int md) { - int i, j; - int lval; - int sc; - int lp, lpphon; - int nonbmp = 0; - - // exhaustively search through all root words - // keeping track of the MAX_ROOTS most similar root words - struct hentry* roots[MAX_ROOTS]; - char* rootsphon[MAX_ROOTS]; - int scores[MAX_ROOTS]; - int scoresphon[MAX_ROOTS]; - for (i = 0; i < MAX_ROOTS; i++) { - roots[i] = NULL; - scores[i] = -100 * i; - rootsphon[i] = NULL; - scoresphon[i] = -100 * i; - } - lp = MAX_ROOTS - 1; - lpphon = MAX_ROOTS - 1; - int low = NGRAM_LOWERING; - - std::string w2; - char f[MAXSWUTF8L]; - const char* word = w; - - // word reversing wrapper for complex prefixes - if (complexprefixes) { - w2.assign(w); - if (utf8) - reverseword_utf(w2); - else - reverseword(w2); - word = w2.c_str(); - } - - char mw[MAXSWUTF8L]; - w_char u8[MAXSWL]; - int nc = strlen(word); - int n = (utf8) ? u8_u16(u8, MAXSWL, word) : nc; - - // set character based ngram suggestion for words with non-BMP Unicode - // characters - if (n == -1) { - utf8 = 0; // XXX not state-free - n = nc; - nonbmp = 1; - low = 0; - } - - struct hentry* hp = NULL; - int col = -1; - phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL; - char target[MAXSWUTF8L]; - std::string candidate; - if (ph) { - if (utf8) { - std::vector _w; - int _wl = u8_u16(_w, word); - mkallcap_utf(_w, _wl, langnum); - u16_u8(candidate, _w); - } else { - candidate.assign(word); - if (!nonbmp) - mkallcap(candidate, csconv); - } - phonet(candidate.c_str(), target, nc, - *ph); // XXX phonet() is 8-bit (nc, not n) - } - - FLAG forbiddenword = pAMgr ? pAMgr->get_forbiddenword() : FLAG_NULL; - FLAG nosuggest = pAMgr ? pAMgr->get_nosuggest() : FLAG_NULL; - FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL; - FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL; - - for (i = 0; i < md; i++) { - while (0 != (hp = (pHMgr[i])->walk_hashtable(col, hp))) { - if ((hp->astr) && (pAMgr) && - (TESTAFF(hp->astr, forbiddenword, hp->alen) || - TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) || - TESTAFF(hp->astr, nosuggest, hp->alen) || - TESTAFF(hp->astr, nongramsuggest, hp->alen) || - TESTAFF(hp->astr, onlyincompound, hp->alen))) - continue; - - sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) + - leftcommonsubstring(word, HENTRY_WORD(hp)); - - // check special pronounciation - if ((hp->var & H_OPT_PHON) && - copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) { - int sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) + - +leftcommonsubstring(word, f); - if (sc2 > sc) - sc = sc2; - } - - int scphon = -20000; - if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) { - char target2[MAXSWUTF8L]; - if (utf8) { - std::vector _w; - int _wl = u8_u16(_w, HENTRY_WORD(hp)); - mkallcap_utf(_w, _wl, langnum); - u16_u8(candidate, _w); - } else { - candidate.assign(HENTRY_WORD(hp)); - mkallcap(candidate, csconv); - } - phonet(candidate.c_str(), target2, -1, *ph); - scphon = 2 * ngram(3, target, target2, NGRAM_LONGER_WORSE); - } - - if (sc > scores[lp]) { - scores[lp] = sc; - roots[lp] = hp; - lval = sc; - for (j = 0; j < MAX_ROOTS; j++) - if (scores[j] < lval) { - lp = j; - lval = scores[j]; - } - } - - if (scphon > scoresphon[lpphon]) { - scoresphon[lpphon] = scphon; - rootsphon[lpphon] = HENTRY_WORD(hp); - lval = scphon; - for (j = 0; j < MAX_ROOTS; j++) - if (scoresphon[j] < lval) { - lpphon = j; - lval = scoresphon[j]; - } - } - } - } - - // find minimum threshold for a passable suggestion - // mangle original word three differnt ways - // and score them to generate a minimum acceptable score - int thresh = 0; - for (int sp = 1; sp < 4; sp++) { - if (utf8) { - for (int k = sp; k < n; k += 4) - *((unsigned short*)u8 + k) = '*'; - u16_u8(mw, MAXSWUTF8L, u8, n); - thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low); - } else { - strcpy(mw, word); - for (int k = sp; k < n; k += 4) - *(mw + k) = '*'; - thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low); - } - } - thresh = thresh / 3; - thresh--; - - // now expand affixes on each of these root words and - // and use length adjusted ngram scores to select - // possible suggestions - char* guess[MAX_GUESS]; - char* guessorig[MAX_GUESS]; - int gscore[MAX_GUESS]; - for (i = 0; i < MAX_GUESS; i++) { - guess[i] = NULL; - guessorig[i] = NULL; - gscore[i] = -100 * i; - } - - lp = MAX_GUESS - 1; - - struct guessword* glst; - glst = (struct guessword*)calloc(MAX_WORDS, sizeof(struct guessword)); - if (!glst) { - if (nonbmp) - utf8 = 1; - return ns; - } - - for (i = 0; i < MAX_ROOTS; i++) { - if (roots[i]) { - struct hentry* rp = roots[i]; - int nw = pAMgr->expand_rootword( - glst, MAX_WORDS, HENTRY_WORD(rp), rp->blen, rp->astr, rp->alen, word, - nc, - ((rp->var & H_OPT_PHON) ? copy_field(f, HENTRY_DATA(rp), MORPH_PHON) - : NULL)); - - for (int k = 0; k < nw; k++) { - sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH + low) + - leftcommonsubstring(word, glst[k].word); - - if (sc > thresh) { - if (sc > gscore[lp]) { - if (guess[lp]) { - free(guess[lp]); - if (guessorig[lp]) { - free(guessorig[lp]); - guessorig[lp] = NULL; - } - } - gscore[lp] = sc; - guess[lp] = glst[k].word; - guessorig[lp] = glst[k].orig; - lval = sc; - for (j = 0; j < MAX_GUESS; j++) - if (gscore[j] < lval) { - lp = j; - lval = gscore[j]; - } - } else { - free(glst[k].word); - if (glst[k].orig) - free(glst[k].orig); - } - } else { - free(glst[k].word); - if (glst[k].orig) - free(glst[k].orig); - } - } - } - } - free(glst); - - // now we are done generating guesses - // sort in order of decreasing score - - bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS); - if (ph) - bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS); - - // weight suggestions with a similarity index, based on - // the longest common subsequent algorithm and resort - - int is_swap = 0; - int re = 0; - double fact = 1.0; - if (pAMgr) { - int maxd = pAMgr->get_maxdiff(); - if (maxd >= 0) - fact = (10.0 - maxd) / 5.0; - } - - for (i = 0; i < MAX_GUESS; i++) { - if (guess[i]) { - // lowering guess[i] - std::string gl; - int len; - if (utf8) { - std::vector _w; - len = u8_u16(_w, guess[i]); - mkallsmall_utf(_w, len, langnum); - u16_u8(gl, _w); - } else { - gl.assign(guess[i]); - if (!nonbmp) - mkallsmall(gl, csconv); - len = strlen(guess[i]); - } - - int _lcs = lcslen(word, gl.c_str()); - - // same characters with different casing - if ((n == len) && (n == _lcs)) { - gscore[i] += 2000; - break; - } - // using 2-gram instead of 3, and other weightening - - re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) + - ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED); - - gscore[i] = - // length of longest common subsequent minus length difference - 2 * _lcs - abs((int)(n - len)) + - // weight length of the left common substring - leftcommonsubstring(word, gl.c_str()) + - // weight equal character positions - (!nonbmp && commoncharacterpositions(word, gl.c_str(), &is_swap) - ? 1 - : 0) + - // swap character (not neighboring) - ((is_swap) ? 10 : 0) + - // ngram - ngram(4, word, gl, NGRAM_ANY_MISMATCH + low) + - // weighted ngrams - re + - // different limit for dictionaries with PHONE rules - (ph ? (re < len * fact ? -1000 : 0) - : (re < (n + len) * fact ? -1000 : 0)); - } - } - - bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS); - - // phonetic version - if (ph) - for (i = 0; i < MAX_ROOTS; i++) { - if (rootsphon[i]) { - // lowering rootphon[i] - std::string gl; - int len; - if (utf8) { - std::vector _w; - len = u8_u16(_w, rootsphon[i]); - mkallsmall_utf(_w, len, langnum); - u16_u8(gl, _w); - } else { - gl.assign(rootsphon[i]); - if (!nonbmp) - mkallsmall(gl, csconv); - len = strlen(rootsphon[i]); - } - - // heuristic weigthing of ngram scores - scoresphon[i] += 2 * lcslen(word, gl) - abs((int)(n - len)) + - // weight length of the left common substring - leftcommonsubstring(word, gl.c_str()); - } - } - - if (ph) - bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS); - - // copy over - int oldns = ns; - - int same = 0; - for (i = 0; i < MAX_GUESS; i++) { - if (guess[i]) { - if ((ns < oldns + maxngramsugs) && (ns < maxSug) && - (!same || (gscore[i] > 1000))) { - int unique = 1; - // leave only excellent suggestions, if exists - if (gscore[i] > 1000) - same = 1; - else if (gscore[i] < -100) { - same = 1; - // keep the best ngram suggestions, unless in ONLYMAXDIFF mode - if (ns > oldns || (pAMgr && pAMgr->get_onlymaxdiff())) { - free(guess[i]); - if (guessorig[i]) - free(guessorig[i]); - continue; - } - } - for (j = 0; j < ns; j++) { - // don't suggest previous suggestions or a previous suggestion with - // prefixes or affixes - if ((!guessorig[i] && strstr(guess[i], wlst[j])) || - (guessorig[i] && strstr(guessorig[i], wlst[j])) || - // check forbidden words - !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) { - unique = 0; - break; - } - } - if (unique) { - wlst[ns++] = guess[i]; - if (guessorig[i]) { - free(guess[i]); - wlst[ns - 1] = guessorig[i]; - } - } else { - free(guess[i]); - if (guessorig[i]) - free(guessorig[i]); - } - } else { - free(guess[i]); - if (guessorig[i]) - free(guessorig[i]); - } - } - } - - oldns = ns; - if (ph) - for (i = 0; i < MAX_ROOTS; i++) { - if (rootsphon[i]) { - if ((ns < oldns + MAXPHONSUGS) && (ns < maxSug)) { - int unique = 1; - for (j = 0; j < ns; j++) { - // don't suggest previous suggestions or a previous suggestion with - // prefixes or affixes - if (strstr(rootsphon[i], wlst[j]) || - // check forbidden words - !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) { - unique = 0; - break; - } - } - if (unique) { - wlst[ns++] = mystrdup(rootsphon[i]); - if (!wlst[ns - 1]) - return ns - 1; - } - } - } - } - - if (nonbmp) - utf8 = 1; - return ns; -} - -// see if a candidate suggestion is spelled correctly -// needs to check both root words and words with affixes - -// obsolote MySpell-HU modifications: -// return value 2 and 3 marks compounding with hyphen (-) -// `3' marks roots without suffix -int SuggestMgr::checkword(const char* word, - int len, - int cpdsuggest, - int* timer, - clock_t* timelimit) { - struct hentry* rv = NULL; - struct hentry* rv2 = NULL; - int nosuffix = 0; - - // check time limit - if (timer) { - (*timer)--; - if (!(*timer) && timelimit) { - if ((clock() - *timelimit) > TIMELIMIT) - return 0; - *timer = MAXPLUSTIMER; - } - } - - if (pAMgr) { - if (cpdsuggest == 1) { - if (pAMgr->get_compound()) { - rv = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 1, - 0); // EXT - if (rv && - (!(rv2 = pAMgr->lookup(word)) || !rv2->astr || - !(TESTAFF(rv2->astr, pAMgr->get_forbiddenword(), rv2->alen) || - TESTAFF(rv2->astr, pAMgr->get_nosuggest(), rv2->alen)))) - return 3; // XXX obsolote categorisation + only ICONV needs affix - // flag check? - } - return 0; - } - - rv = pAMgr->lookup(word); - - if (rv) { - if ((rv->astr) && - (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) || - TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen))) - return 0; - while (rv) { - if (rv->astr && - (TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || - TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || - TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) { - rv = rv->next_homonym; - } else - break; - } - } else - rv = pAMgr->prefix_check(word, len, - 0); // only prefix, and prefix + suffix XXX - - if (rv) { - nosuffix = 1; - } else { - rv = pAMgr->suffix_check(word, len, 0, NULL, NULL, 0, - NULL); // only suffix - } - - if (!rv && pAMgr->have_contclass()) { - rv = pAMgr->suffix_check_twosfx(word, len, 0, NULL, FLAG_NULL); - if (!rv) - rv = pAMgr->prefix_check_twosfx(word, len, 1, FLAG_NULL); - } - - // check forbidden words - if ((rv) && (rv->astr) && - (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) || - TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || - TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen) || - TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) - return 0; - - if (rv) { // XXX obsolote - if ((pAMgr->get_compoundflag()) && - TESTAFF(rv->astr, pAMgr->get_compoundflag(), rv->alen)) - return 2 + nosuffix; - return 1; - } - } - return 0; -} - -int SuggestMgr::check_forbidden(const char* word, int len) { - struct hentry* rv = NULL; - - if (pAMgr) { - rv = pAMgr->lookup(word); - if (rv && rv->astr && - (TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || - TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) - rv = NULL; - if (!(pAMgr->prefix_check(word, len, 1))) - rv = pAMgr->suffix_check(word, len, 0, NULL, NULL, 0, - NULL); // prefix+suffix, suffix - // check forbidden words - if ((rv) && (rv->astr) && - TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen)) - return 1; - } - return 0; -} - -#ifdef HUNSPELL_EXPERIMENTAL -// suggest possible stems -int SuggestMgr::suggest_pos_stems(char*** slst, const char* w, int nsug) { - char** wlst; - - struct hentry* rv = NULL; - - char w2[MAXSWUTF8L]; - const char* word = w; - - // word reversing wrapper for complex prefixes - if (complexprefixes) { - strcpy(w2, w); - if (utf8) - reverseword_utf(w2); - else - reverseword(w2); - word = w2; - } - - int wl = strlen(word); - - if (*slst) { - wlst = *slst; - } else { - wlst = (char**)calloc(maxSug, sizeof(char*)); - if (wlst == NULL) - return -1; - } - - rv = pAMgr->suffix_check(word, wl, 0, NULL, wlst, maxSug, &nsug); - - // delete dash from end of word - if (nsug > 0) { - for (int j = 0; j < nsug; j++) { - if (wlst[j][strlen(wlst[j]) - 1] == '-') - wlst[j][strlen(wlst[j]) - 1] = '\0'; - } - } - - *slst = wlst; - return nsug; -} -#endif // END OF HUNSPELL_EXPERIMENTAL CODE - -char* SuggestMgr::suggest_morph(const char* w) { - char result[MAXLNLEN]; - char* r = (char*)result; - char* st; - - struct hentry* rv = NULL; - - *result = '\0'; - - if (!pAMgr) - return NULL; - - std::string w2; - const char* word = w; - - // word reversing wrapper for complex prefixes - if (complexprefixes) { - w2.assign(w); - if (utf8) - reverseword_utf(w2); - else - reverseword(w2); - word = w2.c_str(); - } - - rv = pAMgr->lookup(word); - - while (rv) { - if ((!rv->astr) || - !(TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) || - TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || - TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) { - if (!HENTRY_FIND(rv, MORPH_STEM)) { - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, MORPH_STEM, MAXLNLEN); - mystrcat(result, word, MAXLNLEN); - } - if (HENTRY_DATA(rv)) { - mystrcat(result, " ", MAXLNLEN); - mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN); - } - mystrcat(result, "\n", MAXLNLEN); - } - rv = rv->next_homonym; - } - - st = pAMgr->affix_check_morph(word, strlen(word)); - if (st) { - mystrcat(result, st, MAXLNLEN); - free(st); - } - - if (pAMgr->get_compound() && (*result == '\0')) - pAMgr->compound_check_morph(word, strlen(word), 0, 0, 100, 0, NULL, 0, &r, - NULL); - - return (*result) ? mystrdup(line_uniq(result, MSEP_REC)) : NULL; -} - -#ifdef HUNSPELL_EXPERIMENTAL -char* SuggestMgr::suggest_morph_for_spelling_error(const char* word) { - char* p = NULL; - char** wlst = (char**)calloc(maxSug, sizeof(char*)); - if (!**wlst) - return NULL; - // we will use only the first suggestion - for (int i = 0; i < maxSug - 1; i++) - wlst[i] = ""; - int ns = suggest(&wlst, word, maxSug - 1, NULL); - if (ns == maxSug) { - p = suggest_morph(wlst[maxSug - 1]); - free(wlst[maxSug - 1]); - } - if (wlst) - free(wlst); - return p; -} -#endif // END OF HUNSPELL_EXPERIMENTAL CODE - -/* affixation */ -char* SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) { - char result[MAXLNLEN]; - *result = '\0'; - int sfxcount = get_sfxcount(pattern); - - if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount) - return NULL; - - if (HENTRY_DATA(rv)) { - char* aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->alen, - HENTRY_DATA(rv), pattern, 0); - if (aff) { - mystrcat(result, aff, MAXLNLEN); - mystrcat(result, "\n", MAXLNLEN); - free(aff); - } - } - - // check all allomorphs - char allomorph[MAXLNLEN]; - char* p = NULL; - if (HENTRY_DATA(rv)) - p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH); - while (p) { - struct hentry* rv2 = NULL; - p += MORPH_TAG_LEN; - int plen = fieldlen(p); - strncpy(allomorph, p, plen); - allomorph[plen] = '\0'; - rv2 = pAMgr->lookup(allomorph); - while (rv2) { - // if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <= - // sfxcount) { - if (HENTRY_DATA(rv2)) { - char* st = (char*)strstr(HENTRY_DATA2(rv2), MORPH_STEM); - if (st && (strncmp(st + MORPH_TAG_LEN, HENTRY_WORD(rv), - fieldlen(st + MORPH_TAG_LEN)) == 0)) { - char* aff = pAMgr->morphgen(HENTRY_WORD(rv2), rv2->blen, rv2->astr, - rv2->alen, HENTRY_DATA(rv2), pattern, 0); - if (aff) { - mystrcat(result, aff, MAXLNLEN); - mystrcat(result, "\n", MAXLNLEN); - free(aff); - } - } - } - rv2 = rv2->next_homonym; - } - p = strstr(p + plen, MORPH_ALLOMORPH); - } - - return (*result) ? mystrdup(result) : NULL; -} - -char* SuggestMgr::suggest_gen(char** desc, int n, const char* pattern) { - if (n == 0 || !pAMgr) - return NULL; - - char result[MAXLNLEN]; - char result2[MAXLNLEN]; - std::string newpattern; - *result2 = '\0'; - struct hentry* rv = NULL; - - // search affixed forms with and without derivational suffixes - while (1) { - for (int k = 0; k < n; k++) { - *result = '\0'; - // add compound word parts (except the last one) - char* s = (char*)desc[k]; - char* part = strstr(s, MORPH_PART); - if (part) { - char* nextpart = strstr(part + 1, MORPH_PART); - while (nextpart) { - copy_field(result + strlen(result), part, MORPH_PART); - part = nextpart; - nextpart = strstr(part + 1, MORPH_PART); - } - s = part; - } - - char** pl; - std::string tok(s); - size_t pos = tok.find(" | "); - while (pos != std::string::npos) { - tok[pos + 1] = MSEP_ALT; - pos = tok.find(" | ", pos); - } - int pln = line_tok(tok.c_str(), &pl, MSEP_ALT); - for (int i = 0; i < pln; i++) { - // remove inflectional and terminal suffixes - char* is = strstr(pl[i], MORPH_INFL_SFX); - if (is) - *is = '\0'; - char* ts = strstr(pl[i], MORPH_TERM_SFX); - while (ts) { - *ts = '_'; - ts = strstr(pl[i], MORPH_TERM_SFX); - } - char* st = strstr(s, MORPH_STEM); - if (st) { - copy_field(tok, st, MORPH_STEM); - rv = pAMgr->lookup(tok.c_str()); - while (rv) { - std::string newpat(pl[i]); - newpat.append(pattern); - char* sg = suggest_hentry_gen(rv, newpat.c_str()); - if (!sg) - sg = suggest_hentry_gen(rv, pattern); - if (sg) { - char** gen; - int genl = line_tok(sg, &gen, MSEP_REC); - free(sg); - sg = NULL; - for (int j = 0; j < genl; j++) { - if (strstr(pl[i], MORPH_SURF_PFX)) { - int r2l = strlen(result2); - result2[r2l] = MSEP_REC; - strcpy(result2 + r2l + 1, result); - copy_field(result2 + strlen(result2), pl[i], MORPH_SURF_PFX); - mystrcat(result2, gen[j], MAXLNLEN); - } else { - sprintf(result2 + strlen(result2), "%c%s%s", MSEP_REC, result, - gen[j]); - } - } - freelist(&gen, genl); - } - rv = rv->next_homonym; - } - } - } - freelist(&pl, pln); - } - - if (*result2 || !strstr(pattern, MORPH_DERI_SFX)) - break; - - newpattern.assign(pattern); - mystrrep(newpattern, MORPH_DERI_SFX, MORPH_TERM_SFX); - pattern = newpattern.c_str(); - } - return (*result2 ? mystrdup(result2) : NULL); -} - -// generate an n-gram score comparing s1 and s2 -int SuggestMgr::ngram(int n, - const std::string& s1, - const std::string& s2, - int opt) { - int nscore = 0; - int ns; - int l1; - int l2; - int test = 0; - - if (utf8) { - std::vector su1; - std::vector su2; - l1 = u8_u16(su1, s1); - l2 = u8_u16(su2, s2); - if ((l2 <= 0) || (l1 == -1)) - return 0; - // lowering dictionary word - if (opt & NGRAM_LOWERING) - mkallsmall_utf(su2, l2, langnum); - for (int j = 1; j <= n; j++) { - ns = 0; - for (int i = 0; i <= (l1 - j); i++) { - int k = 0; - for (int l = 0; l <= (l2 - j); l++) { - for (k = 0; k < j; k++) { - w_char& c1 = su1[i + k]; - w_char& c2 = su2[l + k]; - if ((c1.l != c2.l) || (c1.h != c2.h)) - break; - } - if (k == j) { - ns++; - break; - } - } - if (k != j && opt & NGRAM_WEIGHTED) { - ns--; - test++; - if (i == 0 || i == l1 - j) - ns--; // side weight - } - } - nscore = nscore + ns; - if (ns < 2 && !(opt & NGRAM_WEIGHTED)) - break; - } - } else { - l2 = s2.size(); - if (l2 == 0) - return 0; - l1 = s1.size(); - std::string t(s2); - if (opt & NGRAM_LOWERING) - mkallsmall(t, csconv); - for (int j = 1; j <= n; j++) { - ns = 0; - for (int i = 0; i <= (l1 - j); i++) { - std::string temp(s1.substr(i, j)); - if (t.find(temp) != std::string::npos) { - ns++; - } else if (opt & NGRAM_WEIGHTED) { - ns--; - test++; - if (i == 0 || i == l1 - j) - ns--; // side weight - } - } - nscore = nscore + ns; - if (ns < 2 && !(opt & NGRAM_WEIGHTED)) - break; - } - } - - ns = 0; - if (opt & NGRAM_LONGER_WORSE) - ns = (l2 - l1) - 2; - if (opt & NGRAM_ANY_MISMATCH) - ns = abs(l2 - l1) - 2; - ns = (nscore - ((ns > 0) ? ns : 0)); - return ns; -} - -// length of the left common substring of s1 and (decapitalised) s2 -int SuggestMgr::leftcommonsubstring(const char* s1, const char* s2) { - if (utf8) { - w_char su1[MAXSWL]; - w_char su2[MAXSWL]; - su1[0].l = su2[0].l = su1[0].h = su2[0].h = 0; - // decapitalize dictionary word - if (complexprefixes) { - int l1 = u8_u16(su1, MAXSWL, s1); - int l2 = u8_u16(su2, MAXSWL, s2); - if (*((short*)su1 + l1 - 1) == *((short*)su2 + l2 - 1)) - return 1; - } else { - int i; - u8_u16(su1, 1, s1); - u8_u16(su2, 1, s2); - unsigned short idx = (su2->h << 8) + su2->l; - unsigned short otheridx = (su1->h << 8) + su1->l; - if (otheridx != idx && (otheridx != unicodetolower(idx, langnum))) - return 0; - int l1 = u8_u16(su1, MAXSWL, s1); - int l2 = u8_u16(su2, MAXSWL, s2); - for (i = 1; (i < l1) && (i < l2) && (su1[i].l == su2[i].l) && - (su1[i].h == su2[i].h); - i++) - ; - return i; - } - } else { - if (complexprefixes) { - int l1 = strlen(s1); - int l2 = strlen(s2); - if (*(s2 + l1 - 1) == *(s2 + l2 - 1)) - return 1; - } else { - const char* olds = s1; - // decapitalise dictionary word - if ((*s1 != *s2) && (*s1 != csconv[((unsigned char)*s2)].clower)) - return 0; - do { - s1++; - s2++; - } while ((*s1 == *s2) && (*s1 != '\0')); - return (int)(s1 - olds); - } - } - return 0; -} - -int SuggestMgr::commoncharacterpositions(const char* s1, - const char* s2, - int* is_swap) { - int num = 0; - int diff = 0; - int diffpos[2]; - *is_swap = 0; - if (utf8) { - w_char su1[MAXSWL]; - w_char su2[MAXSWL]; - int l1 = u8_u16(su1, MAXSWL, s1); - int l2 = u8_u16(su2, MAXSWL, s2); - - if (l1 <= 0 || l2 <= 0) - return 0; - - // decapitalize dictionary word - if (complexprefixes) { - mkallsmall_utf(su2 + l2 - 1, 1, langnum); - } else { - mkallsmall_utf(su2, 1, langnum); - } - for (int i = 0; (i < l1) && (i < l2); i++) { - if (((short*)su1)[i] == ((short*)su2)[i]) { - num++; - } else { - if (diff < 2) - diffpos[diff] = i; - diff++; - } - } - if ((diff == 2) && (l1 == l2) && - (((short*)su1)[diffpos[0]] == ((short*)su2)[diffpos[1]]) && - (((short*)su1)[diffpos[1]] == ((short*)su2)[diffpos[0]])) - *is_swap = 1; - } else { - size_t i; - std::string t(s2); - // decapitalize dictionary word - if (complexprefixes) { - size_t l2 = t.size(); - t[l2 - 1] = csconv[(unsigned char)t[l2 - 1]].clower; - } else { - mkallsmall(t, csconv); - } - for (i = 0; (*(s1 + i) != 0) && i < t.size(); i++) { - if (*(s1 + i) == t[i]) { - num++; - } else { - if (diff < 2) - diffpos[diff] = i; - diff++; - } - } - if ((diff == 2) && (*(s1 + i) == 0) && i == t.size() && - (*(s1 + diffpos[0]) == t[diffpos[1]]) && - (*(s1 + diffpos[1]) == t[diffpos[0]])) - *is_swap = 1; - } - return num; -} - -int SuggestMgr::mystrlen(const char* word) { - if (utf8) { - w_char w[MAXSWL]; - return u8_u16(w, MAXSWL, word); - } else - return strlen(word); -} - -// sort in decreasing order of score -void SuggestMgr::bubblesort(char** rword, char** rword2, int* rsc, int n) { - int m = 1; - while (m < n) { - int j = m; - while (j > 0) { - if (rsc[j - 1] < rsc[j]) { - int sctmp = rsc[j - 1]; - char* wdtmp = rword[j - 1]; - rsc[j - 1] = rsc[j]; - rword[j - 1] = rword[j]; - rsc[j] = sctmp; - rword[j] = wdtmp; - if (rword2) { - wdtmp = rword2[j - 1]; - rword2[j - 1] = rword2[j]; - rword2[j] = wdtmp; - } - j--; - } else - break; - } - m++; - } - return; -} - -// longest common subsequence -void SuggestMgr::lcs(const char* s, - const char* s2, - int* l1, - int* l2, - char** result) { - int n, m; - w_char su[MAXSWL]; - w_char su2[MAXSWL]; - char* b; - char* c; - int i; - int j; - if (utf8) { - m = u8_u16(su, MAXSWL, s); - n = u8_u16(su2, MAXSWL, s2); - } else { - m = strlen(s); - n = strlen(s2); - } - c = (char*)malloc((m + 1) * (n + 1)); - b = (char*)malloc((m + 1) * (n + 1)); - if (!c || !b) { - if (c) - free(c); - if (b) - free(b); - *result = NULL; - return; - } - for (i = 1; i <= m; i++) - c[i * (n + 1)] = 0; - for (j = 0; j <= n; j++) - c[j] = 0; - for (i = 1; i <= m; i++) { - for (j = 1; j <= n; j++) { - if (((utf8) && (*((short*)su + i - 1) == *((short*)su2 + j - 1))) || - ((!utf8) && ((*(s + i - 1)) == (*(s2 + j - 1))))) { - c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j - 1] + 1; - b[i * (n + 1) + j] = LCS_UPLEFT; - } else if (c[(i - 1) * (n + 1) + j] >= c[i * (n + 1) + j - 1]) { - c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j]; - b[i * (n + 1) + j] = LCS_UP; - } else { - c[i * (n + 1) + j] = c[i * (n + 1) + j - 1]; - b[i * (n + 1) + j] = LCS_LEFT; - } - } - } - *result = b; - free(c); - *l1 = m; - *l2 = n; -} - -int SuggestMgr::lcslen(const char* s, const char* s2) { - int m; - int n; - int i; - int j; - char* result; - int len = 0; - lcs(s, s2, &m, &n, &result); - if (!result) - return 0; - i = m; - j = n; - while ((i != 0) && (j != 0)) { - if (result[i * (n + 1) + j] == LCS_UPLEFT) { - len++; - i--; - j--; - } else if (result[i * (n + 1) + j] == LCS_UP) { - i--; - } else - j--; - } - free(result); - return len; -} - -int SuggestMgr::lcslen(const std::string& s, const std::string& s2) { - return lcslen(s.c_str(), s2.c_str()); -} -- cgit v1.2.3