smaller unifired project for hunspell

git-svn-id: http://svn.miranda-ng.org/main/trunk@16202 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
author: George Hazan <george.hazan@gmail.com> 2016-01-31 16:14:52 +0000
committer: George Hazan <george.hazan@gmail.com> 2016-01-31 16:14:52 +0000
commit: 5707c6b2b1eafbf38ee7c14f39e42c2280d294ea (patch)
tree: cefc6ec223a91562809b3a885cbde5b44f4fc798 /libs/hunspell/src/dictmgr.c++
parent: 62767bfaf7f7fb988b826da797463545db14b3b5 (diff)
1 files changed, 216 insertions, 0 deletions
diff --git a/libs/hunspell/src/dictmgr.c++ b/libs/hunspell/src/dictmgr.c++
new file mode 100644
index 0000000000..473c09acfe
--- /dev/null
+++ b/libs/hunspell/src/dictmgr.c++
@@ -0,0 +1,216 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#include "dictmgr.hxx"
+#include "csutil.hxx"
+
+DictMgr::DictMgr(const char* dictpath, const char* etype) : numdict(0) {
+  // load list of etype entries
+  pdentry = (dictentry*)malloc(MAXDICTIONARIES * sizeof(struct dictentry));
+  if (pdentry) {
+    if (parse_file(dictpath, etype)) {
+      numdict = 0;
+      // no dictionary.lst found is okay
+    }
+  }
+}
+
+DictMgr::~DictMgr() {
+  dictentry* pdict = NULL;
+  if (pdentry) {
+    pdict = pdentry;
+    for (int i = 0; i < numdict; i++) {
+      if (pdict->lang) {
+        free(pdict->lang);
+        pdict->lang = NULL;
+      }
+      if (pdict->region) {
+        free(pdict->region);
+        pdict->region = NULL;
+      }
+      if (pdict->filename) {
+        free(pdict->filename);
+        pdict->filename = NULL;
+      }
+      pdict++;
+    }
+    free(pdentry);
+    pdentry = NULL;
+    pdict = NULL;
+  }
+  numdict = 0;
+}
+
+// read in list of etype entries and build up structure to describe them
+int DictMgr::parse_file(const char* dictpath, const char* etype) {
+  int i;
+  char line[MAXDICTENTRYLEN + 1];
+  dictentry* pdict = pdentry;
+
+  // open the dictionary list file
+  FILE* dictlst;
+  dictlst = myfopen(dictpath, "r");
+  if (!dictlst) {
+    return 1;
+  }
+
+  // step one is to parse the dictionary list building up the
+  // descriptive structures
+
+  // read in each line ignoring any that dont start with etype
+  while (fgets(line, MAXDICTENTRYLEN, dictlst)) {
+    mychomp(line);
+
+    /* parse in a dictionary entry */
+    if (strncmp(line, etype, 4) == 0) {
+      if (numdict < MAXDICTIONARIES) {
+        char* tp = line;
+        char* piece;
+        i = 0;
+        while ((piece = mystrsep(&tp, ' '))) {
+          if (*piece != '\0') {
+            switch (i) {
+              case 0:
+                break;
+              case 1:
+                pdict->lang = mystrdup(piece);
+                break;
+              case 2:
+                if (strcmp(piece, "ANY") == 0)
+                  pdict->region = mystrdup("");
+                else
+                  pdict->region = mystrdup(piece);
+                break;
+              case 3:
+                pdict->filename = mystrdup(piece);
+                break;
+              default:
+                break;
+            }
+            i++;
+          }
+          free(piece);
+        }
+        if (i == 4) {
+          numdict++;
+          pdict++;
+        } else {
+          switch (i) {
+            case 3:
+              free(pdict->region);
+              pdict->region = NULL;
+            /* FALLTHROUGH */
+            case 2:
+              free(pdict->lang);
+              pdict->lang = NULL;
+            default:
+              break;
+          }
+          fprintf(stderr, "dictionary list corruption in line \"%s\"\n", line);
+          fflush(stderr);
+        }
+      }
+    }
+  }
+  fclose(dictlst);
+  return 0;
+}
+
+// return text encoding of dictionary
+int DictMgr::get_list(dictentry** ppentry) {
+  *ppentry = pdentry;
+  return numdict;
+}
+
+// strip strings into token based on single char delimiter
+// acts like strsep() but only uses a delim char and not
+// a delim string
+
+char* DictMgr::mystrsep(char** stringp, const char delim) {
+  char* rv = NULL;
+  char* mp = *stringp;
+  size_t n = strlen(mp);
+  if (n > 0) {
+    char* dp = (char*)memchr(mp, (int)((unsigned char)delim), n);
+    if (dp) {
+      *stringp = dp + 1;
+      size_t nc = dp - mp;
+      rv = (char*)malloc(nc + 1);
+      if (rv) {
+        memcpy(rv, mp, nc);
+        *(rv + nc) = '\0';
+      }
+    } else {
+      rv = (char*)malloc(n + 1);
+      if (rv) {
+        memcpy(rv, mp, n);
+        *(rv + n) = '\0';
+        *stringp = mp + n;
+      }
+    }
+  }
+  return rv;
+}
+
+// replaces strdup with ansi version
+char* DictMgr::mystrdup(const char* s) {
+  char* d = NULL;
+  if (s) {
+    int sl = strlen(s) + 1;
+    d = (char*)malloc(sl);
+    if (d)
+      memcpy(d, s, sl);
+  }
+  return d;
+}
+
+// remove cross-platform text line end characters
+void DictMgr::mychomp(char* s) {
+  int k = strlen(s);
+  if ((k > 0) && ((*(s + k - 1) == '\r') || (*(s + k - 1) == '\n')))
+    *(s + k - 1) = '\0';
+  if ((k > 1) && (*(s + k - 2) == '\r'))
+    *(s + k - 2) = '\0';
+}
author	George Hazan <george.hazan@gmail.com>	2016-01-31 16:14:52 +0000
committer	George Hazan <george.hazan@gmail.com>	2016-01-31 16:14:52 +0000
commit	5707c6b2b1eafbf38ee7c14f39e42c2280d294ea (patch)
tree	cefc6ec223a91562809b3a885cbde5b44f4fc798 /libs/hunspell/src/dictmgr.c++
parent	62767bfaf7f7fb988b826da797463545db14b3b5 (diff)