From d5dfa90eec9c108f86b9a0aa3f746a5b164c6649 Mon Sep 17 00:00:00 2001 From: Tobias Weimer Date: Sun, 20 Jul 2014 08:01:05 +0000 Subject: SpellChecker: -Updated Hunspell to 1.3.3 -Create services in Load() git-svn-id: http://svn.miranda-ng.org/main/trunk@9876 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c --- plugins/SpellChecker/src/hunspell/hunspell.cxx | 113 ++++++++++++++----------- 1 file changed, 65 insertions(+), 48 deletions(-) (limited to 'plugins/SpellChecker/src/hunspell/hunspell.cxx') diff --git a/plugins/SpellChecker/src/hunspell/hunspell.cxx b/plugins/SpellChecker/src/hunspell/hunspell.cxx index 03f8cb42c8..b5dcfd57fb 100644 --- a/plugins/SpellChecker/src/hunspell/hunspell.cxx +++ b/plugins/SpellChecker/src/hunspell/hunspell.cxx @@ -1,4 +1,4 @@ -#include "..\commons.h" +#include "..\commons.h" #ifndef MOZILLA_CLIENT # include "config.h" @@ -234,25 +234,10 @@ int Hunspell::mkallcap2(char * p, w_char * u, int nc) void Hunspell::mkallsmall(char * p) { - if (utf8) { - w_char u[MAXWORDLEN]; - int nc = u8_u16(u, MAXWORDLEN, p); - unsigned short idx; - for (int i = 0; i < nc; i++) { - idx = (u[i].h << 8) + u[i].l; - unsigned short low = unicodetolower(idx, langnum); - if (idx != low) { - u[i].h = (unsigned char) (low >> 8); - u[i].l = (unsigned char) (low & 0x00FF); - } - } - u16_u8(p, MAXWORDUTF8LEN, u, nc); - } else { while (*p != '\0') { *p = csconv[((unsigned char) *p)].clower; p++; } - } } int Hunspell::mkallsmall2(char * p, w_char * u, int nc) @@ -335,6 +320,10 @@ int Hunspell::spell(const char * word, int * info, char ** root) char cw[MAXWORDUTF8LEN]; char wspace[MAXWORDUTF8LEN]; w_char unicw[MAXWORDLEN]; + + int info2 = 0; + if (!info) info = &info2; else *info = 0; + // Hunspell supports XML input of the simplified API (see manual) if (strcmp(word, SPELL_XML) == 0) return 1; int nc = strlen(word); @@ -353,7 +342,6 @@ int Hunspell::spell(const char * word, int * info, char ** root) if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - int info2 = 0; if (wl == 0 || maxdic == 0) return 1; if (root) *root = NULL; @@ -371,13 +359,14 @@ int Hunspell::spell(const char * word, int * info, char ** root) } else break; } if ((i == wl) && (nstate == NNUM)) return 1; - if (!info) info = &info2; else *info = 0; switch(captype) { case HUHCAP: + /* FALLTHROUGH */ case HUHINITCAP: *info += SPELL_ORIGCAP; - case NOCAP: { + /* FALLTHROUGH */ + case NOCAP: rv = checkword(cw, info, root); if ((abbv) && !(rv)) { memcpy(wspace,cw,wl); @@ -386,7 +375,6 @@ int Hunspell::spell(const char * word, int * info, char ** root) rv = checkword(wspace, info, root); } break; - } case ALLCAP: { *info += SPELL_ORIGCAP; rv = checkword(cw, info, root); @@ -410,7 +398,7 @@ int Hunspell::spell(const char * word, int * info, char ** root) *apostrophe = '\0'; wl2 = u8_u16(tmpword, MAXWORDLEN, cw); *apostrophe = '\''; - if (wl2 < nc) { + if (wl2 >= 0 && wl2 < nc) { mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1); rv = checkword(cw, info, root); if (rv) break; @@ -757,19 +745,28 @@ int Hunspell::suggest(char*** slst, const char * word) char * dot = strchr(cw, '.'); if (dot && (dot > cw)) { int captype_; - if (utf8) { + if (utf8) + { w_char w_[MAXWORDLEN]; int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1); captype_ = get_captype_utf8(w_, wl_, langnum); } else captype_ = get_captype(dot+1, strlen(dot+1), csconv); - if (captype_ == INITCAP) { + if (captype_ == INITCAP) + { char * st = mystrdup(cw); - if (st) st = (char *) realloc(st, wl + 2); - if (st) { - st[(dot - cw) + 1] = ' '; - strcpy(st + (dot - cw) + 2, dot + 1); - ns = insert_sug(slst, st, ns); - free(st); + if (st) + { + char *newst = (char *) realloc(st, wl + 2); + if (newst == NULL) + free(st); + st = newst; + } + if (st) + { + st[(dot - cw) + 1] = ' '; + strcpy(st + (dot - cw) + 2, dot + 1); + ns = insert_sug(slst, st, ns); + free(st); } } } @@ -855,7 +852,7 @@ int Hunspell::suggest(char*** slst, const char * word) *pos = '\0'; strcpy(w, (*slst)[j]); strcat(w, pos + 1); - spell(w, &info, NULL); + (void)spell(w, &info, NULL); if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { *pos = ' '; } else *pos = '-'; @@ -1682,6 +1679,13 @@ int Hunspell::get_langnum() const return langnum; } +int Hunspell::input_conv(const char * word, char * dest) +{ + RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + return (rl && rl->conv(word, dest)); +} + + // return the beginning of the element (attr == NULL) or the attribute const char * Hunspell::get_xml_pos(const char * s, const char * attr) { @@ -1706,11 +1710,11 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) { int n = 0; char * p; if (!list) return 0; - for (p = list; (p = strstr(p, tag)); p++) n++; + for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++; if (n == 0) return 0; *slst = (char **) malloc(sizeof(char *) * n); if (!*slst) return 0; - for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) { + for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) { int l = strlen(p); (*slst)[n] = (char *) malloc(l + 1); if (!(*slst)[n]) return n; @@ -1722,6 +1726,19 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) { return n; } +namespace +{ + void myrep(std::string& str, const std::string& search, const std::string& replace) + { + size_t pos = 0; + while ((pos = str.find(search, pos)) != std::string::npos) + { + str.replace(pos, search.length(), replace); + pos += replace.length(); + } + } +} + int Hunspell::spellml(char*** slst, const char * word) { char *q, *q2; @@ -1733,26 +1750,26 @@ int Hunspell::spellml(char*** slst, const char * word) q2 = strstr(q2, "'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw); if (n == 0) return 0; // convert the result to ana1ana2 format - for (int i = 0; i < n; i++) s+= strlen((*slst)[i]); - char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->& - if (!r) return 0; - strcpy(r, ""); + std::string r; + r.append(""); for (int i = 0; i < n; i++) { - int l = strlen(r); - strcpy(r + l, ""); - strcpy(r + l + 3, (*slst)[i]); - mystrrep(r + l + 3, "\t", " "); - mystrrep(r + l + 3, "<", "<"); - mystrrep(r + l + 3, "&", "&"); - strcat(r, ""); + r.append(""); + + std::string entry((*slst)[i]); free((*slst)[i]); + myrep(entry, "\t", " "); + myrep(entry, "&", "&"); + myrep(entry, "<", "<"); + r.append(entry); + + r.append(""); } - strcat(r, ""); - (*slst)[0] = r; + r.append(""); + (*slst)[0] = mystrdup(r.c_str()); return 1; } else if (check_xml_par(q, "type=", "stem")) { if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw); @@ -1765,9 +1782,9 @@ int Hunspell::spellml(char*** slst, const char * word) return generate(slst, cw, cw2); } } else { - if ((q2 = strstr(q2 + 1, "'), ""))) { + if ((n = get_xml_list(&slst2, strchr(q2, '>'), "")) != 0) { int n2 = generate(slst, cw, slst2, n); freelist(&slst2, n); return uniqlist(*slst, n2); -- cgit v1.2.3