summaryrefslogtreecommitdiff
path: root/libs/hunspell/src/hashmgr.c++
diff options
context:
space:
mode:
Diffstat (limited to 'libs/hunspell/src/hashmgr.c++')
-rw-r--r--libs/hunspell/src/hashmgr.c++193
1 files changed, 112 insertions, 81 deletions
diff --git a/libs/hunspell/src/hashmgr.c++ b/libs/hunspell/src/hashmgr.c++
index dbcf56a51c..c3cd95420f 100644
--- a/libs/hunspell/src/hashmgr.c++
+++ b/libs/hunspell/src/hashmgr.c++
@@ -76,6 +76,7 @@
#include <stdio.h>
#include <ctype.h>
#include <limits>
+#include <sstream>
#include "hashmgr.hxx"
#include "csutil.hxx"
@@ -101,8 +102,6 @@ HashMgr::HashMgr(const char* tpath, const char* apath, const char* key)
enc = NULL;
csconv = 0;
ignorechars = NULL;
- ignorechars_utf16 = NULL;
- ignorechars_utf16_len = 0;
load_config(apath, key);
int ec = load_tables(tpath, key);
if (ec) {
@@ -167,8 +166,6 @@ HashMgr::~HashMgr() {
if (ignorechars)
free(ignorechars);
- if (ignorechars_utf16)
- free(ignorechars_utf16);
#ifdef MOZILLA_CLIENT
delete[] csconv;
@@ -199,28 +196,56 @@ int HashMgr::add_word(const char* word,
int al,
const char* desc,
bool onlyupcase) {
+
+ std::string *word_copy = NULL;
+ std::string *desc_copy = NULL;
+ if (ignorechars || complexprefixes) {
+ word_copy = new std::string(word, wbl);
+
+ if (ignorechars != NULL) {
+ if (utf8) {
+ wcl = remove_ignored_chars_utf(*word_copy, ignorechars_utf16);
+ } else {
+ remove_ignored_chars(*word_copy, ignorechars);
+ }
+ }
+
+ if (complexprefixes) {
+ if (utf8)
+ wcl = reverseword_utf(*word_copy);
+ else
+ reverseword(*word_copy);
+
+ if (desc && !aliasm) {
+ desc_copy = new std::string(desc);
+
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(*desc_copy);
+ else
+ reverseword(*desc_copy);
+ }
+ desc = desc_copy->c_str();
+ }
+ }
+
+ wbl = word_copy->size();
+ word = word_copy->c_str();
+ }
+
bool upcasehomonym = false;
int descl = desc ? (aliasm ? sizeof(char*) : strlen(desc) + 1) : 0;
// variable-length hash record with word and optional fields
struct hentry* hp =
(struct hentry*)malloc(sizeof(struct hentry) + wbl + descl);
- if (!hp)
+ if (!hp) {
+ delete desc_copy;
+ delete word_copy;
return 1;
+ }
+
char* hpw = hp->word;
strcpy(hpw, word);
- if (ignorechars != NULL) {
- if (utf8) {
- remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len);
- } else {
- remove_ignored_chars(hpw, ignorechars);
- }
- }
- if (complexprefixes) {
- if (utf8)
- reverseword_utf(hpw);
- else
- reverseword(hpw);
- }
int i = hash(hpw);
@@ -239,12 +264,6 @@ int HashMgr::add_word(const char* word,
store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));
} else {
strcpy(hpw + wbl + 1, desc);
- if (complexprefixes) {
- if (utf8)
- reverseword_utf(HENTRY_DATA(hp));
- else
- reverseword(HENTRY_DATA(hp));
- }
}
if (strstr(HENTRY_DATA(hp), MORPH_PHON))
hp->var += H_OPT_PHON;
@@ -254,6 +273,8 @@ int HashMgr::add_word(const char* word,
struct hentry* dp = tableptr[i];
if (!dp) {
tableptr[i] = hp;
+ delete desc_copy;
+ delete word_copy;
return 0;
}
while (dp->next != NULL) {
@@ -265,6 +286,8 @@ int HashMgr::add_word(const char* word,
dp->astr = hp->astr;
dp->alen = hp->alen;
free(hp);
+ delete desc_copy;
+ delete word_copy;
return 0;
} else {
dp->next_homonym = hp;
@@ -283,6 +306,8 @@ int HashMgr::add_word(const char* word,
dp->astr = hp->astr;
dp->alen = hp->alen;
free(hp);
+ delete desc_copy;
+ delete word_copy;
return 0;
} else {
dp->next_homonym = hp;
@@ -299,11 +324,13 @@ int HashMgr::add_word(const char* word,
free(hp->astr);
free(hp);
}
+
+ delete desc_copy;
+ delete word_copy;
return 0;
}
-int HashMgr::add_hidden_capitalized_word(char* word,
- int wbl,
+int HashMgr::add_hidden_capitalized_word(const std::string& word,
int wcl,
unsigned short* flags,
int flagslen,
@@ -326,32 +353,34 @@ int HashMgr::add_hidden_capitalized_word(char* word,
memcpy(flags2, flags, flagslen * sizeof(unsigned short));
flags2[flagslen] = ONLYUPCASEFLAG;
if (utf8) {
- char st[BUFSIZE];
- w_char w[BUFSIZE];
- int wlen = u8_u16(w, BUFSIZE, word);
- mkallsmall_utf(w, wlen, langnum);
- mkallcap_utf(w, 1, langnum);
- u16_u8(st, BUFSIZE, w, wlen);
- return add_word(st, wbl, wcl, flags2, flagslen + 1, dp, true);
+ std::string st;
+ std::vector<w_char> w;
+ u8_u16(w, word);
+ mkallsmall_utf(w, langnum);
+ mkinitcap_utf(w, langnum);
+ u16_u8(st, w);
+ return add_word(st.c_str(), st.size(), wcl, flags2, flagslen + 1, dp, true);
} else {
- mkallsmall(word, csconv);
- mkinitcap(word, csconv);
- return add_word(word, wbl, wcl, flags2, flagslen + 1, dp, true);
+ std::string new_word(word);
+ mkallsmall(new_word, csconv);
+ mkinitcap(new_word, csconv);
+ int ret = add_word(new_word.c_str(), new_word.size(), wcl, flags2, flagslen + 1, dp, true);
+ return ret;
}
}
return 0;
}
// detect captype and modify word length for UTF-8 encoding
-int HashMgr::get_clen_and_captype(const char* word, int wbl, int* captype) {
+int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
int len;
if (utf8) {
- w_char dest_utf[BUFSIZE];
- len = u8_u16(dest_utf, BUFSIZE, word);
- *captype = get_captype_utf8(dest_utf, len, langnum);
+ std::vector<w_char> dest_utf;
+ len = u8_u16(dest_utf, word);
+ *captype = get_captype_utf8(dest_utf, langnum);
} else {
- len = wbl;
- *captype = get_captype((char*)word, len, csconv);
+ len = word.size();
+ *captype = get_captype(word, csconv);
}
return len;
}
@@ -370,7 +399,7 @@ int HashMgr::remove(const char* word) {
flags[dp->alen] = forbiddenword;
dp->astr = flags;
dp->alen++;
- flag_qsort(flags, 0, dp->alen);
+ std::sort(flags, flags + dp->alen);
}
dp = dp->next_homonym;
}
@@ -378,8 +407,8 @@ int HashMgr::remove(const char* word) {
}
/* remove forbidden flag to add a personal word to the hash */
-int HashMgr::remove_forbidden_flag(const char* word) {
- struct hentry* dp = lookup(word);
+int HashMgr::remove_forbidden_flag(const std::string& word) {
+ struct hentry* dp = lookup(word.c_str());
if (!dp)
return 1;
while (dp) {
@@ -406,15 +435,15 @@ int HashMgr::remove_forbidden_flag(const char* word) {
}
// add a custom dic. word to the hash table (public)
-int HashMgr::add(const char* word) {
+int HashMgr::add(const std::string& word) {
unsigned short* flags = NULL;
int al = 0;
if (remove_forbidden_flag(word)) {
int captype;
- int wbl = strlen(word);
- int wcl = get_clen_and_captype(word, wbl, &captype);
- add_word(word, wbl, wcl, flags, al, NULL, false);
- return add_hidden_capitalized_word((char*)word, wbl, wcl, flags, al, NULL,
+ int wbl = word.size();
+ int wcl = get_clen_and_captype(word, &captype);
+ add_word(word.c_str(), wbl, wcl, flags, al, NULL, false);
+ return add_hidden_capitalized_word(word, wcl, flags, al, NULL,
captype);
}
return 0;
@@ -427,7 +456,7 @@ int HashMgr::add_with_affix(const char* word, const char* example) {
if (dp && dp->astr) {
int captype;
int wbl = strlen(word);
- int wcl = get_clen_and_captype(word, wbl, &captype);
+ int wcl = get_clen_and_captype(word, &captype);
if (aliasf) {
add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);
} else {
@@ -440,7 +469,7 @@ int HashMgr::add_with_affix(const char* word, const char* example) {
} else
return 1;
}
- return add_hidden_capitalized_word((char*)word, wbl, wcl, dp->astr,
+ return add_hidden_capitalized_word(word, wcl, dp->astr,
dp->alen, NULL, captype);
}
return 1;
@@ -574,7 +603,7 @@ int HashMgr::load_tables(const char* tpath, const char* key) {
delete dict;
return 6;
}
- flag_qsort(flags, 0, al);
+ std::sort(flags, flags + al);
}
} else {
al = 0;
@@ -584,10 +613,10 @@ int HashMgr::load_tables(const char* tpath, const char* key) {
int captype;
int wbl = strlen(ts);
- int wcl = get_clen_and_captype(ts, wbl, &captype);
+ int wcl = get_clen_and_captype(ts, &captype);
// add the word and its index plus its capitalized form optionally
if (add_word(ts, wbl, wcl, flags, al, dp, false) ||
- add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) {
+ add_hidden_capitalized_word(ts, wcl, flags, al, dp, captype)) {
delete dict;
return 5;
}
@@ -674,12 +703,13 @@ int HashMgr::decode_flags(unsigned short** result, char* flags, FileMgr* af) {
break;
}
case FLAG_UNI: { // UTF-8 characters
- w_char w[BUFSIZE / 2];
- len = u8_u16(w, BUFSIZE / 2, flags);
+ std::vector<w_char> w;
+ u8_u16(w, flags);
+ len = w.size();
*result = (unsigned short*)malloc(len * sizeof(unsigned short));
if (!*result)
return -1;
- memcpy(*result, w, len * sizeof(short));
+ memcpy(*result, &w[0], len * sizeof(short));
break;
}
default: { // Ispell's one-character flags (erfg -> e r f g)
@@ -712,9 +742,13 @@ unsigned short HashMgr::decode_flag(const char* f) {
i, DEFAULTFLAGS - 1);
s = (unsigned short)i;
break;
- case FLAG_UNI:
- u8_u16((w_char*)&s, 1, f);
+ case FLAG_UNI: {
+ std::vector<w_char> w;
+ u8_u16(w, f);
+ if (!w.empty())
+ memcpy(&s, &w[0], 1 * sizeof(short));
break;
+ }
default:
s = (unsigned short)*((unsigned char*)f);
}
@@ -724,22 +758,24 @@ unsigned short HashMgr::decode_flag(const char* f) {
}
char* HashMgr::encode_flag(unsigned short f) {
- unsigned char ch[10];
if (f == 0)
return mystrdup("(NULL)");
+ std::string ch;
if (flag_mode == FLAG_LONG) {
- ch[0] = (unsigned char)(f >> 8);
- ch[1] = (unsigned char)(f - ((f >> 8) << 8));
- ch[2] = '\0';
+ ch.push_back((unsigned char)(f >> 8));
+ ch.push_back((unsigned char)(f - ((f >> 8) << 8)));
} else if (flag_mode == FLAG_NUM) {
- sprintf((char*)ch, "%d", f);
+ std::ostringstream stream;
+ stream << f;
+ ch = stream.str();
} else if (flag_mode == FLAG_UNI) {
- u16_u8((char*)&ch, 10, (w_char*)&f, 1);
+ const w_char* w_c = (const w_char*)&f;
+ std::vector<w_char> w(w_c, w_c + 1);
+ u16_u8(ch, w);
} else {
- ch[0] = (unsigned char)(f);
- ch[1] = '\0';
+ ch.push_back((unsigned char)(f));
}
- return mystrdup((char*)ch);
+ return mystrdup(ch.c_str());
}
// read in aff file and set flag mode
@@ -824,8 +860,8 @@ int HashMgr::load_config(const char* affpath, const char* key) {
/* parse in the ignored characters (for example, Arabic optional diacritics
* characters */
if (strncmp(line, "IGNORE", 6) == 0) {
- if (parse_array(line, &ignorechars, &ignorechars_utf16,
- &ignorechars_utf16_len, utf8, afflst->getlinenum())) {
+ if (!parse_array(line, &ignorechars, ignorechars_utf16,
+ utf8, afflst->getlinenum())) {
delete afflst;
return 1;
}
@@ -951,7 +987,7 @@ int HashMgr::parse_aliasf(char* line, FileMgr* af) {
case 1: {
aliasflen[j] =
(unsigned short)decode_flags(&(aliasf[j]), piece, af);
- flag_qsort(aliasf[j], 0, aliasflen[j]);
+ std::sort(aliasf[j], aliasf[j] + aliasflen[j]);
break;
}
default:
@@ -1070,19 +1106,14 @@ int HashMgr::parse_aliasm(char* line, FileMgr* af) {
*(tp - 1) = ' ';
tp = tp + strlen(tp);
}
+ std::string chunk(piece);
if (complexprefixes) {
if (utf8)
- reverseword_utf(piece);
+ reverseword_utf(chunk);
else
- reverseword(piece);
- }
- aliasm[j] = mystrdup(piece);
- if (!aliasm[j]) {
- numaliasm = 0;
- free(aliasm);
- aliasm = NULL;
- return 1;
+ reverseword(chunk);
}
+ aliasm[j] = mystrdup(chunk.c_str());
break;
}
default: