diff options
Diffstat (limited to 'libs/hunspell/src/hashmgr.c++')
-rw-r--r-- | libs/hunspell/src/hashmgr.c++ | 193 |
1 files changed, 112 insertions, 81 deletions
diff --git a/libs/hunspell/src/hashmgr.c++ b/libs/hunspell/src/hashmgr.c++ index dbcf56a51c..c3cd95420f 100644 --- a/libs/hunspell/src/hashmgr.c++ +++ b/libs/hunspell/src/hashmgr.c++ @@ -76,6 +76,7 @@ #include <stdio.h> #include <ctype.h> #include <limits> +#include <sstream> #include "hashmgr.hxx" #include "csutil.hxx" @@ -101,8 +102,6 @@ HashMgr::HashMgr(const char* tpath, const char* apath, const char* key) enc = NULL; csconv = 0; ignorechars = NULL; - ignorechars_utf16 = NULL; - ignorechars_utf16_len = 0; load_config(apath, key); int ec = load_tables(tpath, key); if (ec) { @@ -167,8 +166,6 @@ HashMgr::~HashMgr() { if (ignorechars) free(ignorechars); - if (ignorechars_utf16) - free(ignorechars_utf16); #ifdef MOZILLA_CLIENT delete[] csconv; @@ -199,28 +196,56 @@ int HashMgr::add_word(const char* word, int al, const char* desc, bool onlyupcase) { + + std::string *word_copy = NULL; + std::string *desc_copy = NULL; + if (ignorechars || complexprefixes) { + word_copy = new std::string(word, wbl); + + if (ignorechars != NULL) { + if (utf8) { + wcl = remove_ignored_chars_utf(*word_copy, ignorechars_utf16); + } else { + remove_ignored_chars(*word_copy, ignorechars); + } + } + + if (complexprefixes) { + if (utf8) + wcl = reverseword_utf(*word_copy); + else + reverseword(*word_copy); + + if (desc && !aliasm) { + desc_copy = new std::string(desc); + + if (complexprefixes) { + if (utf8) + reverseword_utf(*desc_copy); + else + reverseword(*desc_copy); + } + desc = desc_copy->c_str(); + } + } + + wbl = word_copy->size(); + word = word_copy->c_str(); + } + bool upcasehomonym = false; int descl = desc ? (aliasm ? sizeof(char*) : strlen(desc) + 1) : 0; // variable-length hash record with word and optional fields struct hentry* hp = (struct hentry*)malloc(sizeof(struct hentry) + wbl + descl); - if (!hp) + if (!hp) { + delete desc_copy; + delete word_copy; return 1; + } + char* hpw = hp->word; strcpy(hpw, word); - if (ignorechars != NULL) { - if (utf8) { - remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len); - } else { - remove_ignored_chars(hpw, ignorechars); - } - } - if (complexprefixes) { - if (utf8) - reverseword_utf(hpw); - else - reverseword(hpw); - } int i = hash(hpw); @@ -239,12 +264,6 @@ int HashMgr::add_word(const char* word, store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc))); } else { strcpy(hpw + wbl + 1, desc); - if (complexprefixes) { - if (utf8) - reverseword_utf(HENTRY_DATA(hp)); - else - reverseword(HENTRY_DATA(hp)); - } } if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON; @@ -254,6 +273,8 @@ int HashMgr::add_word(const char* word, struct hentry* dp = tableptr[i]; if (!dp) { tableptr[i] = hp; + delete desc_copy; + delete word_copy; return 0; } while (dp->next != NULL) { @@ -265,6 +286,8 @@ int HashMgr::add_word(const char* word, dp->astr = hp->astr; dp->alen = hp->alen; free(hp); + delete desc_copy; + delete word_copy; return 0; } else { dp->next_homonym = hp; @@ -283,6 +306,8 @@ int HashMgr::add_word(const char* word, dp->astr = hp->astr; dp->alen = hp->alen; free(hp); + delete desc_copy; + delete word_copy; return 0; } else { dp->next_homonym = hp; @@ -299,11 +324,13 @@ int HashMgr::add_word(const char* word, free(hp->astr); free(hp); } + + delete desc_copy; + delete word_copy; return 0; } -int HashMgr::add_hidden_capitalized_word(char* word, - int wbl, +int HashMgr::add_hidden_capitalized_word(const std::string& word, int wcl, unsigned short* flags, int flagslen, @@ -326,32 +353,34 @@ int HashMgr::add_hidden_capitalized_word(char* word, memcpy(flags2, flags, flagslen * sizeof(unsigned short)); flags2[flagslen] = ONLYUPCASEFLAG; if (utf8) { - char st[BUFSIZE]; - w_char w[BUFSIZE]; - int wlen = u8_u16(w, BUFSIZE, word); - mkallsmall_utf(w, wlen, langnum); - mkallcap_utf(w, 1, langnum); - u16_u8(st, BUFSIZE, w, wlen); - return add_word(st, wbl, wcl, flags2, flagslen + 1, dp, true); + std::string st; + std::vector<w_char> w; + u8_u16(w, word); + mkallsmall_utf(w, langnum); + mkinitcap_utf(w, langnum); + u16_u8(st, w); + return add_word(st.c_str(), st.size(), wcl, flags2, flagslen + 1, dp, true); } else { - mkallsmall(word, csconv); - mkinitcap(word, csconv); - return add_word(word, wbl, wcl, flags2, flagslen + 1, dp, true); + std::string new_word(word); + mkallsmall(new_word, csconv); + mkinitcap(new_word, csconv); + int ret = add_word(new_word.c_str(), new_word.size(), wcl, flags2, flagslen + 1, dp, true); + return ret; } } return 0; } // detect captype and modify word length for UTF-8 encoding -int HashMgr::get_clen_and_captype(const char* word, int wbl, int* captype) { +int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { int len; if (utf8) { - w_char dest_utf[BUFSIZE]; - len = u8_u16(dest_utf, BUFSIZE, word); - *captype = get_captype_utf8(dest_utf, len, langnum); + std::vector<w_char> dest_utf; + len = u8_u16(dest_utf, word); + *captype = get_captype_utf8(dest_utf, langnum); } else { - len = wbl; - *captype = get_captype((char*)word, len, csconv); + len = word.size(); + *captype = get_captype(word, csconv); } return len; } @@ -370,7 +399,7 @@ int HashMgr::remove(const char* word) { flags[dp->alen] = forbiddenword; dp->astr = flags; dp->alen++; - flag_qsort(flags, 0, dp->alen); + std::sort(flags, flags + dp->alen); } dp = dp->next_homonym; } @@ -378,8 +407,8 @@ int HashMgr::remove(const char* word) { } /* remove forbidden flag to add a personal word to the hash */ -int HashMgr::remove_forbidden_flag(const char* word) { - struct hentry* dp = lookup(word); +int HashMgr::remove_forbidden_flag(const std::string& word) { + struct hentry* dp = lookup(word.c_str()); if (!dp) return 1; while (dp) { @@ -406,15 +435,15 @@ int HashMgr::remove_forbidden_flag(const char* word) { } // add a custom dic. word to the hash table (public) -int HashMgr::add(const char* word) { +int HashMgr::add(const std::string& word) { unsigned short* flags = NULL; int al = 0; if (remove_forbidden_flag(word)) { int captype; - int wbl = strlen(word); - int wcl = get_clen_and_captype(word, wbl, &captype); - add_word(word, wbl, wcl, flags, al, NULL, false); - return add_hidden_capitalized_word((char*)word, wbl, wcl, flags, al, NULL, + int wbl = word.size(); + int wcl = get_clen_and_captype(word, &captype); + add_word(word.c_str(), wbl, wcl, flags, al, NULL, false); + return add_hidden_capitalized_word(word, wcl, flags, al, NULL, captype); } return 0; @@ -427,7 +456,7 @@ int HashMgr::add_with_affix(const char* word, const char* example) { if (dp && dp->astr) { int captype; int wbl = strlen(word); - int wcl = get_clen_and_captype(word, wbl, &captype); + int wcl = get_clen_and_captype(word, &captype); if (aliasf) { add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false); } else { @@ -440,7 +469,7 @@ int HashMgr::add_with_affix(const char* word, const char* example) { } else return 1; } - return add_hidden_capitalized_word((char*)word, wbl, wcl, dp->astr, + return add_hidden_capitalized_word(word, wcl, dp->astr, dp->alen, NULL, captype); } return 1; @@ -574,7 +603,7 @@ int HashMgr::load_tables(const char* tpath, const char* key) { delete dict; return 6; } - flag_qsort(flags, 0, al); + std::sort(flags, flags + al); } } else { al = 0; @@ -584,10 +613,10 @@ int HashMgr::load_tables(const char* tpath, const char* key) { int captype; int wbl = strlen(ts); - int wcl = get_clen_and_captype(ts, wbl, &captype); + int wcl = get_clen_and_captype(ts, &captype); // add the word and its index plus its capitalized form optionally if (add_word(ts, wbl, wcl, flags, al, dp, false) || - add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) { + add_hidden_capitalized_word(ts, wcl, flags, al, dp, captype)) { delete dict; return 5; } @@ -674,12 +703,13 @@ int HashMgr::decode_flags(unsigned short** result, char* flags, FileMgr* af) { break; } case FLAG_UNI: { // UTF-8 characters - w_char w[BUFSIZE / 2]; - len = u8_u16(w, BUFSIZE / 2, flags); + std::vector<w_char> w; + u8_u16(w, flags); + len = w.size(); *result = (unsigned short*)malloc(len * sizeof(unsigned short)); if (!*result) return -1; - memcpy(*result, w, len * sizeof(short)); + memcpy(*result, &w[0], len * sizeof(short)); break; } default: { // Ispell's one-character flags (erfg -> e r f g) @@ -712,9 +742,13 @@ unsigned short HashMgr::decode_flag(const char* f) { i, DEFAULTFLAGS - 1); s = (unsigned short)i; break; - case FLAG_UNI: - u8_u16((w_char*)&s, 1, f); + case FLAG_UNI: { + std::vector<w_char> w; + u8_u16(w, f); + if (!w.empty()) + memcpy(&s, &w[0], 1 * sizeof(short)); break; + } default: s = (unsigned short)*((unsigned char*)f); } @@ -724,22 +758,24 @@ unsigned short HashMgr::decode_flag(const char* f) { } char* HashMgr::encode_flag(unsigned short f) { - unsigned char ch[10]; if (f == 0) return mystrdup("(NULL)"); + std::string ch; if (flag_mode == FLAG_LONG) { - ch[0] = (unsigned char)(f >> 8); - ch[1] = (unsigned char)(f - ((f >> 8) << 8)); - ch[2] = '\0'; + ch.push_back((unsigned char)(f >> 8)); + ch.push_back((unsigned char)(f - ((f >> 8) << 8))); } else if (flag_mode == FLAG_NUM) { - sprintf((char*)ch, "%d", f); + std::ostringstream stream; + stream << f; + ch = stream.str(); } else if (flag_mode == FLAG_UNI) { - u16_u8((char*)&ch, 10, (w_char*)&f, 1); + const w_char* w_c = (const w_char*)&f; + std::vector<w_char> w(w_c, w_c + 1); + u16_u8(ch, w); } else { - ch[0] = (unsigned char)(f); - ch[1] = '\0'; + ch.push_back((unsigned char)(f)); } - return mystrdup((char*)ch); + return mystrdup(ch.c_str()); } // read in aff file and set flag mode @@ -824,8 +860,8 @@ int HashMgr::load_config(const char* affpath, const char* key) { /* parse in the ignored characters (for example, Arabic optional diacritics * characters */ if (strncmp(line, "IGNORE", 6) == 0) { - if (parse_array(line, &ignorechars, &ignorechars_utf16, - &ignorechars_utf16_len, utf8, afflst->getlinenum())) { + if (!parse_array(line, &ignorechars, ignorechars_utf16, + utf8, afflst->getlinenum())) { delete afflst; return 1; } @@ -951,7 +987,7 @@ int HashMgr::parse_aliasf(char* line, FileMgr* af) { case 1: { aliasflen[j] = (unsigned short)decode_flags(&(aliasf[j]), piece, af); - flag_qsort(aliasf[j], 0, aliasflen[j]); + std::sort(aliasf[j], aliasf[j] + aliasflen[j]); break; } default: @@ -1070,19 +1106,14 @@ int HashMgr::parse_aliasm(char* line, FileMgr* af) { *(tp - 1) = ' '; tp = tp + strlen(tp); } + std::string chunk(piece); if (complexprefixes) { if (utf8) - reverseword_utf(piece); + reverseword_utf(chunk); else - reverseword(piece); - } - aliasm[j] = mystrdup(piece); - if (!aliasm[j]) { - numaliasm = 0; - free(aliasm); - aliasm = NULL; - return 1; + reverseword(chunk); } + aliasm[j] = mystrdup(chunk.c_str()); break; } default: |