From 15855fa84a09fd1fd486d357c38db0f2bd181e74 Mon Sep 17 00:00:00 2001 From: George Hazan Date: Tue, 4 Mar 2014 23:23:45 +0000 Subject: HistoryStats compiles ok now git-svn-id: http://svn.miranda-ng.org/main/trunk@8399 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c --- plugins/HistoryStats/src/colbase_words.cpp | 368 +++++++++++++++++++++++++++++ 1 file changed, 368 insertions(+) create mode 100644 plugins/HistoryStats/src/colbase_words.cpp (limited to 'plugins/HistoryStats/src/colbase_words.cpp') diff --git a/plugins/HistoryStats/src/colbase_words.cpp b/plugins/HistoryStats/src/colbase_words.cpp new file mode 100644 index 0000000000..9325d2a421 --- /dev/null +++ b/plugins/HistoryStats/src/colbase_words.cpp @@ -0,0 +1,368 @@ +#include "_globals.h" +#include "colbase_words.h" + +/* + * ColBaseWords + */ + +void ColBaseWords::addWord(WordMap* pWords, const ext::string& word, bool bOutgoing) const +{ + // filter words + if (m_bFilterWords) + { + upto_each_(i, m_ActiveWordFilter.size()) + { + const Filter* pFilter = m_ActiveWordFilter[i]; + + switch (pFilter->getMode()) + { + case Settings::fwmWordsMatching: + { + citer_each_(WordSet, j, pFilter->getWords()) + { + if (word == *j) + { + return; + } + } + } + break; + + case Settings::fwmWordsContaining: + { + citer_each_(WordSet, j, pFilter->getWords()) + { + if (word.find(*j) != ext::string::npos) + { + return; + } + } + } + break; + + case Settings::fwmWordsStartingWith: + { + citer_each_(WordSet, j, pFilter->getWords()) + { + if (word.length() >= j->length() && word.substr(0, j->length()) == *j) + { + return; + } + } + } + break; + + case Settings::fwmWordsEndingWith: + { + citer_each_(WordSet, j, pFilter->getWords()) + { + if (word.length() >= j->length() && word.substr(word.length() - j->length(), j->length()) == *j) + { + return; + } + } + } + break; + } + } + } + + // insert, if we didn't get filtered + WordMap::iterator i = pWords->find(word); + + if (i != pWords->end()) + { + (bOutgoing ? i->second.out : i->second.in)++; + } + else + { + pWords->insert(std::make_pair(word, bOutgoing ? InOut(0, 1) : InOut(1, 0))); + } +} + +void ColBaseWords::parseMsg(WordMap* pWords, const ext::string& msg, bool bOutgoing) const +{ + // filter messages + if (m_bFilterMessages) + { + ext::string msgLC = utils::toLowerCase(msg); + + upto_each_(i, m_ActiveMessageFilter.size()) + { + const Filter* pFilter = m_ActiveMessageFilter[i]; + + switch (pFilter->getMode()) + { + case Settings::fwmMessagesMatching: + { + citer_each_(WordSet, j, pFilter->getWords()) + { + if (msgLC == *j) + { + return; + } + } + } + break; + + case Settings::fwmMessagesContaining: + { + citer_each_(WordSet, j, pFilter->getWords()) + { + if (msgLC.find(*j) != ext::string::npos) + { + return; + } + } + } + break; + + case Settings::fwmMessagesStartingWith: + { + citer_each_(WordSet, j, pFilter->getWords()) + { + if (msgLC.length() >= j->length() && msgLC.substr(0, j->length()) == *j) + { + return; + } + } + } + break; + + case Settings::fwmMessagesEndingWith: + { + citer_each_(WordSet, j, pFilter->getWords()) + { + if (msgLC.length() >= j->length() && msgLC.substr(msgLC.length() - j->length(), j->length()) == *j) + { + return; + } + } + } + break; + } + } + } + + // start parsing into words if not already filtered + ext::string::size_type firstChar = 0; + ext::string::size_type nextSpace; + + while (firstChar < msg.length() && getCharMapper()->mapChar(msg[firstChar]) == muC(' ')) + { + ++firstChar; + } + + while (firstChar < msg.length()) + { + nextSpace = firstChar + 1; + + while (nextSpace < msg.length() && getCharMapper()->mapChar(msg[nextSpace]) != muC(' ')) + { + ++nextSpace; + } + + int wordLen = nextSpace - firstChar; + + if (wordLen >= m_nMinLength && (m_nMaxLength == 0 || wordLen <= m_nMaxLength)) + { + ext::string word(wordLen, muC('_')); + + upto_each_(i, wordLen) + { + word[i] = getCharMapper()->mapChar(msg[firstChar + i]); + } + + addWord(pWords, word, bOutgoing); + } + + firstChar = nextSpace + 1; + + while (firstChar < msg.length() && getCharMapper()->mapChar(msg[firstChar]) == muC(' ')) + { + ++firstChar; + } + } +} + +ColBaseWords::ColBaseWords() + : m_nSource(2), m_nMinLength(1), m_nMaxLength(0), m_bFilterLinks(true), + m_hSource(NULL), m_hMinLength(NULL), m_hMaxLength(NULL), m_hFilterLinks(NULL) +{ +} + +void ColBaseWords::impl_copyConfig(const Column* pSource) +{ + const ColBaseWords& src = *reinterpret_cast(pSource); + + m_nSource = src.m_nSource; + m_nMinLength = src.m_nMinLength; + m_nMaxLength = src.m_nMaxLength; + m_bFilterLinks = src.m_bFilterLinks; + m_FilterWords = src.m_FilterWords; +} + +void ColBaseWords::impl_configRead(const SettingsTree& settings) +{ + m_nSource = settings.readIntRanged(con::KeySource, 2, 0, 2); + m_nMinLength = settings.readIntRanged(con::KeyMinLength, 1, 1, 1000); + m_nMaxLength = settings.readIntRanged(con::KeyMaxLength, 0, 0, 1000); + m_bFilterLinks = settings.readBool(con::KeyFilterLinks, true); + + // filter words + m_FilterWords.clear(); + + int nCount = settings.readInt(con::KeyFilterWords, 0); + + upto_each_(i, nCount) + { + m_FilterWords.insert(settings.readStr((con::KeyFilterWords + utils::intToString(i)).c_str(), muT(""))); + } +} + +void ColBaseWords::impl_configWrite(SettingsTree& settings) const +{ + settings.writeInt (con::KeySource, m_nSource); + settings.writeInt (con::KeyMinLength, m_nMinLength); + settings.writeInt (con::KeyMaxLength, m_nMaxLength); + settings.writeBool(con::KeyFilterLinks, m_bFilterLinks); + + // filter words + settings.writeInt(con::KeyFilterWords, m_FilterWords.size()); + + int nFilterNr = 0; + + citer_each_(ColFilterSet, i, m_FilterWords) + { + settings.writeStr((con::KeyFilterWords + utils::intToString(nFilterNr++)).c_str(), i->c_str()); + } +} + +void ColBaseWords::impl_configToUI(OptionsCtrl& Opt, OptionsCtrl::Item hGroup) +{ + OptionsCtrl::Group hTemp; + + /**/hTemp = Opt.insertGroup (hGroup, i18n(muT("Extract words from"))); + /**/ m_hSource = Opt.insertRadio (hTemp, NULL, i18n(muT("Incoming messages"))); + /**/ Opt.insertRadio (hTemp, m_hSource, i18n(muT("Outgoing messages"))); + /**/ Opt.insertRadio (hTemp, m_hSource, i18n(muT("All messages"))); + /**/m_hMinLength = Opt.insertEdit (hGroup, i18n(muT("Ignore words shorter than (chars)")), muT(""), OptionsCtrl::OCF_NUMBER); + /**/m_hMaxLength = Opt.insertEdit (hGroup, i18n(muT("Ignore words longer than (chars, 0=no limit)")), muT(""), OptionsCtrl::OCF_NUMBER); + /**/m_hFilterLinks = Opt.insertCheck (hGroup, i18n(muT("Filter URLs/e-mail addresses"))); + /**/ Opt.insertButton(hGroup, i18n(muT("Filter words/messages")), i18n(muT("Define...")), 0, Settings::biFilterWords); + + Opt.setRadioChecked(m_hSource , m_nSource ); + Opt.setEditNumber (m_hMinLength , m_nMinLength ); + Opt.setEditNumber (m_hMaxLength , m_nMaxLength ); + Opt.checkItem (m_hFilterLinks, m_bFilterLinks); +} + +void ColBaseWords::impl_configFromUI(OptionsCtrl& Opt) +{ + m_nSource = Opt.getRadioChecked(m_hSource ); + m_nMinLength = Opt.getEditNumber (m_hMinLength ); + m_nMaxLength = Opt.getEditNumber (m_hMaxLength ); + m_bFilterLinks = Opt.isItemChecked (m_hFilterLinks); + + // ensure constraints + utils::ensureRange(m_nMinLength, 1, 1000, 1); + utils::ensureRange(m_nMaxLength, 0, 1000, 0); +} + +ext::string ColBaseWords::impl_contactDataGetUID() const +{ + ext::string strUID = ext::str(ext::format(muT("words-|-|-|-|")) + % m_nSource + % m_nMinLength + % m_nMaxLength + % (m_bFilterLinks ? 1 : 0)); + + citer_each_(ColFilterSet, i, m_FilterWords) + { + strUID += muT("-"); + strUID += *i; + } + + return strUID; +} + +void ColBaseWords::impl_contactDataBeginAcquire() +{ + m_bFilterMessages = false; + m_bFilterWords = false; + m_ActiveMessageFilter.clear(); + m_ActiveWordFilter.clear(); + + citer_each_(ColFilterSet, i, m_FilterWords) + { + const Filter* pFilter = getSettings()->getFilter(*i); + + if (pFilter && !pFilter->getWords().empty()) + { + switch (pFilter->getMode()) + { + case Settings::fwmMessagesMatching: + case Settings::fwmMessagesContaining: + case Settings::fwmMessagesStartingWith: + case Settings::fwmMessagesEndingWith: + m_ActiveMessageFilter.push_back(pFilter); + m_bFilterMessages = true; + break; + + case Settings::fwmWordsMatching: + case Settings::fwmWordsContaining: + case Settings::fwmWordsStartingWith: + case Settings::fwmWordsEndingWith: + m_ActiveWordFilter.push_back(pFilter); + m_bFilterWords = true; + break; + } + } + } +} + +void ColBaseWords::impl_contactDataPrepare(Contact& contact) const +{ + WordMap* pData = new WordMap; + + contact.setSlot(contactDataSlotGet(), pData); +} + +void ColBaseWords::impl_contactDataFree(Contact& contact) const +{ + WordMap* pData = reinterpret_cast(contact.getSlot(contactDataSlotGet())); + + if (pData) + { + delete pData; + contact.setSlot(contactDataSlotGet(), NULL); + } +} + +void ColBaseWords::impl_contactDataAcquireMessage(Contact& contact, Message& msg) +{ + if (m_nSource == 2 || m_nSource == 1 && msg.isOutgoing() || m_nSource == 0 && !msg.isOutgoing()) + { + WordMap* pData = reinterpret_cast(contact.getSlot(contactDataSlotGet())); + + parseMsg(pData, m_bFilterLinks ? msg.getWithoutLinks() : msg.getRaw(), msg.isOutgoing()); + } +} + +void ColBaseWords::impl_contactDataMerge(Contact& contact, const Contact& include) const +{ + WordMap* pData = reinterpret_cast(contact.getSlot(contactDataSlotGet())); + const WordMap* pIncData = reinterpret_cast(include.getSlot(contactDataSlotGet())); + + citer_each_(WordMap, j, *pIncData) + { + if (pData->find(j->first) != pData->end()) + { + (*pData)[j->first] += j->second; + } + else + { + pData->insert(*j); + } + } +} -- cgit v1.2.3