From 15855fa84a09fd1fd486d357c38db0f2bd181e74 Mon Sep 17 00:00:00 2001 From: George Hazan Date: Tue, 4 Mar 2014 23:23:45 +0000 Subject: HistoryStats compiles ok now git-svn-id: http://svn.miranda-ng.org/main/trunk@8399 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c --- plugins/HistoryStats/src/message.cpp | 334 +++++++++++++++++++++++++++++++++++ 1 file changed, 334 insertions(+) create mode 100644 plugins/HistoryStats/src/message.cpp (limited to 'plugins/HistoryStats/src/message.cpp') diff --git a/plugins/HistoryStats/src/message.cpp b/plugins/HistoryStats/src/message.cpp new file mode 100644 index 0000000000..b72f92bfda --- /dev/null +++ b/plugins/HistoryStats/src/message.cpp @@ -0,0 +1,334 @@ +#include "_globals.h" +#include "message.h" + +#include + +/* + * Message + */ + +void Message::makeRawAvailable() +{ + do + { +#if defined(MU_WIDE) + if (m_Available & PtrIsNonT) + { + m_Raw = utils::fromA(ext::a::string(reinterpret_cast(m_RawSource), m_nLength)); + m_Available |= Raw; + + break; + } +#endif // MU_WIDE + + if (m_Available & PtrIsUTF8) + { + m_Raw = utils::fromUTF8(reinterpret_cast(m_RawSource)); + m_Available |= Raw; + + break; + } + + m_Raw.assign(reinterpret_cast(m_RawSource), m_nLength); + m_Available |= Raw; + } while(false); + + if (m_bStripRawRTF) + { + stripRawRTF(); + } + + if (m_bStripBBCodes) + { + stripBBCodes(); + } +} + +void Message::stripRawRTF() +{ + if (m_Raw.substr(0, 6) == muT("{\\rtf1")) + { + m_Raw = RTFFilter::filter(m_Raw); + } +} + +void Message::stripBBCodes() +{ + static const mu_text* szSimpleBBCodes[][2] = { + { muT("[b]"), muT("[/b]") }, + { muT("[u]"), muT("[/u]") }, + { muT("[i]"), muT("[/i]") }, + { muT("[s]"), muT("[/s]") }, + }; + + static const mu_text* szParamBBCodes[][2] = { + { muT("[url=") , muT("[/url]") }, + { muT("[color="), muT("[/color]") }, + }; + + // convert raw string to lower case + ext::string strRawLC = utils::toLowerCase(m_Raw); + + // remove simple BBcodes + array_each_(i, szSimpleBBCodes) + { + const mu_text* szOpenTag = szSimpleBBCodes[i][0]; + const mu_text* szCloseTag = szSimpleBBCodes[i][1]; + + int lenOpen = ext::strfunc::len(szOpenTag); + int lenClose = ext::strfunc::len(szCloseTag); + + ext::string::size_type posOpen = 0; + ext::string::size_type posClose = 0; + + while (true) + { + if ((posOpen = strRawLC.find(szOpenTag, posOpen)) == ext::string::npos) + { + break; + } + + if ((posClose = strRawLC.find(szCloseTag, posOpen + lenOpen)) == ext::string::npos) + { + break; + } + + strRawLC.erase(posOpen, lenOpen); + strRawLC.erase(posClose - lenOpen, lenClose); + + // fix real string + m_Raw.erase(posOpen, lenOpen); + m_Raw.erase(posClose - lenOpen, lenClose); + } + } + + // remove BBcodes with parameters + array_each_(i, szParamBBCodes) + { + const mu_text* szOpenTag = szParamBBCodes[i][0]; + const mu_text* szCloseTag = szParamBBCodes[i][1]; + + int lenOpen = ext::strfunc::len(szOpenTag); + int lenClose = ext::strfunc::len(szCloseTag); + + ext::string::size_type posOpen = 0; + ext::string::size_type posOpen2 = 0; + ext::string::size_type posClose = 0; + + while (true) + { + if ((posOpen = strRawLC.find(szOpenTag, posOpen)) == ext::string::npos) + { + break; + } + + if ((posOpen2 = strRawLC.find(muC(']'), posOpen + lenOpen)) == ext::string::npos) + { + break; + } + + if ((posClose = strRawLC.find(szCloseTag, posOpen2 + 1)) == ext::string::npos) + { + break; + } + + strRawLC.erase(posOpen, posOpen2 - posOpen + 1); + strRawLC.erase(posClose - posOpen2 + posOpen - 1, lenClose); + + // fix real string + m_Raw.erase(posOpen, posOpen2 - posOpen + 1); + m_Raw.erase(posClose - posOpen2 + posOpen - 1, lenClose); + } + } +} + +void Message::filterLinks() +{ + static const mu_text* szSpaces = muT(" \r\r\n"); + static const mu_text* szPrefixes = muT("([{<:\"'"); + static const mu_text* szSuffixes = muT(".,:;!?)]}>\"'"); + static const mu_text* szValidProtocol = muT("abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + static const mu_text* szValidHost = muT(".-abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + + // init with raw text + m_WithoutLinks = getRaw(); + + ext::string& msg = m_WithoutLinks; + ext::string::size_type pos = -1; + + // detect: protocol://[user[:password]@]host[/path] + while (true) + { + if ((pos = msg.find(muT("://"), pos + 1)) == ext::string::npos) + { + break; + } + + // find start of URL + ext::string::size_type pos_proto = msg.find_last_not_of(szValidProtocol, pos - 1); + + (pos_proto == ext::string::npos) ? pos_proto = 0 : ++pos_proto; + + if (pos_proto < pos) + { + // find end of URL + ext::string::size_type pos_last = msg.find_first_of(szSpaces, pos + 3); + + (pos_last == ext::string::npos) ? pos_last = msg.length() - 1 : --pos_last; + + // filter suffixes (punctuation, parentheses, ...) + if (ext::strfunc::chr(szSuffixes, msg[pos_last])) + { + --pos_last; + } + + // find slash: for host name validation + ext::string::size_type pos_slash = msg.find(muC('/'), pos + 3); + + if (pos_slash == ext::string::npos || pos_slash > pos_last) + { + pos_slash = pos_last + 1; + } + + // find at: for host name validation + ext::string::size_type pos_at = msg.find(muC('@'), pos + 3); + + if (pos_at == ext::string::npos || pos_at > pos_slash) + { + pos_at = pos + 2; + } + + // check for valid host (x.x) + if (pos_slash - pos_at > 3) + { + ext::string::size_type pos_invalid = msg.find_first_not_of(szValidHost, pos_at + 1); + + if (pos_invalid == ext::string::npos || pos_invalid >= pos_slash) + { + if (std::count(msg.begin() + pos_at + 1, msg.begin() + pos_slash, muC('.')) >= 1) + { + ext::string link = msg.substr(pos_proto, pos_last - pos_proto + 1); + + // remove extracted link from message text + msg.erase(pos_proto, link.length()); + pos = pos_last - link.length(); + + // TODO: put link in list + } + } + } + } + } + + // detect: www.host[/path] + pos = -1; + + while (true) + { + if ((pos = msg.find(muT("www."), pos + 1)) == ext::string::npos) + { + break; + } + + // find end of URL + ext::string::size_type pos_last = msg.find_first_of(szSpaces, pos + 4); + + (pos_last == ext::string::npos) ? pos_last = msg.length() - 1 : --pos_last; + + // filter suffixes (punctuation, parentheses, ...) + if (ext::strfunc::chr(szSuffixes, msg[pos_last])) + { + --pos_last; + } + + // find slash: for host name validation + ext::string::size_type pos_slash = msg.find(muC('/'), pos + 4); + + if (pos_slash == ext::string::npos || pos_slash > pos_last) + { + pos_slash = pos_last + 1; + } + + // find at: for host name validation + ext::string::size_type pos_at = pos + 3; + + // check for valid host (x.x) + if (pos_slash - pos_at > 3) + { + ext::string::size_type pos_invalid = msg.find_first_not_of(szValidHost, pos_at + 1); + + if (pos_invalid == ext::string::npos || pos_invalid >= pos_slash) + { + if (std::count(msg.begin() + pos_at + 1, msg.begin() + pos_slash, muC('.')) >= 1) + { + ext::string link = muT("http://") + msg.substr(pos, pos_last - pos + 1); + + // remove extracted link from message text + msg.erase(pos, link.length() - 7); + pos = pos_last - (link.length() - 7); + + // TODO: put link in list + } + } + } + } + + // detect: user@host + pos = -1; + + while (true) + { + if ((pos = msg.find(muC('@'), pos + 1)) == ext::string::npos) + { + break; + } + + if (pos > 0 && pos < msg.length() - 1) + { + // find end of address + ext::string::size_type pos_last = msg.find_first_not_of(szValidHost, pos + 1); + + (pos_last == ext::string::npos) ? pos_last = msg.length() - 1 : --pos_last; + + // filter suffixes (punctuation, parentheses, ...) + if (ext::strfunc::chr(szSuffixes, msg[pos_last])) + { + --pos_last; + } + + // find start of address + ext::string::size_type pos_first = msg.find_last_of(szSpaces, pos - 1); + + (pos_first == ext::string::npos) ? pos_first = 0 : ++pos_first; + + // filter prefixes (punctuation, parentheses, ...) + if (ext::strfunc::chr(szPrefixes, msg[pos_first])) + { + ++pos_first; + } + + // check for valid host (x.x) + if (pos_first < pos && pos_last - pos >= 3) + { + if (std::count(msg.begin() + pos + 1, msg.begin() + pos_last + 1, muC('.')) >= 1) + { + ext::string link = msg.substr(pos_first, pos_last - pos_first + 1); + + // remove extracted link from message text + msg.erase(pos_first, link.length()); + pos = pos_last - (link.length()); + + // prepend "mailto:" if missing + if (link.substr(0, 7) != muT("mailto:")) + { + link.insert(0, muT("mailto:")); + } + + // TODO: put link in list + } + } + } + } + + m_Available |= WithoutLinks; +} -- cgit v1.2.3