#include "stdafx.h" #include "message.h" #include <algorithm> /* * Message */ void Message::makeRawAvailable() { do { if (m_Available & PtrIsNonT) { m_Raw = utils::fromA(ext::a::string(reinterpret_cast<const char*>(m_RawSource), m_nLength)); m_Available |= Raw; break; } if (m_Available & PtrIsUTF8) { m_Raw = utils::fromUTF8(reinterpret_cast<const char*>(m_RawSource)); m_Available |= Raw; break; } m_Raw.assign(reinterpret_cast<const wchar_t*>(m_RawSource), m_nLength); m_Available |= Raw; } while (false); if (m_bStripRawRTF) stripRawRTF(); if (m_bStripBBCodes) stripBBCodes(); } void Message::stripRawRTF() { if (m_Raw.substr(0, 6) == L"{\\rtf1") m_Raw = RTFFilter::filter(m_Raw); } void Message::stripBBCodes() { static const wchar_t* szSimpleBBCodes[][2] = { { L"[b]", L"[/b]" }, { L"[u]", L"[/u]" }, { L"[i]", L"[/i]" }, { L"[s]", L"[/s]" }, }; static const wchar_t* szParamBBCodes[][2] = { { L"[url=", L"[/url]" }, { L"[color=", L"[/color]" }, }; // convert raw string to lower case ext::string strRawLC = utils::toLowerCase(m_Raw); // remove simple BBcodes array_each_(i, szSimpleBBCodes) { const wchar_t* szOpenTag = szSimpleBBCodes[i][0]; const wchar_t* szCloseTag = szSimpleBBCodes[i][1]; int lenOpen = ext::strfunc::len(szOpenTag); int lenClose = ext::strfunc::len(szCloseTag); size_t posOpen = 0; size_t posClose = 0; while (true) { if ((posOpen = strRawLC.find(szOpenTag, posOpen)) == ext::string::npos) break; if ((posClose = strRawLC.find(szCloseTag, posOpen + lenOpen)) == ext::string::npos) break; strRawLC.erase(posOpen, lenOpen); strRawLC.erase(posClose - lenOpen, lenClose); // fix real string m_Raw.erase(posOpen, lenOpen); m_Raw.erase(posClose - lenOpen, lenClose); } } // remove BBcodes with parameters array_each_(i, szParamBBCodes) { const wchar_t* szOpenTag = szParamBBCodes[i][0]; const wchar_t* szCloseTag = szParamBBCodes[i][1]; int lenOpen = ext::strfunc::len(szOpenTag); int lenClose = ext::strfunc::len(szCloseTag); size_t posOpen = 0; size_t posOpen2 = 0; size_t posClose = 0; while (true) { if ((posOpen = strRawLC.find(szOpenTag, posOpen)) == ext::string::npos) break; if ((posOpen2 = strRawLC.find(']', posOpen + lenOpen)) == ext::string::npos) break; if ((posClose = strRawLC.find(szCloseTag, posOpen2 + 1)) == ext::string::npos) break; strRawLC.erase(posOpen, posOpen2 - posOpen + 1); strRawLC.erase(posClose - posOpen2 + posOpen - 1, lenClose); // fix real string m_Raw.erase(posOpen, posOpen2 - posOpen + 1); m_Raw.erase(posClose - posOpen2 + posOpen - 1, lenClose); } } } void Message::filterLinks() { static const wchar_t* szSpaces = L" \r\r\n"; static const wchar_t* szPrefixes = L"([{<:\"'"; static const wchar_t* szSuffixes = L".,:;!?)]}>\"'"; static const wchar_t* szValidProtocol = L"abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; static const wchar_t* szValidHost = L".-abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; // init with raw text m_WithoutLinks = getRaw(); ext::string& msg = m_WithoutLinks; size_t pos = -1; // detect: protocol://[user[:password]@]host[/path] while (true) { if ((pos = msg.find(L"://", pos + 1)) == ext::string::npos) break; // find start of URL size_t pos_proto = msg.find_last_not_of(szValidProtocol, pos - 1); (pos_proto == ext::string::npos) ? pos_proto = 0 : ++pos_proto; if (pos_proto < pos) { // find end of URL size_t pos_last = msg.find_first_of(szSpaces, pos + 3); (pos_last == ext::string::npos) ? pos_last = msg.length() - 1 : --pos_last; // filter suffixes (punctuation, parentheses, ...) if (ext::strfunc::chr(szSuffixes, msg[pos_last])) --pos_last; // find slash: for host name validation size_t pos_slash = msg.find('/', pos + 3); if (pos_slash == ext::string::npos || pos_slash > pos_last) pos_slash = pos_last + 1; // find at: for host name validation size_t pos_at = msg.find('@', pos + 3); if (pos_at == ext::string::npos || pos_at > pos_slash) pos_at = pos + 2; // check for valid host (x.x) if (pos_slash - pos_at > 3) { size_t pos_invalid = msg.find_first_not_of(szValidHost, pos_at + 1); if (pos_invalid == ext::string::npos || pos_invalid >= pos_slash) { if (std::count(msg.begin() + pos_at + 1, msg.begin() + pos_slash, '.') >= 1) { ext::string link = msg.substr(pos_proto, pos_last - pos_proto + 1); // remove extracted link from message text msg.erase(pos_proto, link.length()); pos = pos_last - link.length(); // TODO: put link in list } } } } } // detect: www.host[/path] pos = -1; while (true) { if ((pos = msg.find(L"www.", pos + 1)) == ext::string::npos) break; // find end of URL size_t pos_last = msg.find_first_of(szSpaces, pos + 4); (pos_last == ext::string::npos) ? pos_last = msg.length() - 1 : --pos_last; // filter suffixes (punctuation, parentheses, ...) if (ext::strfunc::chr(szSuffixes, msg[pos_last])) --pos_last; // find slash: for host name validation size_t pos_slash = msg.find('/', pos + 4); if (pos_slash == ext::string::npos || pos_slash > pos_last) pos_slash = pos_last + 1; // find at: for host name validation size_t pos_at = pos + 3; // check for valid host (x.x) if (pos_slash - pos_at > 3) { size_t pos_invalid = msg.find_first_not_of(szValidHost, pos_at + 1); if (pos_invalid == ext::string::npos || pos_invalid >= pos_slash) { if (std::count(msg.begin() + pos_at + 1, msg.begin() + pos_slash, '.') >= 1) { ext::string link = L"http://" + msg.substr(pos, pos_last - pos + 1); // remove extracted link from message text msg.erase(pos, link.length() - 7); pos = pos_last - (link.length() - 7); // TODO: put link in list } } } } // detect: user@host pos = -1; while (true) { if ((pos = msg.find('@', pos + 1)) == ext::string::npos) break; if (pos > 0 && pos < msg.length() - 1) { // find end of address size_t pos_last = msg.find_first_not_of(szValidHost, pos + 1); (pos_last == ext::string::npos) ? pos_last = msg.length() - 1 : --pos_last; // filter suffixes (punctuation, parentheses, ...) if (ext::strfunc::chr(szSuffixes, msg[pos_last])) --pos_last; // find start of address size_t pos_first = msg.find_last_of(szSpaces, pos - 1); (pos_first == ext::string::npos) ? pos_first = 0 : ++pos_first; // filter prefixes (punctuation, parentheses, ...) if (ext::strfunc::chr(szPrefixes, msg[pos_first])) ++pos_first; // check for valid host (x.x) if (pos_first < pos && pos_last - pos >= 3) { if (std::count(msg.begin() + pos + 1, msg.begin() + pos_last + 1, '.') >= 1) { ext::string link = msg.substr(pos_first, pos_last - pos_first + 1); // remove extracted link from message text msg.erase(pos_first, link.length()); pos = pos_last - (link.length()); // prepend "mailto:" if missing if (link.substr(0, 7) != L"mailto:") { link.insert(0, L"mailto:"); } // TODO: put link in list } } } } m_Available |= WithoutLinks; }