#include "stdafx.h"
#include "message.h"

#include <algorithm>

/*
 * Message
 */

void Message::makeRawAvailable()
{
	do {
		if (m_Available & PtrIsNonT) {
			m_Raw = utils::fromA(ext::a::string(reinterpret_cast<const char*>(m_RawSource), m_nLength));
			m_Available |= Raw;
			break;
		}

		if (m_Available & PtrIsUTF8) {
			m_Raw = utils::fromUTF8(reinterpret_cast<const char*>(m_RawSource));
			m_Available |= Raw;
			break;
		}

		m_Raw.assign(reinterpret_cast<const TCHAR*>(m_RawSource), m_nLength);
		m_Available |= Raw;
	}
	while (false);

	if (m_bStripRawRTF)
		stripRawRTF();

	if (m_bStripBBCodes)
		stripBBCodes();
}

void Message::stripRawRTF()
{
	if (m_Raw.substr(0, 6) == _T("{\\rtf1"))
		m_Raw = RTFFilter::filter(m_Raw);
}

// Removes matched pairs of BBCode tags from m_Raw, keeping the text between them.
//
// Tags are searched in a lower-cased copy of m_Raw and every erase is applied
// to both strings at the same positions, so the two must stay the same length
// (assumes utils::toLowerCase() preserves the string length -- TODO confirm).
// An opening tag without a closing tag after it is left in place.
void Message::stripBBCodes()
{
	// tags of the form [x]...[/x]
	static const TCHAR* szSimpleBBCodes[][2] = {
		{ _T("[b]"), _T("[/b]") },
		{ _T("[u]"), _T("[/u]") },
		{ _T("[i]"), _T("[/i]") },
		{ _T("[s]"), _T("[/s]") },
	};

	// tags of the form [x=parameter]...[/x]
	static const TCHAR* szParamBBCodes[][2] = {
		{ _T("[url="), _T("[/url]") },
		{ _T("[color="), _T("[/color]") },
	};

	// lower-case shadow copy used for searching
	ext::string strRawLC = utils::toLowerCase(m_Raw);

	// pass 1: simple BBcodes
	array_each_(i, szSimpleBBCodes)
	{
		const TCHAR* const szTagOpen = szSimpleBBCodes[i][0];
		const TCHAR* const szTagClose = szSimpleBBCodes[i][1];

		const ext::string::size_type cchOpen = ext::strfunc::len(szTagOpen);
		const ext::string::size_type cchClose = ext::strfunc::len(szTagClose);

		// the search resumes where the last opening tag was removed
		ext::string::size_type idxOpen = 0;

		for (;;) {
			idxOpen = strRawLC.find(szTagOpen, idxOpen);

			if (idxOpen == ext::string::npos)
				break;

			const ext::string::size_type idxClose = strRawLC.find(szTagClose, idxOpen + cchOpen);

			if (idxClose == ext::string::npos)
				break;

			// where the closing tag sits once the opening tag is gone
			const ext::string::size_type idxCloseShifted = idxClose - cchOpen;

			strRawLC.erase(idxOpen, cchOpen);
			strRawLC.erase(idxCloseShifted, cchClose);

			// mirror the change in the real string
			m_Raw.erase(idxOpen, cchOpen);
			m_Raw.erase(idxCloseShifted, cchClose);
		}
	}

	// pass 2: BBcodes with parameters
	array_each_(i, szParamBBCodes)
	{
		const TCHAR* const szTagOpen = szParamBBCodes[i][0];
		const TCHAR* const szTagClose = szParamBBCodes[i][1];

		const ext::string::size_type cchOpen = ext::strfunc::len(szTagOpen);
		const ext::string::size_type cchClose = ext::strfunc::len(szTagClose);

		ext::string::size_type idxOpen = 0;

		for (;;) {
			idxOpen = strRawLC.find(szTagOpen, idxOpen);

			if (idxOpen == ext::string::npos)
				break;

			// the ']' that ends the opening tag, after its parameter
			const ext::string::size_type idxBracket = strRawLC.find(']', idxOpen + cchOpen);

			if (idxBracket == ext::string::npos)
				break;

			const ext::string::size_type idxClose = strRawLC.find(szTagClose, idxBracket + 1);

			if (idxClose == ext::string::npos)
				break;

			// full opening tag including parameter and ']'
			const ext::string::size_type cchOpenFull = idxBracket - idxOpen + 1;

			// where the closing tag sits once the opening tag is gone
			const ext::string::size_type idxCloseShifted = idxClose - cchOpenFull;

			strRawLC.erase(idxOpen, cchOpenFull);
			strRawLC.erase(idxCloseShifted, cchClose);

			// mirror the change in the real string
			m_Raw.erase(idxOpen, cchOpenFull);
			m_Raw.erase(idxCloseShifted, cchClose);
		}
	}
}

// Builds m_WithoutLinks: a copy of the text returned by getRaw() from which
// everything recognized as a link has been erased, then sets the WithoutLinks
// flag in m_Available.
//
// Three passes run one after another over the same string:
//   1. protocol://[user[:password]@]host[/path]
//   2. www.host[/path]
//   3. user@host
// In every pass a candidate is only removed if its host part consists solely
// of characters from szValidHost and contains at least one '.'. At most one
// trailing character from szSuffixes (and, for addresses, one leading
// character from szPrefixes) is excluded from the link and stays in the text.
//
// The extracted link string is built in each pass but not stored anywhere yet
// (see the TODO markers).
void Message::filterLinks()
{
	// whitespace that terminates a link; the tab is included so that a link
	// followed by a tab ends there
	static const TCHAR* szSpaces = _T(" \t\r\n");
	static const TCHAR* szPrefixes = _T("([{<:\"'");
	static const TCHAR* szSuffixes = _T(".,:;!?)]}>\"'");
	static const TCHAR* szValidProtocol = _T("abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
	static const TCHAR* szValidHost = _T(".-abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");

	// init with raw text
	m_WithoutLinks = getRaw();

	ext::string& msg = m_WithoutLinks;

	// every search starts at pos + 1; npos + 1 wraps around to 0, so npos
	// means "start at the beginning of the string"
	ext::string::size_type pos = ext::string::npos;

	// detect: protocol://[user[:password]@]host[/path]
	while (true) {
		if ((pos = msg.find(_T("://"), pos + 1)) == ext::string::npos)
			break;

		// find start of URL: one past the last non-protocol character in front of "://"
		ext::string::size_type pos_proto = msg.find_last_not_of(szValidProtocol, pos - 1);

		if (pos_proto == ext::string::npos)
			pos_proto = 0;
		else
			++pos_proto;

		// require a non-empty protocol name
		if (pos_proto < pos) {
			// find end of URL: last character before the next whitespace (or end of text)
			ext::string::size_type pos_last = msg.find_first_of(szSpaces, pos + 3);

			if (pos_last == ext::string::npos)
				pos_last = msg.length() - 1;
			else
				--pos_last;

			// filter suffixes (punctuation, parentheses, ...)
			if (ext::strfunc::chr(szSuffixes, msg[pos_last]))
				--pos_last;

			// find slash: for host name validation (host ends before the first slash, or with the URL)
			ext::string::size_type pos_slash = msg.find('/', pos + 3);

			if (pos_slash == ext::string::npos || pos_slash > pos_last)
				pos_slash = pos_last + 1;

			// find at: for host name validation (host starts after user[:password]@, or after "://")
			ext::string::size_type pos_at = msg.find('@', pos + 3);

			if (pos_at == ext::string::npos || pos_at > pos_slash)
				pos_at = pos + 2;

			// check for valid host (x.x): at least three characters, only valid ones, at least one dot
			if (pos_slash - pos_at > 3) {
				ext::string::size_type pos_invalid = msg.find_first_not_of(szValidHost, pos_at + 1);

				if (pos_invalid == ext::string::npos || pos_invalid >= pos_slash) {
					if (std::count(msg.begin() + pos_at + 1, msg.begin() + pos_slash, '.') >= 1) {
						ext::string link = msg.substr(pos_proto, pos_last - pos_proto + 1);

						// remove extracted link from message text
						msg.erase(pos_proto, link.length());

						// continue at the position where the link was removed
						// (pos becomes pos_proto - 1, the next search starts at pos + 1)
						pos = pos_last - link.length();

						// TODO: put link in list
					}
				}
			}
		}
	}

	// detect: www.host[/path]
	pos = ext::string::npos;

	while (true) {
		if ((pos = msg.find(_T("www."), pos + 1)) == ext::string::npos)
			break;

		// find end of URL: last character before the next whitespace (or end of text)
		ext::string::size_type pos_last = msg.find_first_of(szSpaces, pos + 4);

		if (pos_last == ext::string::npos)
			pos_last = msg.length() - 1;
		else
			--pos_last;

		// filter suffixes (punctuation, parentheses, ...)
		if (ext::strfunc::chr(szSuffixes, msg[pos_last]))
			--pos_last;

		// find slash: for host name validation (host ends before the first slash, or with the URL)
		ext::string::size_type pos_slash = msg.find('/', pos + 4);

		if (pos_slash == ext::string::npos || pos_slash > pos_last)
			pos_slash = pos_last + 1;

		// the dot of "www." plays the role the '@' has above: validation starts right after it
		ext::string::size_type pos_at = pos + 3;

		// check for valid host (x.x): at least three characters, only valid ones, at least one dot
		if (pos_slash - pos_at > 3) {
			ext::string::size_type pos_invalid = msg.find_first_not_of(szValidHost, pos_at + 1);

			if (pos_invalid == ext::string::npos || pos_invalid >= pos_slash) {
				if (std::count(msg.begin() + pos_at + 1, msg.begin() + pos_slash, '.') >= 1) {
					ext::string link = _T("http://") + msg.substr(pos, pos_last - pos + 1);

					// remove extracted link from message text
					// (7 is the length of the "http://" that was prepended to link)
					msg.erase(pos, link.length() - 7);

					// continue at the position where the link was removed
					pos = pos_last - (link.length() - 7);

					// TODO: put link in list
				}
			}
		}
	}

	// detect: user@host
	pos = ext::string::npos;

	while (true) {
		if ((pos = msg.find('@', pos + 1)) == ext::string::npos)
			break;

		// the '@' needs at least one character on either side
		if (pos > 0 && pos < msg.length() - 1) {
			// find end of address: last valid host character after the '@'
			ext::string::size_type pos_last = msg.find_first_not_of(szValidHost, pos + 1);

			if (pos_last == ext::string::npos)
				pos_last = msg.length() - 1;
			else
				--pos_last;

			// filter suffixes (punctuation, parentheses, ...)
			if (ext::strfunc::chr(szSuffixes, msg[pos_last]))
				--pos_last;

			// find start of address: first character after the preceding whitespace (or start of text)
			ext::string::size_type pos_first = msg.find_last_of(szSpaces, pos - 1);

			if (pos_first == ext::string::npos)
				pos_first = 0;
			else
				++pos_first;

			// filter prefixes (punctuation, parentheses, ...)
			if (ext::strfunc::chr(szPrefixes, msg[pos_first]))
				++pos_first;

			// check for valid host (x.x): non-empty user, at least three host characters, at least one dot
			if (pos_first < pos && pos_last - pos >= 3) {
				if (std::count(msg.begin() + pos + 1, msg.begin() + pos_last + 1, '.') >= 1) {
					ext::string link = msg.substr(pos_first, pos_last - pos_first + 1);

					// remove extracted link from message text
					msg.erase(pos_first, link.length());

					// continue at the position where the address was removed
					pos = pos_last - (link.length());

					// prepend "mailto:" if missing
					if (link.substr(0, 7) != _T("mailto:")) {
						link.insert(0, _T("mailto:"));
					}

					// TODO: put link in list
				}
			}
		}
	}

	m_Available |= WithoutLinks;
}