/*
 * This code implements decoding encoded MIME header in style
 * =?iso-8859-2?Q? "User using email in central Europe characters such as =E9" ?=
 *
 * (c) majvan 2002-2004
 */

#include "../stdafx.h"

struct _tcptable CodePageNamesAll[] =
{
	{ "ANSI", "", TRUE, CP_ACP },
	{ "WINDOWS-1", "250", 0, 1250 },
	{ "WINDOWS-1", "251", 0, 1251 },
	{ "WINDOWS-1", "252", 0, 1252 },
	{ "WINDOWS-1", "253", 0, 1253 },
	{ "WINDOWS-1", "254", 0, 1254 },
	{ "WINDOWS-1", "255", 0, 1255 },
	{ "WINDOWS-1", "256", 0, 1256 },
	{ "WINDOWS-1", "257", 0, 1257 },
	{ "WINDOWS-1", "258", 0, 1258 },
	{ "CP1", "250", 0, 1250 },
	{ "CP1", "251", 0, 1251 },
	{ "CP1", "252", 0, 1252 },
	{ "CP1", "253", 0, 1253 },
	{ "CP1", "254", 0, 1254 },
	{ "CP1", "255", 0, 1255 },
	{ "CP1", "256", 0, 1256 },
	{ "CP1", "257", 0, 1257 },
	{ "CP1", "258", 0, 1258 },
	{ "ANSI-1", "250", 0, 1250 },
	{ "ANSI-1", "251", 0, 1251 },
	{ "ANSI-1", "252", 0, 1252 },
	{ "ANSI-1", "253", 0, 1253 },
	{ "ANSI-1", "254", 0, 1254 },
	{ "ANSI-1", "255", 0, 1255 },
	{ "ANSI-1", "256", 0, 1256 },
	{ "ANSI-1", "257", 0, 1257 },
	{ "ANSI-1", "258", 0, 1258 },
	{ "KOI8", "-R", 0, 20866 },
	{ "KOI8", "", 0, 20866 },
	{ "KOI8", "-U", 0, 21866 },
	{ "KOI8", "-RU", 0, 21866 },
	{ "US-", "ASCII", 0, 20127 },
	{ "CP", "367", 0, 20127 },
	{ "ASCII", "", 0, 20127 },
	{ "ASCII", "7", 0, 20127 },
	{ "ISO-8859", "-1", 0, 28591 },
	{ "ISO-8859", "-2", 0, 28592 },
	{ "ISO-8859", "-3", 0, 28593 },
	{ "ISO-8859", "-4", 0, 28594 },
	{ "ISO-8859", "-5", 0, 28595 },
	{ "ISO-8859", "-6", 0, 28596 },
	{ "ISO-8859", "-7", 0, 28597 },
	{ "ISO-8859", "-8", 0, 28598 },
	{ "ISO-8859", "-9", 0, 28599 },
	{ "ISO-8859", "-15", 0, 28605 },
	{ "ISO_8859", "-1", 0, 28591 },
	{ "ISO_8859", "-2", 0, 28592 },
	{ "ISO_8859", "-3", 0, 28593 },
	{ "ISO_8859", "-4", 0, 28594 },
	{ "ISO_8859", "-5", 0, 28595 },
	{ "ISO_8859", "-6", 0, 28596 },
	{ "ISO_8859", "-7", 0, 28597 },
	{ "ISO_8859", "-8", 0, 28598 },
	{ "ISO_8859", "-9", 0, 28599 },
	{ "ISO_8859", "-15", 0, 28605 },
	{ "ISO-", "10646-USC2", 0, 1200 },
	{ "ISO-2022", "/2-JP", 0, 50220 },
	{ "ISO-2022", "-JP", 0, 50221 },
	{ "ISO-2022", "/JIS-JP", 0, 50222 },
	{ "ISO-2022", "-KR", 0, 50225 },
	{ "ISO-2022", "-CH(SP)", 0, 50227 },
	{ "ISO-2022", "-CH(TR)", 0, 50229 },
	{ "UTF-", "7", 0, 65000 },
	{ "UTF-", "8", 0, 65001 },
	{ "ARAB-", "TRANSPARENT", 0, 710 },
	{ "ASMO-", "TRANSPARENT", 0, 720 },
	{ "ASMO-", "449", 0, 709 },
	{ "ASMO-", "708", 0, 708 },
	{ "BIG5", "", 0, 950 },
	{ "EUC-", "CH(SP)", 0, 51936 },
	{ "EUC-", "CH(TR)", 0, 51950 },
	{ "EUC-", "JP", 0, 51932 },
	{ "EUC-", "KR", 0, 51949 },
	{ "GB-", "2312", 0, 20936 },
	{ "GB", "2312", 0, 20936 },
	{ "HZGB-", "2312", 0, 52936 },
	{ "IBM-", "037", 0, 37 },
	{ "IBM-", "290", 0, 290 },
	{ "IBM-", "437", 0, 437 },
	{ "IBM-", "500", 0, 500 },
	{ "IBM-", "775", 0, 775 },
	{ "IBM-", "850", 0, 850 },
	{ "IBM-", "852", 0, 852 },
	{ "IBM-", "855", 0, 855 },
	{ "IBM-", "857", 0, 857 },
	{ "IBM-", "860", 0, 860 },
	{ "IBM-", "861", 0, 861 },
	{ "IBM-", "862", 0, 862 },
	{ "IBM-", "863", 0, 863 },
	{ "IBM-", "864", 0, 864 },
	{ "IBM-", "865", 0, 865 },
	{ "IBM-", "866", 0, 866 },
	{ "IBM-", "869", 0, 869 },
	{ "IBM-", "870", 0, 870 },
	{ "IBM-", "875", 0, 875 },
	{ "IBM-", "1026", 0, 1026 },
	{ "IBM-", "273", 0, 20273 },
	{ "IBM-", "277", 0, 20277 },
	{ "IBM-", "278", 0, 20278 },
	{ "IBM-", "280", 0, 20280 },
	{ "IBM-", "284", 0, 20284 },
	{ "IBM-", "285", 0, 20285 },
	{ "IBM-", "290", 0, 20290 },
	{ "IBM-", "297", 0, 20297 },
	{ "IBM-", "420", 0, 20420 },
	{ "IBM-", "423", 0, 20423 },
	{ "IBM-", "871", 0, 20871 },
	{ "IBM-", "880", 0, 20880 },
	{ "IBM-", "905", 0, 20905 },
	{ "IBM-", "THAI", 0, 20838 },
	{ "ISCII-", "DEVANAGARI", 0, 57002 },
	{ "ISCII-", "BENGALI", 0, 57003 },
	{ "ISCII-", "TAMIL", 0, 57004 },
	{ "ISCII-", "TELUGU", 0, 57005 },
	{ "ISCII-", "ASSAMESE", 0, 57006 },
	{ "ISCII-", "ORIYA", 0, 57007 },
	{ "ISCII-", "KANNADA", 0, 57008 },
	{ "ISCII-", "MALAYALAM", 0, 57009 },
	{ "ISCII-", "GUJARATI", 0, 57010 },
	{ "ISCII-", "PUNJABI", 0, 57011 },
	{ "KOR-", "JOHAB", 0, 1361 },
	{ "KSC-", "5601", 0, 1361 },
	{ "MAC-", "ROMAN", 0, 10000 },
	{ "MAC-", "JP", 0, 10001 },
	{ "MAC-", "CH(SP)(BIG5)", 0, 10002 },
	{ "MAC-", "KR", 0, 10003 },
	{ "MAC-", "AR", 0, 10004 },
	{ "MAC-", "HW", 0, 10005 },
	{ "MAC-", "GR", 0, 10006 },
	{ "MAC-", "CY", 0, 10007 },
	{ "MAC-", "CH(SP)(GB2312)", 0, 10008 },
	{ "MAC-", "ROMANIA", 0, 10010 },
	{ "MAC-", "UA", 0, 10017 },
	{ "MAC-", "TH", 0, 10021 },
	{ "MAC-", "LAT2", 0, 10029 },
	{ "MAC-", "ICE", 0, 10079 },
	{ "MAC-", "TR", 0, 10081 },
	{ "MAC-", "CR", 0, 10082 }
};

int CPLENALL = _countof(CodePageNamesAll);
struct _tcptable *CodePageNamesSupp;
int CPLENSUPP = 1;

void SkipNonSpaces(char *&p)
{
	while (!WS(p))
		p++;
}

void SkipSpaces(char *&p)
{
	while (WS(p))
		p++;
}

/////////////////////////////////////////////////////////////////////////////////////////
// Gets codepage ID from string representing charset such as "iso-8859-1"
// input- the string
// size- max length of input string

int GetCharsetFromString(char *input, size_t size)
{
	char *pin = input;
	char *pout, *parser;

	if ((size < 1) || (parser = pout = new char[size + 1]) == nullptr)
		return -1;
	while ((*pin != 0) && (pin - input < (INT_PTR)size)) {
		if ((*pin >= 'a') && (*pin <= 'z'))
			*parser++ = *(pin++) - ('a' - 'A'); // make it capital
		else
			*parser++ = *pin++;
	}

	*parser = 0;

	#ifdef DEBUG_DECODECODEPAGE
	mir_writeLogA(DecodeFile, "<CodePage>%s</CodePage>", pout);
	#endif

	for (int i = 0; i < CPLENALL; i++) {
		size_t len = mir_strlen(CodePageNamesAll[i].NameBase);
		if (0 == strncmp(pout, CodePageNamesAll[i].NameBase, len)) {
			if (0 == mir_strcmp(pout + len, CodePageNamesAll[i].NameSub)) {
				delete[] pout;
				return CodePageNamesAll[i].CP;
			}
		}
	}
	delete[] pout;
	return -1;		//not found
}

/////////////////////////////////////////////////////////////////////////////////////////
// HexValue to DecValue ('a' to 10)
// HexValue- hexa value ('a')
// DecValue- poiner where to store dec value
// returns 0 if not success

int FromHexa(char HexValue, char *DecValue)
{
	if (HexValue >= '0' && HexValue <= '9') {
		*DecValue = HexValue - '0';
		return 1;
	}
	if (HexValue >= 'A' && HexValue <= 'F') {
		*DecValue = HexValue - 'A' + 10;
		return 1;
	}
	if (HexValue >= 'a' && HexValue <= 'f') {
		*DecValue = HexValue - 'a' + 10;
		return 1;
	}
	return 0;
}

/////////////////////////////////////////////////////////////////////////////////////////
// Decodes string in quoted printable
// Src- input string
// Dst- where to store output string
// DstLen- how max long should be output string
// isQ- if is "Q-encoding" modification. should be TRUE in headers
// always returns 1

int DecodeQuotedPrintable(char *Src, char *Dst, int DstLen, BOOL isQ)
{
	#ifdef DEBUG_DECODEQUOTED
	char *DstTemp = Dst;
	mir_writeLogA(DecodeFile, "<Decode Quoted><Input>%s</Input>", Src);
	#endif

	for (auto *Limit = Dst + DstLen; *Src != 0 && Dst < Limit; Src++) {
		if (*Src == '=') {
			Src++;
			if (*Src == 0)
				break;

			if (!isQ) {
				if (*Src == '\r') {
					if (Src[1] == '\n')
						Src++;
					continue;
				}

				if (*Src == '\n')
					continue;
			}

			char First, Second;
			if (!FromHexa(Src[0], &First)) {
				*Dst++ = '=';
				continue;
			}
			if (!FromHexa(Src[1], &Second)) {
				*Dst++ = '='; Src--;
				continue;
			}
			*Dst++ = ((char)(First) << 4) + Second;
			Src++;
		}
		else if (isQ && *Src == '_')
			*Dst++ = ' ';
		else
			*Dst++ = *Src;
	}
	*Dst = 0;

	#ifdef DEBUG_DECODEQUOTED
	mir_writeLogA(DecodeFile, "<Output>%s</Output></Decode Quoted>", DstTemp);
	#endif
	return 1;
}

/////////////////////////////////////////////////////////////////////////////////////////
// Converts string to unicode from string with specified codepage
// stream- input string
// cp- codepage of input string
// out- pointer to new allocated memory that contains unicode string

int ConvertStringToUnicode(char *stream, unsigned int cp, wchar_t **out)
{
	CPINFO CPInfo;
	wchar_t *temp, *src = *out, *dest;
	size_t outlen;
	int streamlen, Index;

	//codepages, which require to have set 0 in dwFlags parameter when calling MultiByteToWideChar
	uint32_t CodePagesZeroFlags[] = {50220, 50221, 50222, 50225, 50227, 50229, 52936, 54936, 57002, 57003, 57004, 57005, 57006, 57007, 57008, 57009, 57010, 57011, 65000, 65001};

	if ((cp != CP_ACP) && (cp != CP_OEMCP) && (cp != CP_MACCP) && (cp != CP_THREAD_ACP) && (cp != CP_SYMBOL) && (cp != CP_UTF7) && (cp != CP_UTF8) && !GetCPInfo(cp, &CPInfo))
		cp = CP_ACP;

	#ifdef DEBUG_DECODECODEPAGE
	mir_writeLogA(DecodeFile, "<CodePage #>%d</CodePage #>", cp);
	#endif

	for (Index = 0; Index < sizeof(CodePagesZeroFlags) / sizeof(CodePagesZeroFlags[0]); Index++)
		if (CodePagesZeroFlags[Index] == cp) {
			Index = -1;
			break;
		}
	if (Index == -1)
		streamlen = MultiByteToWideChar(cp, 0, stream, -1, nullptr, 0);
	else
		streamlen = MultiByteToWideChar(cp, MB_USEGLYPHCHARS, stream, -1, nullptr, 0);

	if (*out != nullptr)
		outlen = mir_wstrlen(*out);
	else
		outlen = 0;
	temp = new wchar_t[streamlen + outlen + 1];

	if (*out != nullptr) {
		for (dest = temp; *src != (wchar_t)0; src++, dest++)				//copy old string from *out to temp
			*dest = *src;

		delete[] *out;
	}
	else dest = temp;
	*out = temp;

	if (Index == -1) {
		if (!MultiByteToWideChar(cp, 0, stream, -1, dest, streamlen))
			return 0;
	}
	else {
		if (!MultiByteToWideChar(cp, MB_USEGLYPHCHARS, stream, -1, dest, streamlen))
			return 0;
	}
	return 1;
}

/////////////////////////////////////////////////////////////////////////////////////////
// Converts string from MIME header to unicode
// stream- input string
// cp- codepage of input string
// storeto- pointer to memory that contains unicode string
// mode- MIME_PLAIN or MIME_MAIL (MIME_MAIL deletes '"' from start and end of string)

CMStringW ConvertCodedStringToUnicode(char *stream, uint32_t cp, int mode)
{
	char *start = stream, *finder, *finderend;
	char Encoding = 0;
	CMStringW ret;

	if (stream == nullptr)
		return ret;

	SkipSpaces(start);

	while (*start != 0) {
		if (CODES(start)) {
			finder = start + 2; finderend = finder;
			while (!CODED(finderend) && !EOS(finderend)) finderend++;
			start = finderend;
			if (CODED(finderend)) {
				Encoding = *(finderend + 1);
				switch (Encoding) {
				case 'b':
				case 'B':
				case 'q':
				case 'Q':
					break;
				default:
					goto NotEncoded;
				}
				if (-1 == (cp = (uint32_t)GetCharsetFromString(finder, finderend - finder)))
					cp = CP_ACP;
				if (Encoding != 0) {
					int codeend;
					char *pcodeend = nullptr;

					finder = finderend + 2;
					if (CODED(finder))
						finder++;
					SkipSpaces(finder);
					finderend = finder;
					while (!CODEE(finderend) && !EOS(finderend))
						finderend++;
					if (codeend = CODEE(finderend))
						pcodeend = finderend;
					while (WS(finderend - 1)) finderend--;
					if ((mode == MIME_MAIL) && (((*finder == '"') && (*(finderend - 1) == '"')))) {
						finder++;
						finderend--;
					}
					char *oneWordEncoded = new char[finderend - finder + 1];
					strncpy(oneWordEncoded, finder, finderend - finder);
					oneWordEncoded[finderend - finder] = 0;

					ptrA DecodedResult;
					switch (Encoding) {
					case 'b':
					case 'B':
						DecodedResult = (char*)mir_base64_decode(oneWordEncoded, 0);
						break;
					case 'q':
					case 'Q':
						int size = finderend - finder + 1 + 1;
						DecodedResult = (char*)mir_alloc(size + 1);
						DecodeQuotedPrintable(oneWordEncoded, DecodedResult, size, TRUE);
						break;
					}
					delete[] oneWordEncoded;
					if (codeend)
						finderend = pcodeend + 2;
					
					// if string continues and there's some whitespace, add space to string that is to be converted
					if (WS(finderend))
						finderend++;

					wchar_t *oneWord = nullptr;
					if (ConvertStringToUnicode(DecodedResult, cp, &oneWord)) {
						ret.Append(oneWord);
						delete oneWord;
					}
					start = finderend;
				}
				else if (!EOS(start))
					start++;
			}
			else if (!EOS(start))
				start++;
		}
		else {
NotEncoded:
			ret.AppendChar(*start);
			start++;
		}
	}

	return ret;
}