/*
* This code implements decoding of MIME headers encoded in the style
* =?iso-8859-2?Q? "User using email in central Europe characters such as =E9" ?=
*
* (c) majvan 2002-2004
*/
#include "../stdafx.h"
// Table of charset names and the code page number that belongs to each of them.
// GetCharsetFromString() upper-cases its input, matches it against NameBase as a prefix and
// then requires the remainder to equal NameSub exactly; the CP member of the first row that
// matches is returned, so the order of rows matters when two rows have the same name.
// NOTE(review): the initializers presumably fill NameBase, NameSub, a flag and CP in this
// order; the meaning of the third value (TRUE only for "ANSI") is not visible here - confirm
// against the declaration of struct _tcptable.
struct _tcptable CodePageNamesAll[] =
{
{ "ANSI", "", TRUE, CP_ACP },
{ "WINDOWS-1", "250", 0, 1250 },
{ "WINDOWS-1", "251", 0, 1251 },
{ "WINDOWS-1", "252", 0, 1252 },
{ "WINDOWS-1", "253", 0, 1253 },
{ "WINDOWS-1", "254", 0, 1254 },
{ "WINDOWS-1", "255", 0, 1255 },
{ "WINDOWS-1", "256", 0, 1256 },
{ "WINDOWS-1", "257", 0, 1257 },
{ "WINDOWS-1", "258", 0, 1258 },
{ "CP1", "250", 0, 1250 },
{ "CP1", "251", 0, 1251 },
{ "CP1", "252", 0, 1252 },
{ "CP1", "253", 0, 1253 },
{ "CP1", "254", 0, 1254 },
{ "CP1", "255", 0, 1255 },
{ "CP1", "256", 0, 1256 },
{ "CP1", "257", 0, 1257 },
{ "CP1", "258", 0, 1258 },
{ "ANSI-1", "250", 0, 1250 },
{ "ANSI-1", "251", 0, 1251 },
{ "ANSI-1", "252", 0, 1252 },
{ "ANSI-1", "253", 0, 1253 },
{ "ANSI-1", "254", 0, 1254 },
{ "ANSI-1", "255", 0, 1255 },
{ "ANSI-1", "256", 0, 1256 },
{ "ANSI-1", "257", 0, 1257 },
{ "ANSI-1", "258", 0, 1258 },
{ "KOI8", "-R", 0, 20866 },
{ "KOI8", "", 0, 20866 }, // bare "KOI8" gets the same code page as "KOI8-R"
{ "KOI8", "-U", 0, 21866 },
{ "KOI8", "-RU", 0, 21866 },
{ "US-", "ASCII", 0, 20127 },
{ "CP", "367", 0, 20127 },
{ "ASCII", "", 0, 20127 },
{ "ASCII", "7", 0, 20127 },
{ "ISO-8859", "-1", 0, 28591 },
{ "ISO-8859", "-2", 0, 28592 },
{ "ISO-8859", "-3", 0, 28593 },
{ "ISO-8859", "-4", 0, 28594 },
{ "ISO-8859", "-5", 0, 28595 },
{ "ISO-8859", "-6", 0, 28596 },
{ "ISO-8859", "-7", 0, 28597 },
{ "ISO-8859", "-8", 0, 28598 },
{ "ISO-8859", "-9", 0, 28599 },
{ "ISO-8859", "-15", 0, 28605 },
// the same ISO 8859 rows again, spelled with an underscore
{ "ISO_8859", "-1", 0, 28591 },
{ "ISO_8859", "-2", 0, 28592 },
{ "ISO_8859", "-3", 0, 28593 },
{ "ISO_8859", "-4", 0, 28594 },
{ "ISO_8859", "-5", 0, 28595 },
{ "ISO_8859", "-6", 0, 28596 },
{ "ISO_8859", "-7", 0, 28597 },
{ "ISO_8859", "-8", 0, 28598 },
{ "ISO_8859", "-9", 0, 28599 },
{ "ISO_8859", "-15", 0, 28605 },
{ "ISO-", "10646-USC2", 0, 1200 }, // NOTE(review): "USC2" looks like a typo for "UCS2" - confirm before changing the string
{ "ISO-2022", "/2-JP", 0, 50220 },
{ "ISO-2022", "-JP", 0, 50221 },
{ "ISO-2022", "/JIS-JP", 0, 50222 },
{ "ISO-2022", "-KR", 0, 50225 },
{ "ISO-2022", "-CH(SP)", 0, 50227 },
{ "ISO-2022", "-CH(TR)", 0, 50229 },
{ "UTF-", "7", 0, 65000 },
{ "UTF-", "8", 0, 65001 },
{ "ARAB-", "TRANSPARENT", 0, 710 },
{ "ASMO-", "TRANSPARENT", 0, 720 },
{ "ASMO-", "449", 0, 709 },
{ "ASMO-", "708", 0, 708 },
{ "BIG5", "", 0, 950 },
{ "EUC-", "CH(SP)", 0, 51936 },
{ "EUC-", "CH(TR)", 0, 51950 },
{ "EUC-", "JP", 0, 51932 },
{ "EUC-", "KR", 0, 51949 },
{ "GB-", "2312", 0, 20936 },
{ "GB", "2312", 0, 20936 },
{ "HZGB-", "2312", 0, 52936 },
{ "IBM-", "037", 0, 37 },
{ "IBM-", "290", 0, 290 },
{ "IBM-", "437", 0, 437 },
{ "IBM-", "500", 0, 500 },
{ "IBM-", "775", 0, 775 },
{ "IBM-", "850", 0, 850 },
{ "IBM-", "852", 0, 852 },
{ "IBM-", "855", 0, 855 },
{ "IBM-", "857", 0, 857 },
{ "IBM-", "860", 0, 860 },
{ "IBM-", "861", 0, 861 },
{ "IBM-", "862", 0, 862 },
{ "IBM-", "863", 0, 863 },
{ "IBM-", "864", 0, 864 },
{ "IBM-", "865", 0, 865 },
{ "IBM-", "866", 0, 866 },
{ "IBM-", "869", 0, 869 },
{ "IBM-", "870", 0, 870 },
{ "IBM-", "875", 0, 875 },
{ "IBM-", "1026", 0, 1026 },
{ "IBM-", "273", 0, 20273 },
{ "IBM-", "277", 0, 20277 },
{ "IBM-", "278", 0, 20278 },
{ "IBM-", "280", 0, 20280 },
{ "IBM-", "284", 0, 20284 },
{ "IBM-", "285", 0, 20285 },
{ "IBM-", "290", 0, 20290 }, // NOTE(review): same name as the earlier "IBM-290" row (code page 290); the first match wins in GetCharsetFromString, so this row is never returned by it
{ "IBM-", "297", 0, 20297 },
{ "IBM-", "420", 0, 20420 },
{ "IBM-", "423", 0, 20423 },
{ "IBM-", "871", 0, 20871 },
{ "IBM-", "880", 0, 20880 },
{ "IBM-", "905", 0, 20905 },
{ "IBM-", "THAI", 0, 20838 },
{ "ISCII-", "DEVANAGARI", 0, 57002 },
{ "ISCII-", "BENGALI", 0, 57003 },
{ "ISCII-", "TAMIL", 0, 57004 },
{ "ISCII-", "TELUGU", 0, 57005 },
{ "ISCII-", "ASSAMESE", 0, 57006 },
{ "ISCII-", "ORIYA", 0, 57007 },
{ "ISCII-", "KANNADA", 0, 57008 },
{ "ISCII-", "MALAYALAM", 0, 57009 },
{ "ISCII-", "GUJARATI", 0, 57010 },
{ "ISCII-", "PUNJABI", 0, 57011 },
{ "KOR-", "JOHAB", 0, 1361 },
{ "KSC-", "5601", 0, 1361 },
{ "MAC-", "ROMAN", 0, 10000 },
{ "MAC-", "JP", 0, 10001 },
{ "MAC-", "CH(SP)(BIG5)", 0, 10002 },
{ "MAC-", "KR", 0, 10003 },
{ "MAC-", "AR", 0, 10004 },
{ "MAC-", "HW", 0, 10005 },
{ "MAC-", "GR", 0, 10006 },
{ "MAC-", "CY", 0, 10007 },
{ "MAC-", "CH(SP)(GB2312)", 0, 10008 },
{ "MAC-", "ROMANIA", 0, 10010 },
{ "MAC-", "UA", 0, 10017 },
{ "MAC-", "TH", 0, 10021 },
{ "MAC-", "LAT2", 0, 10029 },
{ "MAC-", "ICE", 0, 10079 },
{ "MAC-", "TR", 0, 10081 },
{ "MAC-", "CR", 0, 10082 }
};
// Number of rows in CodePageNamesAll; used as the loop bound in GetCharsetFromString()
int CPLENALL = _countof(CodePageNamesAll);
// NOTE(review): never assigned in the code visible here; presumably a table of the code pages
// supported on this system that is filled in elsewhere - confirm
struct _tcptable *CodePageNamesSupp;
// NOTE(review): presumably the number of rows in CodePageNamesSupp; why it starts at 1 is not
// visible here - confirm against the code that builds that table
int CPLENSUPP = 1;
void SkipNonSpaces(char *&p)
{
while (!WS(p))
p++;
}
// Moves p forward past every leading character that WS reports as whitespace
// p- pointer into a string; it is moved in place
void SkipSpaces(char *&p)
{
	for (; WS(p); ++p) {
		// nothing to do, the loop header does the stepping
	}
}
/////////////////////////////////////////////////////////////////////////////////////////
// Looks up the code page number for a charset name such as "iso-8859-1"
// input- charset name; it does not have to be zero-terminated at position size
// size- maximum number of characters of input that belong to the name
// returns the CP value of the first matching row of CodePageNamesAll, or -1 if size is 0,
// the working buffer cannot be allocated or no row matches
int GetCharsetFromString(char *input, size_t size)
{
	if (size < 1)
		return -1;

	char *upper = new char[size + 1];
	if (upper == nullptr)
		return -1;

	// build an upper-cased, zero-terminated copy; only 'a'..'z' are changed
	char *wr = upper;
	for (char *rd = input; (*rd != 0) && (rd - input < (INT_PTR)size); rd++) {
		char ch = *rd;
		if ((ch >= 'a') && (ch <= 'z'))
			ch = ch - ('a' - 'A');
		*wr++ = ch;
	}
	*wr = 0;

#ifdef DEBUG_DECODECODEPAGE
	mir_writeLogA(DecodeFile, "%s", upper);
#endif

	// a row matches when the name starts with NameBase and the rest equals NameSub
	int found = -1; //not found
	for (int row = 0; row < CPLENALL; row++) {
		size_t baseLen = mir_strlen(CodePageNamesAll[row].NameBase);
		if (strncmp(upper, CodePageNamesAll[row].NameBase, baseLen) != 0)
			continue;
		if (mir_strcmp(upper + baseLen, CodePageNamesAll[row].NameSub) != 0)
			continue;

		found = CodePageNamesAll[row].CP;
		break;
	}

	delete[] upper;
	return found;
}
/////////////////////////////////////////////////////////////////////////////////////////
// Converts one hexadecimal digit character to its numeric value ('a' or 'A' to 10)
// HexValue- the digit character ('0'-'9', 'A'-'F' or 'a'-'f')
// DecValue- pointer where the numeric value is stored; left untouched on failure
// returns 1 on success, 0 if HexValue is not a hexadecimal digit
int FromHexa(char HexValue, char *DecValue)
{
	char digit;

	if ('0' <= HexValue && HexValue <= '9')
		digit = HexValue - '0';
	else if ('A' <= HexValue && HexValue <= 'F')
		digit = HexValue - 'A' + 10;
	else if ('a' <= HexValue && HexValue <= 'f')
		digit = HexValue - 'a' + 10;
	else
		return 0;

	*DecValue = digit;
	return 1;
}
/////////////////////////////////////////////////////////////////////////////////////////
// Decodes a string in quoted-printable encoding
// Src- zero-terminated input string
// Dst- output buffer; needs room for DstLen characters plus the terminating zero
// DstLen- maximum number of decoded characters stored to Dst (terminator not counted)
// isQ- TRUE for the "Q-encoding" variant used in headers: '_' is decoded to a space and
//      '=' followed by a line break is not treated as a soft line break
// always returns 1
int DecodeQuotedPrintable(char *Src, char *Dst, int DstLen, BOOL isQ)
{
#ifdef DEBUG_DECODEQUOTED
	char *DstTemp = Dst;
	mir_writeLogA(DecodeFile, "%s", Src);
#endif
	for (auto *Limit = Dst + DstLen; *Src != 0 && Dst < Limit; Src++) {
		if (*Src == '=') {
			Src++;
			if (*Src == 0)
				break; // a lone '=' at the very end of the input is dropped

			if (!isQ) {
				// soft line break: "=\r\n", "=\r" or "=\n" produces no output
				if (*Src == '\r') {
					if (Src[1] == '\n')
						Src++;
					continue;
				}
				if (*Src == '\n')
					continue;
			}

			char First, Second;
			if (!FromHexa(Src[0], &First) || !FromHexa(Src[1], &Second)) {
				// Not a valid "=XX" escape: keep the '=' as it is and step back onto it, so
				// that the loop increment lands on the character that followed the '='.
				// That character is then decoded like any other one instead of being lost,
				// and the output limit is checked again before it is stored.
				*Dst++ = '=';
				Src--;
				continue;
			}
			*Dst++ = ((char)(First) << 4) + Second;
			Src++; // second hex digit; the loop increment steps over it
		}
		else if (isQ && *Src == '_')
			*Dst++ = ' ';
		else
			*Dst++ = *Src;
	}
	*Dst = 0;
#ifdef DEBUG_DECODEQUOTED
	mir_writeLogA(DecodeFile, "%s", DstTemp);
#endif
	return 1;
}
/////////////////////////////////////////////////////////////////////////////////////////
// Converts a string in the given codepage to unicode and appends it to the string in *out
// stream- zero-terminated input string
// cp- codepage of the input string; CP_ACP is used instead when GetCPInfo rejects it
// out- in: nullptr or a string allocated with new[]; out: newly allocated (new[]) string
//      holding the old content followed by the converted text. The old buffer is deleted.
//      The caller releases the result with delete[].
// returns 1 on success, 0 on failure. When the input cannot be converted at all, *out is
// left untouched.
int ConvertStringToUnicode(char *stream, unsigned int cp, wchar_t **out)
{
	// codepages that require 0 in the dwFlags parameter when calling MultiByteToWideChar;
	// CP_SYMBOL belongs here too, MultiByteToWideChar fails for it with any flag set
	static const uint32_t CodePagesZeroFlags[] = {CP_SYMBOL, 50220, 50221, 50222, 50225, 50227, 50229, 52936, 54936, 57002, 57003, 57004, 57005, 57006, 57007, 57008, 57009, 57010, 57011, 65000, 65001};

	if (stream == nullptr || out == nullptr)
		return 0;

	// the pseudo codepages are passed through; any other value has to be known to the system
	CPINFO CPInfo;
	if ((cp != CP_ACP) && (cp != CP_OEMCP) && (cp != CP_MACCP) && (cp != CP_THREAD_ACP) && (cp != CP_SYMBOL) && (cp != CP_UTF7) && (cp != CP_UTF8) && !GetCPInfo(cp, &CPInfo))
		cp = CP_ACP;
#ifdef DEBUG_DECODECODEPAGE
	mir_writeLogA(DecodeFile, "%d", cp);
#endif

	uint32_t flags = MB_USEGLYPHCHARS;
	for (uint32_t zeroFlagsCp : CodePagesZeroFlags)
		if (zeroFlagsCp == cp) {
			flags = 0;
			break;
		}

	// first call only measures; the returned length includes the terminating zero
	int streamlen = MultiByteToWideChar(cp, flags, stream, -1, nullptr, 0);
	if (streamlen <= 0)
		return 0; // nothing can be converted, keep the caller's string as it is

	size_t outlen = (*out != nullptr) ? mir_wstrlen(*out) : 0;
	wchar_t *temp = new wchar_t[streamlen + outlen + 1];
	wchar_t *dest = temp;
	if (*out != nullptr) {
		// copy old string from *out to temp, dest ends up behind the copied text
		for (const wchar_t *src = *out; *src != (wchar_t)0; src++, dest++)
			*dest = *src;
		delete[] *out;
	}
	*out = temp;

	if (!MultiByteToWideChar(cp, flags, stream, -1, dest, streamlen)) {
		*dest = 0; // keep the result a valid string that holds just the old content
		return 0;
	}
	return 1;
}
/////////////////////////////////////////////////////////////////////////////////////////
// Converts a string from a MIME header to unicode, decoding every encoded word of the form
// =?charset?B?text?= (base64) or =?charset?Q?text?= (quoted-printable)
// stream- input string, may be nullptr (an empty string is returned then)
// cp- the value passed in is not read; it is replaced by the codepage of each encoded word
//     (CP_ACP when the charset name is not known)
// mode- MIME_PLAIN or MIME_MAIL (MIME_MAIL deletes '"' from start and end of the encoded text)
// returns the decoded string; leading whitespace of the input is skipped
CMStringW ConvertCodedStringToUnicode(char *stream, uint32_t cp, int mode)
{
	CMStringW ret;
	if (stream == nullptr)
		return ret;

	char *start = stream;
	SkipSpaces(start);
	while (*start != 0) {
		if (!CODES(start)) {
			// text outside of an encoded word is copied character by character
			// NOTE(review): a char above 0x7F is widened as is, without any codepage
			// conversion; where char is signed it gets sign-extended - confirm intent
			ret.AppendChar(*start);
			start++;
			continue;
		}

		// charset name: everything behind "=?" up to the next '?'
		char *finder = start + 2;
		char *finderend = finder;
		while (!CODED(finderend) && !EOS(finderend))
			finderend++;
		start = finderend;
		if (!CODED(finderend)) {
			// the string ended inside the charset name
			if (!EOS(start))
				start++;
			continue;
		}

		char Encoding = *(finderend + 1);
		if (Encoding != 'b' && Encoding != 'B' && Encoding != 'q' && Encoding != 'Q') {
			// Unknown encoding: go on as plain text from the '?' that ends the charset name.
			// NOTE(review): the "=?charset" part in front of it is not copied to the output;
			// this is kept as it was - confirm whether it should be preserved instead
			ret.AppendChar(*start);
			start++;
			continue;
		}

		cp = (uint32_t)GetCharsetFromString(finder, finderend - finder);
		if (cp == (uint32_t)-1)
			cp = CP_ACP;

		// encoded text: behind the encoding letter and its '?', up to "?=" or end of string
		finder = finderend + 2;
		if (CODED(finder))
			finder++;
		SkipSpaces(finder);
		finderend = finder;
		while (!CODEE(finderend) && !EOS(finderend))
			finderend++;
		char *pcodeend = CODEE(finderend) ? finderend : nullptr; // position of "?=", if present

		// cut trailing whitespace, but never walk back in front of the text itself
		while (finderend > finder && WS(finderend - 1))
			finderend--;
		// a pair of quotes needs at least two characters; a single '"' is not a pair
		if ((mode == MIME_MAIL) && (finderend - finder >= 2) && (*finder == '"') && (*(finderend - 1) == '"')) {
			finder++;
			finderend--;
		}

		size_t wordLen = finderend - finder;
		char *oneWordEncoded = new char[wordLen + 1];
		strncpy(oneWordEncoded, finder, wordLen);
		oneWordEncoded[wordLen] = 0;

		char *decoded = nullptr;
		if (Encoding == 'b' || Encoding == 'B')
			decoded = (char*)mir_base64_decode(oneWordEncoded, 0);
		else {
			int size = (int)wordLen + 1 + 1;
			decoded = (char*)mir_alloc(size + 1);
			if (decoded != nullptr)
				DecodeQuotedPrintable(oneWordEncoded, decoded, size, TRUE);
		}
		ptrA DecodedResult; // owns the decoded buffer from here on
		DecodedResult = decoded;
		delete[] oneWordEncoded;

		if (pcodeend != nullptr)
			finderend = pcodeend + 2;
		// a single whitespace character right behind the encoded word is skipped
		if (WS(finderend))
			finderend++;

		if (decoded != nullptr) {
			wchar_t *oneWord = nullptr;
			if (ConvertStringToUnicode(decoded, cp, &oneWord))
				ret.Append(oneWord);
			// allocated with new[] by ConvertStringToUnicode; released on failure as well
			delete[] oneWord;
		}
		start = finderend;
	}
	return ret;
}