/*
 * This code implements decoding encoded MIME header in style
 * =?iso-8859-2?Q? "User using email in central Europe characters such as =E9" ?=
 *
 * (c) majvan 2002-2004
 */
#include "../yamn.h"

//--------------------------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------------------

// Table of known charset names and the Windows code page each one maps to.
// Every row is { name base, name suffix, flag, code page }:
//	- the charset name is split in two parts (e.g. "ISO-8859" + "-2"); all names
//	  are written in upper case, matching the upper-cased copy of the input that
//	  GetCharsetFromString builds before searching
//	- the flag is TRUE only for the ANSI row and 0 everywhere else here
//	  (presumably a "supported on this system" marker filled in elsewhere -
//	  TODO confirm against the _tcptable declaration)
//	- the last field is the numeric code page identifier
struct _tcptable CodePageNamesAll[]=
{
	{_T("ANSI"),_T(""),TRUE,CP_ACP},

	// Windows ANSI code pages under their three common spellings
	{_T("WINDOWS-1"),_T("250"),0,1250},
	{_T("WINDOWS-1"),_T("251"),0,1251},
	{_T("WINDOWS-1"),_T("252"),0,1252},
	{_T("WINDOWS-1"),_T("253"),0,1253},
	{_T("WINDOWS-1"),_T("254"),0,1254},
	{_T("WINDOWS-1"),_T("255"),0,1255},
	{_T("WINDOWS-1"),_T("256"),0,1256},
	{_T("WINDOWS-1"),_T("257"),0,1257},
	{_T("WINDOWS-1"),_T("258"),0,1258},
	{_T("CP1"),_T("250"),0,1250},
	{_T("CP1"),_T("251"),0,1251},
	{_T("CP1"),_T("252"),0,1252},
	{_T("CP1"),_T("253"),0,1253},
	{_T("CP1"),_T("254"),0,1254},
	{_T("CP1"),_T("255"),0,1255},
	{_T("CP1"),_T("256"),0,1256},
	{_T("CP1"),_T("257"),0,1257},
	{_T("CP1"),_T("258"),0,1258},
	{_T("ANSI-1"),_T("250"),0,1250},
	{_T("ANSI-1"),_T("251"),0,1251},
	{_T("ANSI-1"),_T("252"),0,1252},
	{_T("ANSI-1"),_T("253"),0,1253},
	{_T("ANSI-1"),_T("254"),0,1254},
	{_T("ANSI-1"),_T("255"),0,1255},
	{_T("ANSI-1"),_T("256"),0,1256},
	{_T("ANSI-1"),_T("257"),0,1257},
	{_T("ANSI-1"),_T("258"),0,1258},

	// KOI8 family
	{_T("KOI8"),_T("-R"),0,20866},
	{_T("KOI8"),_T(""),0,20866},
	{_T("KOI8"),_T("-U"),0,21866},
	{_T("KOI8"),_T("-RU"),0,21866},

	// 7-bit ASCII aliases
	{_T("US-"),_T("ASCII"),0,20127},
	{_T("CP"),_T("367"),0,20127},
	{_T("ASCII"),_T(""),0,20127},
	{_T("ASCII"),_T("7"),0,20127},

	// ISO 8859 parts, spelled with '-' and with '_'
	{_T("ISO-8859"),_T("-1"),0,28591},
	{_T("ISO-8859"),_T("-2"),0,28592},
	{_T("ISO-8859"),_T("-3"),0,28593},
	{_T("ISO-8859"),_T("-4"),0,28594},
	{_T("ISO-8859"),_T("-5"),0,28595},
	{_T("ISO-8859"),_T("-6"),0,28596},
	{_T("ISO-8859"),_T("-7"),0,28597},
	{_T("ISO-8859"),_T("-8"),0,28598},
	{_T("ISO-8859"),_T("-9"),0,28599},
	{_T("ISO-8859"),_T("-15"),0,28605},
	{_T("ISO_8859"),_T("-1"),0,28591},
	{_T("ISO_8859"),_T("-2"),0,28592},
	{_T("ISO_8859"),_T("-3"),0,28593},
	{_T("ISO_8859"),_T("-4"),0,28594},
	{_T("ISO_8859"),_T("-5"),0,28595},
	{_T("ISO_8859"),_T("-6"),0,28596},
	{_T("ISO_8859"),_T("-7"),0,28597},
	{_T("ISO_8859"),_T("-8"),0,28598},
	{_T("ISO_8859"),_T("-9"),0,28599},
	{_T("ISO_8859"),_T("-15"),0,28605},

	// NOTE(review): "USC2" looks like a transposition of "UCS2"; left untouched
	// because the string is matched against incoming charset names - confirm
	// before changing.
	{_T("ISO-"),_T("10646-USC2"),0,1200},

	// ISO 2022 variants
	{_T("ISO-2022"),_T("/2-JP"),0,50220},
	{_T("ISO-2022"),_T("-JP"),0,50221},
	{_T("ISO-2022"),_T("/JIS-JP"),0,50222},
	{_T("ISO-2022"),_T("-KR"),0,50225},
	{_T("ISO-2022"),_T("-CH(SP)"),0,50227},
	{_T("ISO-2022"),_T("-CH(TR)"),0,50229},

	// Unicode transformation formats
	{_T("UTF-"),_T("7"),0,65000},
	{_T("UTF-"),_T("8"),0,65001},

	// Arabic
	{_T("ARAB-"),_T("TRANSPARENT"),0,710},
	{_T("ASMO-"),_T("TRANSPARENT"),0,720},
	{_T("ASMO-"),_T("449"),0,709},
	{_T("ASMO-"),_T("708"),0,708},

	// East Asian
	{_T("BIG5"),_T(""),0,950},
	{_T("EUC-"),_T("CH(SP)"),0,51936},
	{_T("EUC-"),_T("CH(TR)"),0,51950},
	{_T("EUC-"),_T("JP"),0,51932},
	{_T("EUC-"),_T("KR"),0,51949},
	{_T("GB-"),_T("2312"),0,20936},
	{_T("GB"),_T("2312"),0,20936},
	{_T("HZGB-"),_T("2312"),0,52936},

	// IBM code pages whose identifier equals the IBM number.
	// A former {"IBM-","290",0,290} row was removed from this group: the same
	// name is listed again further down with identifier 20290, and 290 does not
	// appear among the Windows code page identifiers. Assuming the lookup takes
	// the first matching row (its loop starts at index 0), the removed row hid
	// the valid one.
	{_T("IBM-"),_T("037"),0,37},
	{_T("IBM-"),_T("437"),0,437},
	{_T("IBM-"),_T("500"),0,500},
	{_T("IBM-"),_T("775"),0,775},
	{_T("IBM-"),_T("850"),0,850},
	{_T("IBM-"),_T("852"),0,852},
	{_T("IBM-"),_T("855"),0,855},
	{_T("IBM-"),_T("857"),0,857},
	{_T("IBM-"),_T("860"),0,860},
	{_T("IBM-"),_T("861"),0,861},
	{_T("IBM-"),_T("862"),0,862},
	{_T("IBM-"),_T("863"),0,863},
	{_T("IBM-"),_T("864"),0,864},
	{_T("IBM-"),_T("865"),0,865},
	{_T("IBM-"),_T("866"),0,866},
	{_T("IBM-"),_T("869"),0,869},
	{_T("IBM-"),_T("870"),0,870},
	{_T("IBM-"),_T("875"),0,875},
	{_T("IBM-"),_T("1026"),0,1026},

	// IBM code pages whose identifier is 20000 + the IBM number (THAI excepted)
	{_T("IBM-"),_T("273"),0,20273},
	{_T("IBM-"),_T("277"),0,20277},
	{_T("IBM-"),_T("278"),0,20278},
	{_T("IBM-"),_T("280"),0,20280},
	{_T("IBM-"),_T("284"),0,20284},
	{_T("IBM-"),_T("285"),0,20285},
	{_T("IBM-"),_T("290"),0,20290},
	{_T("IBM-"),_T("297"),0,20297},
	{_T("IBM-"),_T("420"),0,20420},
	{_T("IBM-"),_T("423"),0,20423},
	{_T("IBM-"),_T("871"),0,20871},
	{_T("IBM-"),_T("880"),0,20880},
	{_T("IBM-"),_T("905"),0,20905},
	{_T("IBM-"),_T("THAI"),0,20838},

	// ISCII scripts
	{_T("ISCII-"),_T("DEVANAGARI"),0,57002},
	{_T("ISCII-"),_T("BENGALI"),0,57003},
	{_T("ISCII-"),_T("TAMIL"),0,57004},
	{_T("ISCII-"),_T("TELUGU"),0,57005},
	{_T("ISCII-"),_T("ASSAMESE"),0,57006},
	{_T("ISCII-"),_T("ORIYA"),0,57007},
	{_T("ISCII-"),_T("KANNADA"),0,57008},
	{_T("ISCII-"),_T("MALAYALAM"),0,57009},
	{_T("ISCII-"),_T("GUJARATI"),0,57010},
	{_T("ISCII-"),_T("PUNJABI"),0,57011},

	// Korean
	{_T("KOR-"),_T("JOHAB"),0,1361},
	{_T("KSC-"),_T("5601"),0,1361},

	// Macintosh
	{_T("MAC-"),_T("ROMAN"),0,10000},
	{_T("MAC-"),_T("JP"),0,10001},
	{_T("MAC-"),_T("CH(SP)(BIG5)"),0,10002},
	{_T("MAC-"),_T("KR"),0,10003},
	{_T("MAC-"),_T("AR"),0,10004},
	{_T("MAC-"),_T("HW"),0,10005},
	{_T("MAC-"),_T("GR"),0,10006},
	{_T("MAC-"),_T("CY"),0,10007},
	{_T("MAC-"),_T("CH(SP)(GB2312)"),0,10008},
	{_T("MAC-"),_T("ROMANIA"),0,10010},
	{_T("MAC-"),_T("UA"),0,10017},
	{_T("MAC-"),_T("TH"),0,10021},
	{_T("MAC-"),_T("LAT2"),0,10029},
	{_T("MAC-"),_T("ICE"),0,10079},
	{_T("MAC-"),_T("TR"),0,10081},
	{_T("MAC-"),_T("CR"),0,10082},
};

// Number of rows in CodePageNamesAll; derived from the array so it follows the table.
int CPLENALL = (sizeof(CodePageNamesAll)/sizeof(CodePageNamesAll[0]));

// Second code page table and its length; only declared here, not filled in this part of the file.
struct _tcptable *CodePageNamesSupp;
int CPLENSUPP = 1;

//Gets codepage ID from string representing charset such as "iso-8859-1"
//	input- the string
//	size- max length of input string
int GetCharsetFromString(char *input,size_t size);

//HexValue to DecValue ('a' to 10)
//	HexValue- hexa value ('a')
//	DecValue- pointer where to store dec value
//	returns 0 if not success
int FromHexa(char HexValue,char *DecValue);

//Decodes a char from Base64
//	Base64Value- input char in Base64
//	DecValue- pointer where to store the result
//	returns 0 if not success
int FromBase64(char Base64Value,char *DecValue);

//Decodes string in quoted printable
//	Src- input string
//	Dst- where to store output string
//	DstLen- how max long should be output string
//	isQ- if is "Q-encoding" modification.
should be TRUE in headers // always returns 1 int DecodeQuotedPrintable(char *Src,char *Dst,int DstLen, BOOL isQ); //Decodes string in base64 // Src- input string // Dst- where to store output string // DstLen- how max long should be output string // returns 0 if string was not properly decoded int DecodeBase64(char *Src,char *Dst,int DstLen); //Converts string to unicode from string with specified codepage // stream- input string // cp- codepage of input string // out- pointer to new allocated memory that contains unicode string int ConvertStringToUnicode(char *stream,unsigned int cp,WCHAR **out); //Converts string from MIME header to unicode // stream- input string // cp- codepage of input string // storeto- pointer to memory that contains unicode string // mode- MIME_PLAIN or MIME_MAIL (MIME_MAIL deletes '"' from start and end of string) void ConvertCodedStringToUnicode(char *stream,WCHAR **storeto,DWORD cp,int mode); //-------------------------------------------------------------------------------------------------- //-------------------------------------------------------------------------------------------------- int GetCharsetFromString(char *input,size_t size) //"ISO-8859-1" to ID from table { char *pin=input; char *pout,*parser; if((size<1) || (parser=pout=new char[size+1])==NULL) return -1; while((*pin!=0) && (pin-input< (INT_PTR)size)) { if ((*pin>='a') && (*pin<='z')) *parser++=*(pin++)-('a'-'A'); // make it capital //else if(*pin=='\"') // this is already done in ExtractFromContentType // *pin++; //skip the quotes if any else *parser++=*pin++; } *parser=(char)0; #ifdef DEBUG_DECODECODEPAGE DebugLog(DecodeFile,"%s",pout); #endif for(int i=0;i='0' && HexValue<='9') { *DecValue=HexValue-'0'; return 1; } if(HexValue>='A' && HexValue<='F') { *DecValue=HexValue-'A'+10; return 1; } if(HexValue>='a' && HexValue<='f') { *DecValue=HexValue-'a'+10; return 1; } return 0; } int FromBase64(char Base64Value,char *DecValue) { if(Base64Value>='A' && Base64Value<='Z') { 
*DecValue=Base64Value-'A'; return 1; } if(Base64Value>='a' && Base64Value<='z') { *DecValue=Base64Value-'a'+26; return 1; } if(Base64Value>='0' && Base64Value<='9') { *DecValue=Base64Value-'0'+52; return 1; } if(Base64Value=='+') { *DecValue=Base64Value-'+'+62; return 1; } if(Base64Value=='/') { *DecValue=Base64Value-'/'+63; return 1; } if(Base64Value=='=') { *DecValue=0; return 1; } return 0; } int DecodeQuotedPrintable(char *Src,char *Dst,int DstLen, BOOL isQ) { #ifdef DEBUG_DECODEQUOTED char *DstTemp=Dst; DebugLog(DecodeFile,"%s",Src); #endif for(int Counter=0;((char)*Src!=0) && DstLen && (Counter++%s",DstTemp); #endif return 1; } int DecodeBase64(char *Src,char *Dst,int DstLen) { int Result=0; char Locator=0,MiniResult[4]; char *End=Dst+DstLen; MiniResult[0]=MiniResult[1]=MiniResult[2]=MiniResult[3]=0; #ifdef DEBUG_DECODEBASE64 char *DstTemp=Dst; DebugLog(DecodeFile,"\n%s\n\n",Src); #endif while(*Src!=0 && DstLen && Dst!=End) { if ((*Src==0x0D)||(*Src==0x0A)) { Src++; continue; } if((!(Result=FromBase64(*Src,MiniResult+Locator)) && (*Src==0)) || Locator++==3) //end_of_str || end_of_4_bytes { Locator=0; //next write to the first byte *Dst++=(char)((MiniResult[0]<<2) | (MiniResult[1]>>4)); if(Dst==End) goto end; //DstLen exceeded? *Dst++=(char)((MiniResult[1]<<4) | (MiniResult[2]>>2)); if(Dst==End) goto end; //someones don't like goto, but not me *Dst++=(char)((MiniResult[2]<<6) | MiniResult[3]); if(!Result && (*Src==0)) goto end; //end of string? 
MiniResult[0]=MiniResult[1]=MiniResult[2]=MiniResult[3]=0; //zero 4byte buffer for next loop } if(!Result) return 0; //unrecognised character occured Src++; } end: *Dst=0; #ifdef DEBUG_DECODEBASE64 DebugLog(DecodeFile,"\n%s\n",DstTemp); #endif return 1; } int ConvertStringToUnicode(char *stream,unsigned int cp,WCHAR **out) { CPINFO CPInfo; WCHAR *temp,*src=*out,*dest; size_t outlen; int streamlen,Index; //codepages, which require to have set 0 in dwFlags parameter when calling MultiByteToWideChar DWORD CodePagesZeroFlags[]={50220,50221,50222,50225,50227,50229,52936,54936,57002,57003,57004,57005,57006,57007,57008,57009,57010,57011,65000,65001}; if((cp!=CP_ACP) && (cp!=CP_OEMCP) && (cp!=CP_MACCP) && (cp!=CP_THREAD_ACP) && (cp!=CP_SYMBOL) && (cp!=CP_UTF7) && (cp!=CP_UTF8) && !GetCPInfo(cp,&CPInfo)) cp=CP_ACP; #ifdef DEBUG_DECODECODEPAGE DebugLog(DecodeFile,"%d",cp); #endif for(Index=0;Index tempstoreLength) break; start++; } } tempstore[outind] = 0; *storeto = tempstore; }