From d6edd541b10926aee0e9442d6577bf8213e0f757 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20P=C3=B6sel?= <robyer@seznam.cz>
Date: Mon, 5 Nov 2012 16:02:13 +0000
Subject: Facebook: Replace &#...; HTML character codes with chars (fixes #32)

git-svn-id: http://svn.miranda-ng.org/main/trunk@2208 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
---
 protocols/FacebookRM/src/utils.cpp | 286 ++++++++++++++++++++-----------------
 1 file changed, 156 insertions(+), 130 deletions(-)

(limited to 'protocols/FacebookRM/src/utils.cpp')
diff --git a/protocols/FacebookRM/src/utils.cpp b/protocols/FacebookRM/src/utils.cpp
index 8e33b26e01..999cb69c56 100644
--- a/protocols/FacebookRM/src/utils.cpp
+++ b/protocols/FacebookRM/src/utils.cpp
@@ -24,10 +24,9 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 std::string utils::url::encode(const std::string &s)
 {
-	char *encoded = reinterpret_cast<char*>(CallService( MS_NETLIB_URLENCODE,
-		0,reinterpret_cast<LPARAM>(s.c_str())));
+	char *encoded = reinterpret_cast<char*>(CallService(MS_NETLIB_URLENCODE, 0, reinterpret_cast<LPARAM>(s.c_str())));
 	std::string ret = encoded;
-	HeapFree(GetProcessHeap(),0,encoded);
+	HeapFree(GetProcessHeap(), 0, encoded);
 
 	return ret;
 }
@@ -35,30 +34,30 @@ std::string utils::url::encode(const std::string &s)
 std::string utils::url::decode(std::string data)
 {
 	// TODO: Better and universal method?
-	utils::text::replace_all( &data, "%2F", "/" );
-	utils::text::replace_all( &data, "%3F", "?" );
-	utils::text::replace_all( &data, "%3D", "=" );
-	utils::text::replace_all( &data, "%26", "&" );
+	utils::text::replace_all(&data, "%2F", "/");
+	utils::text::replace_all(&data, "%3F", "?");
+	utils::text::replace_all(&data, "%3D", "=");
+	utils::text::replace_all(&data, "%26", "&");
 
 	return data;
 }
 
-std::string utils::time::unix_timestamp( )
+std::string utils::time::unix_timestamp()
 {
-	time_t in = ::time( NULL );
-	return utils::conversion::to_string( ( void* )&in, UTILS_CONV_TIME_T );
+	time_t in = ::time(NULL);
+	return utils::conversion::to_string((void*)&in, UTILS_CONV_TIME_T);
 }
 
-std::string utils::time::mili_timestamp( )
+std::string utils::time::mili_timestamp()
 {
 	SYSTEMTIME st;
 	std::string timestamp = utils::time::unix_timestamp();
 	GetSystemTime(&st);
-	timestamp.append(utils::conversion::to_string( ( void* )&st.wMilliseconds, UTILS_CONV_UNSIGNED_NUMBER ));
+	timestamp.append(utils::conversion::to_string((void*)&st.wMilliseconds, UTILS_CONV_UNSIGNED_NUMBER));
 	return timestamp;
 }
 
-DWORD utils::time::fix_timestamp( double mili_timestamp )
+DWORD utils::time::fix_timestamp(double mili_timestamp)
 {
 	// If it is really mili_timestamp
 	if (mili_timestamp > 100000000000) {
@@ -67,7 +66,7 @@ DWORD utils::time::fix_timestamp( double mili_timestamp )
 	return (DWORD) mili_timestamp;
 }
 
-DWORD utils::conversion::to_timestamp( std::string data )
+DWORD utils::conversion::to_timestamp(std::string data)
 {
 	DWORD timestamp = NULL;
 	if (!utils::conversion::from_string<DWORD>(timestamp, data, std::dec)) {
@@ -76,57 +75,57 @@ DWORD utils::conversion::to_timestamp( std::string data )
 	return timestamp;
 }
 
-std::string utils::conversion::to_string( void* data, WORD type )
+std::string utils::conversion::to_string(void* data, WORD type)
 {
 	std::stringstream out;
 
-	switch ( type )
+	switch (type)
 	{
   	case UTILS_CONV_BOOLEAN:
 		out << (data ? "true" : "false");
 
     case UTILS_CONV_TIME_T:
-		out << (*( time_t* )data);
+		out << (*(time_t*)data);
 		break;
 
 	case UTILS_CONV_SIGNED_NUMBER:
-  		out << (*( signed int* )data);
+  		out << (*(signed int*)data);
 		break;
 
 	case UTILS_CONV_UNSIGNED_NUMBER:
-		out << (*( unsigned int* )data);
+		out << (*(unsigned int*)data);
 		break;
 	}
 
-	return out.str( );
+	return out.str();
 }
 
-void utils::text::replace_first( std::string* data, std::string from, std::string to )
+void utils::text::replace_first(std::string* data, std::string from, std::string to)
 {
 	std::string::size_type position = data->find(from);
-	if ( position != std::string::npos )
+	if (position != std::string::npos)
 	{
-		data->replace( position, from.size(), to );
+		data->replace(position, from.size(), to);
 	}
 }
 
-void utils::text::replace_all( std::string* data, std::string from, std::string to )
+void utils::text::replace_all(std::string* data, std::string from, std::string to)
 {
 	std::string::size_type position = 0;
 
-	while ( ( position = data->find( from, position )) != std::string::npos )
+	while ((position = data->find(from, position)) != std::string::npos)
 	{
-		data->replace( position, from.size(), to );
+		data->replace(position, from.size(), to);
 		position++;
 	}
 }
 
-unsigned int utils::text::count_all( std::string* data, std::string term )
+unsigned int utils::text::count_all(std::string* data, std::string term)
 {
 	unsigned int count = 0;
 	std::string::size_type position = 0;
 
-	while ( ( position = data->find( term, position )) != std::string::npos )
+	while ((position = data->find(term, position)) != std::string::npos)
 	{
 		count++;
 		position++;
@@ -135,20 +134,37 @@ unsigned int utils::text::count_all( std::string* data, std::string term )
 	return count;
 }
 
-std::string utils::text::special_expressions_decode( std::string data )
+void utils::text::append_ordinal(unsigned int value, std::string* data)
 {
-	utils::text::replace_all( &data, "&amp;", "&" );
-	utils::text::replace_all( &data, "&quot;", "\"" );
-	utils::text::replace_all( &data, "&#039;", "'" );
-	utils::text::replace_all( &data, "&#64;", "@" );
-	utils::text::replace_all( &data, "&lt;", "<" );
-	utils::text::replace_all( &data, "&gt;", ">" );
+	if (value >= 128 && value <= 2047)
+	{ // U+0080 .. U+07FF
+		*data += (char)(192 + (value / 64));
+		*data += (char)(128 + (value % 64));
+	} 
+	else if (value >= 2048 && value <= 65535)
+	{ // U+0800 .. U+FFFF
+		*data += (char)(224 + (value / 4096));
+		*data += (char)(128 + ((value / 64) % 64));
+		*data += (char)(128 + (value % 64));
+	}
+	else if (value <= 127)
+	{ // U+0000 .. U+007F
+		*data += (char)value;
+	}
+}
 
-	utils::text::replace_all( &data, "&hearts;", "\xE2\x99\xA5" ); // direct byte replacement
-//	utils::text::replace_all( &data, "&hearts;", "\\u2665" );      // indirect slashu replacement
+std::string utils::text::special_expressions_decode(std::string data)
+{
+	utils::text::replace_all(&data, "&amp;", "&");
+	utils::text::replace_all(&data, "&quot;", "\"");
+	utils::text::replace_all(&data, "&lt;", "<");
+	utils::text::replace_all(&data, "&gt;", ">");
+
+	utils::text::replace_all(&data, "&hearts;", "\xE2\x99\xA5"); // direct byte replacement
+//	utils::text::replace_all(&data, "&hearts;", "\\u2665");      // indirect slashu replacement
 
-	utils::text::replace_all( &data, "\\/", "/" );
-	utils::text::replace_all( &data, "\\\\", "\\" );
+	utils::text::replace_all(&data, "\\/", "/");
+	utils::text::replace_all(&data, "\\\\", "\\");
 
 	// TODO: Add more to comply general usage
 	// http://www.utexas.edu/learn/html/spchar.html
@@ -158,25 +174,51 @@ std::string utils::text::special_expressions_decode( std::string data )
 	// http://www.w3schools.com/tags/ref_entities.asp
 	// http://www.natural-innovations.com/wa/doc-charset.html
 	// http://webdesign.about.com/library/bl_htmlcodes.htm
-	
-	return data;
+
+	std::string new_string = "";
+	for (std::string::size_type i = 0; i < data.length(); i++)
+	{
+		if (data.at(i) == '&' && (i+1) < data.length() && data.at(i+1) == '#')
+		{
+			unsigned int udn;
+			std::string::size_type comma = data.find(";", i);
+			if (comma != std::string::npos) {
+				bool hexa = false;
+				if ((i+2) < data.length() && data.at(i+2) == 'x') {
+					hexa = true;
+					i += 3;
+				} else {
+					i += 2;
+				}
+				udn = strtol(data.substr(i, comma-i).c_str(), NULL, hexa ? 16 : 10);
+				i = comma;
+			}
+			
+			utils::text::append_ordinal(udn, &new_string);
+			continue;
+		}
+
+		new_string += data.at(i);
+	}
+
+	return new_string;
 }
 
-std::string utils::text::edit_html( std::string data )
+std::string utils::text::edit_html(std::string data)
 {
 	std::string::size_type end = 0;
 	std::string::size_type start = 0;
 	std::string new_string = "";
   
-	while ( end != std::string::npos )
+	while (end != std::string::npos)
 	{
-		end = data.find( "<span class=\\\"text_exposed_hide", start );
-		if ( end != std::string::npos )
+		end = data.find("<span class=\\\"text_exposed_hide", start);
+		if (end != std::string::npos)
 		{
-			new_string += data.substr( start, end - start );
-			start = data.find( "<\\/span", end );
+			new_string += data.substr(start, end - start);
+			start = data.find("<\\/span", end);
 		} else {
-			new_string += data.substr( start, data.length() - start );
+			new_string += data.substr(start, data.length() - start);
 		}
 	}
 
@@ -184,15 +226,15 @@ std::string utils::text::edit_html( std::string data )
 	data = new_string;
 	new_string = "";
 
-	while ( end != std::string::npos )
+	while (end != std::string::npos)
 	{
-		end = data.find( "<span class=\\\"uiTooltipText", start );
-		if ( end != std::string::npos )
+		end = data.find("<span class=\\\"uiTooltipText", start);
+		if (end != std::string::npos)
 		{
-			new_string += data.substr( start, end - start );
-			start = data.find( "<\\/span", end );
+			new_string += data.substr(start, end - start);
+			start = data.find("<\\/span", end);
 		} else {
-			new_string += data.substr( start, data.length() - start );
+			new_string += data.substr(start, data.length() - start);
 		}
 	}
 
@@ -200,64 +242,64 @@ std::string utils::text::edit_html( std::string data )
 	start = end = 0;
 	data = new_string;
 	new_string = "";
-	while ( end != std::string::npos )
+	while (end != std::string::npos)
 	{
-		end = data.find( "translate_story_link\\\">", start );
-		if ( end != std::string::npos )
+		end = data.find("translate_story_link\\\">", start);
+		if (end != std::string::npos)
 		{
-			new_string += data.substr( start, end - start );
-			start = data.find( "<\\/div", end );
+			new_string += data.substr(start, end - start);
+			start = data.find("<\\/div", end);
 		} else {
-			new_string += data.substr( start, data.length() - start );
+			new_string += data.substr(start, data.length() - start);
 		}
 	}
 
 	// Append newline after attachement title
-	start = new_string.find( "class=\\\"uiAttachmentTitle", 0 );
-	if ( start != std::string::npos )
+	start = new_string.find("class=\\\"uiAttachmentTitle", 0);
+	if (start != std::string::npos)
 	{
-		data = new_string.substr( 0, start );
-		data = utils::text::trim( data );
+		data = new_string.substr(0, start);
+		data = utils::text::trim(data);
 
-		start = new_string.find( ">", start );
-		if ( start != std::string::npos )
+		start = new_string.find(">", start);
+		if (start != std::string::npos)
 			new_string.insert(start+1, "\n\n");
 
-		start = new_string.find( "<\\/div>", start );
-		if ( start != std::string::npos )
+		start = new_string.find("<\\/div>", start);
+		if (start != std::string::npos)
 			new_string.insert(start, "\n");
 	}
 
 	// Append newline between attachement link and description
-	start = new_string.find( "uiAttachmentDesc", 0 );
-	if ( start != std::string::npos )
+	start = new_string.find("uiAttachmentDesc", 0);
+	if (start != std::string::npos)
 	{
-		start = new_string.find( ">", start );
-		if ( start != std::string::npos )
+		start = new_string.find(">", start);
+		if (start != std::string::npos)
 			new_string.insert(start+1, "\n");
 
-		start = new_string.find( "<\\/div>", start );
-		if ( start != std::string::npos )
+		start = new_string.find("<\\/div>", start);
+		if (start != std::string::npos)
 			new_string.insert(start, "\n");
 	}
   
-	utils::text::replace_all( &new_string, "<br \\/>", "\n" );
-	utils::text::replace_all( &new_string, "\n\n\n", "\n\n" );
-	//utils::text::replace_all( &new_string, "\\t", "" );
-	//utils::text::replace_all( &new_string, "\\n", "" );
+	utils::text::replace_all(&new_string, "<br \\/>", "\n");
+	utils::text::replace_all(&new_string, "\n\n\n", "\n\n");
+	//utils::text::replace_all(&new_string, "\\t", "");
+	//utils::text::replace_all(&new_string, "\\n", "");
 	return new_string;
 }
 
 
-std::string utils::text::remove_html( std::string data )
+std::string utils::text::remove_html(std::string data)
 {
 	std::string new_string = "";
 
-	for ( std::string::size_type i = 0; i < data.length( ); i++ )
+	for (std::string::size_type i = 0; i < data.length(); i++)
 	{
-		if ( data.at(i) == '<' && data.at(i+1) != ' ' )
+		if (data.at(i) == '<' && data.at(i+1) != ' ')
 		{
-			i = data.find( ">", i );
+			i = data.find(">", i);
 			if (i == std::string::npos)
 				break;
 
@@ -270,32 +312,16 @@ std::string utils::text::remove_html( std::string data )
 	return new_string;
 }
 
-std::string utils::text::slashu_to_utf8( std::string data )
+std::string utils::text::slashu_to_utf8(std::string data)
 {
 	std::string new_string = "";
 
-	for ( std::string::size_type i = 0; i < data.length( ); i++ )
+	for (std::string::size_type i = 0; i < data.length(); i++)
 	{
-		if ( data.at(i) == '\\' && (i+1) < data.length( ) && data.at(i+1) == 'u' )
+		if (data.at(i) == '\\' && (i+1) < data.length() && data.at(i+1) == 'u')
 		{
-			unsigned int udn = strtol( data.substr( i + 2, 4 ).c_str(), NULL, 16 );
-
-			if ( udn >= 128 && udn <= 2047 )
-			{ // U+0080 .. U+07FF
-				new_string += ( char )( 192 + ( udn / 64 ));
-				new_string += ( char )( 128 + ( udn % 64 ));
-			} 
-			else if ( udn >= 2048 && udn <= 65535 )
-			{ // U+0800 .. U+FFFF
-				new_string += ( char )( 224 + ( udn / 4096 ));
-				new_string += ( char )( 128 + ( ( udn / 64 ) % 64 ));
-				new_string += ( char )( 128 + ( udn % 64  ));
-			}
-			else if ( udn <= 127 )
-			{ // U+0000 .. U+007F (should not appear)
-				new_string += ( char )udn;
-			}
-
+			unsigned int udn = strtol(data.substr(i + 2, 4).c_str(), NULL, 16);
+			append_ordinal(udn, &new_string);
 			i += 5;
 			continue;
 		}
@@ -306,13 +332,13 @@ std::string utils::text::slashu_to_utf8( std::string data )
 	return new_string;
 }
 
-std::string utils::text::trim( std::string data )
+std::string utils::text::trim(std::string data)
 {
 	std::string spaces = " \t\r\n"; // TODO: include "nbsp"?
-	std::string::size_type begin = data.find_first_not_of( spaces );
-	std::string::size_type end = data.find_last_not_of( spaces ) + 1;
+	std::string::size_type begin = data.find_first_not_of(spaces);
+	std::string::size_type end = data.find_last_not_of(spaces) + 1;
 
-	return (begin != std::string::npos) ? data.substr( begin, end - begin ) : "";
+	return (begin != std::string::npos) ? data.substr(begin, end - begin) : "";
 }
 
 void utils::text::explode(std::string str, std::string separator, std::vector<std::string>* results)
@@ -331,36 +357,36 @@ void utils::text::explode(std::string str, std::string separator, std::vector<st
 	}
 }
 
-std::string utils::text::source_get_value( std::string* data, unsigned int argument_count, ... )
+std::string utils::text::source_get_value(std::string* data, unsigned int argument_count, ...)
 {
 	va_list arg;
 	std::string ret;
 	std::string::size_type start = 0, end = 0;
 	
-	va_start( arg, argument_count );
+	va_start(arg, argument_count);
 	
-	for ( unsigned int i = argument_count; i > 0; i-- )
+	for (unsigned int i = argument_count; i > 0; i--)
 	{
-		if ( i == 1 )
+		if (i == 1)
 		{
-			end = data->find( va_arg( arg, char* ), start );
-			if ( start == std::string::npos || end == std::string::npos )
+			end = data->find(va_arg(arg, char*), start);
+			if (start == std::string::npos || end == std::string::npos)
 				break;
-			ret = data->substr( start, end - start );
+			ret = data->substr(start, end - start);
 		} else {
-			std::string term = va_arg( arg, char* );
-			start = data->find( term, start );
-			if ( start == std::string::npos )
+			std::string term = va_arg(arg, char*);
+			start = data->find(term, start);
+			if (start == std::string::npos)
 				break;
 			start += term.length();
 		}
 	}
 	
-	va_end( arg );	
+	va_end(arg);	
 	return ret;
 }
 
-std::string utils::text::source_get_value2( std::string* data, const char *term, const char *endings)
+std::string utils::text::source_get_value2(std::string* data, const char *term, const char *endings)
 {
 	std::string::size_type start = 0, end = 0;
 	std::string ret;
@@ -371,17 +397,17 @@ std::string utils::text::source_get_value2( std::string* data, const char *term,
 
 		end = data->find_first_of(endings, start);
 		if (end != std::string::npos) {
-			ret = data->substr( start, end - start );
+			ret = data->substr(start, end - start);
 		}
 	}
 
 	return ret;
 }
 
-int utils::number::random( )
+int utils::number::random()
 {
-	srand( ::time( NULL ));
-	return rand( );
+	srand(::time(NULL));
+	return rand();
 }
 
 int utils::debug::log(std::string file_name, std::string text)
@@ -389,23 +415,23 @@ int utils::debug::log(std::string file_name, std::string text)
 	char szFile[MAX_PATH];
 	GetModuleFileNameA(g_hInstance, szFile, SIZEOF(szFile));
 	std::string path = szFile;
-	path = path.substr( 0, path.rfind( "\\" ));
-	path = path.substr( 0, path.rfind( "\\" ) + 1 );
+	path = path.substr(0, path.rfind("\\"));
+	path = path.substr(0, path.rfind("\\") + 1);
 	path = path + file_name.c_str() + ".txt";
 
 	SYSTEMTIME time;
-	GetLocalTime( &time );
+	GetLocalTime(&time);
 
-	std::ofstream out( path.c_str(), std::ios_base::out | std::ios_base::app | std::ios_base::ate );
+	std::ofstream out(path.c_str(), std::ios_base::out | std::ios_base::app | std::ios_base::ate);
 	out << "[" << (time.wHour < 10 ? "0" : "") << time.wHour << ":" << (time.wMinute < 10 ? "0" : "") << time.wMinute << ":" << (time.wSecond < 10 ? "0" : "") << time.wSecond << "] " << text << std::endl;
-	out.close( );
+	out.close();
 
 	return EXIT_SUCCESS;
 }
 
-void __fastcall utils::mem::detract(char** str )
+void __fastcall utils::mem::detract(char** str)
 {
-	utils::mem::detract( ( void** )str );
+	utils::mem::detract((void**)str);
 }
 
 void __fastcall utils::mem::detract(void** p)
-- 
cgit v1.2.3