From d6edd541b10926aee0e9442d6577bf8213e0f757 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20P=C3=B6sel?= Date: Mon, 5 Nov 2012 16:02:13 +0000 Subject: Facebook: Replace #&... html codes to chars (fixes #32) git-svn-id: http://svn.miranda-ng.org/main/trunk@2208 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c --- protocols/FacebookRM/src/utils.cpp | 286 ++++++++++++++++++++----------------- protocols/FacebookRM/src/utils.h | 21 +-- 2 files changed, 167 insertions(+), 140 deletions(-) diff --git a/protocols/FacebookRM/src/utils.cpp b/protocols/FacebookRM/src/utils.cpp index 8e33b26e01..999cb69c56 100644 --- a/protocols/FacebookRM/src/utils.cpp +++ b/protocols/FacebookRM/src/utils.cpp @@ -24,10 +24,9 @@ along with this program. If not, see . std::string utils::url::encode(const std::string &s) { - char *encoded = reinterpret_cast(CallService( MS_NETLIB_URLENCODE, - 0,reinterpret_cast(s.c_str()))); + char *encoded = reinterpret_cast(CallService(MS_NETLIB_URLENCODE, 0, reinterpret_cast(s.c_str()))); std::string ret = encoded; - HeapFree(GetProcessHeap(),0,encoded); + HeapFree(GetProcessHeap(), 0, encoded); return ret; } @@ -35,30 +34,30 @@ std::string utils::url::encode(const std::string &s) std::string utils::url::decode(std::string data) { // TODO: Better and universal method? - utils::text::replace_all( &data, "%2F", "/" ); - utils::text::replace_all( &data, "%3F", "?" ); - utils::text::replace_all( &data, "%3D", "=" ); - utils::text::replace_all( &data, "%26", "&" ); + utils::text::replace_all(&data, "%2F", "/"); + utils::text::replace_all(&data, "%3F", "?"); + utils::text::replace_all(&data, "%3D", "="); + utils::text::replace_all(&data, "%26", "&"); return data; } -std::string utils::time::unix_timestamp( ) +std::string utils::time::unix_timestamp() { - time_t in = ::time( NULL ); - return utils::conversion::to_string( ( void* )&in, UTILS_CONV_TIME_T ); + time_t in = ::time(NULL); + return utils::conversion::to_string((void*)&in, UTILS_CONV_TIME_T); } -std::string utils::time::mili_timestamp( ) +std::string utils::time::mili_timestamp() { SYSTEMTIME st; std::string timestamp = utils::time::unix_timestamp(); GetSystemTime(&st); - timestamp.append(utils::conversion::to_string( ( void* )&st.wMilliseconds, UTILS_CONV_UNSIGNED_NUMBER )); + timestamp.append(utils::conversion::to_string((void*)&st.wMilliseconds, UTILS_CONV_UNSIGNED_NUMBER)); return timestamp; } -DWORD utils::time::fix_timestamp( double mili_timestamp ) +DWORD utils::time::fix_timestamp(double mili_timestamp) { // If it is really mili_timestamp if (mili_timestamp > 100000000000) { @@ -67,7 +66,7 @@ DWORD utils::time::fix_timestamp( double mili_timestamp ) return (DWORD) mili_timestamp; } -DWORD utils::conversion::to_timestamp( std::string data ) +DWORD utils::conversion::to_timestamp(std::string data) { DWORD timestamp = NULL; if (!utils::conversion::from_string(timestamp, data, std::dec)) { @@ -76,57 +75,57 @@ DWORD utils::conversion::to_timestamp( std::string data ) return timestamp; } -std::string utils::conversion::to_string( void* data, WORD type ) +std::string utils::conversion::to_string(void* data, WORD type) { std::stringstream out; - switch ( type ) + switch (type) { case UTILS_CONV_BOOLEAN: out << (data ? "true" : "false"); case UTILS_CONV_TIME_T: - out << (*( time_t* )data); + out << (*(time_t*)data); break; case UTILS_CONV_SIGNED_NUMBER: - out << (*( signed int* )data); + out << (*(signed int*)data); break; case UTILS_CONV_UNSIGNED_NUMBER: - out << (*( unsigned int* )data); + out << (*(unsigned int*)data); break; } - return out.str( ); + return out.str(); } -void utils::text::replace_first( std::string* data, std::string from, std::string to ) +void utils::text::replace_first(std::string* data, std::string from, std::string to) { std::string::size_type position = data->find(from); - if ( position != std::string::npos ) + if (position != std::string::npos) { - data->replace( position, from.size(), to ); + data->replace(position, from.size(), to); } } -void utils::text::replace_all( std::string* data, std::string from, std::string to ) +void utils::text::replace_all(std::string* data, std::string from, std::string to) { std::string::size_type position = 0; - while ( ( position = data->find( from, position )) != std::string::npos ) + while ((position = data->find(from, position)) != std::string::npos) { - data->replace( position, from.size(), to ); + data->replace(position, from.size(), to); position++; } } -unsigned int utils::text::count_all( std::string* data, std::string term ) +unsigned int utils::text::count_all(std::string* data, std::string term) { unsigned int count = 0; std::string::size_type position = 0; - while ( ( position = data->find( term, position )) != std::string::npos ) + while ((position = data->find(term, position)) != std::string::npos) { count++; position++; @@ -135,20 +134,37 @@ unsigned int utils::text::count_all( std::string* data, std::string term ) return count; } -std::string utils::text::special_expressions_decode( std::string data ) +void utils::text::append_ordinal(unsigned int value, std::string* data) { - utils::text::replace_all( &data, "&", "&" ); - utils::text::replace_all( &data, """, "\"" ); - utils::text::replace_all( &data, "'", "'" ); - utils::text::replace_all( &data, "@", "@" ); - utils::text::replace_all( &data, "<", "<" ); - utils::text::replace_all( &data, ">", ">" ); + if (value >= 128 && value <= 2047) + { // U+0080 .. U+07FF + *data += (char)(192 + (value / 64)); + *data += (char)(128 + (value % 64)); + } + else if (value >= 2048 && value <= 65535) + { // U+0800 .. U+FFFF + *data += (char)(224 + (value / 4096)); + *data += (char)(128 + ((value / 64) % 64)); + *data += (char)(128 + (value % 64)); + } + else if (value <= 127) + { // U+0000 .. U+007F + *data += (char)value; + } +} - utils::text::replace_all( &data, "♥", "\xE2\x99\xA5" ); // direct byte replacement -// utils::text::replace_all( &data, "♥", "\\u2665" ); // indirect slashu replacement +std::string utils::text::special_expressions_decode(std::string data) +{ + utils::text::replace_all(&data, "&", "&"); + utils::text::replace_all(&data, """, "\""); + utils::text::replace_all(&data, "<", "<"); + utils::text::replace_all(&data, ">", ">"); + + utils::text::replace_all(&data, "♥", "\xE2\x99\xA5"); // direct byte replacement +// utils::text::replace_all(&data, "♥", "\\u2665"); // indirect slashu replacement - utils::text::replace_all( &data, "\\/", "/" ); - utils::text::replace_all( &data, "\\\\", "\\" ); + utils::text::replace_all(&data, "\\/", "/"); + utils::text::replace_all(&data, "\\\\", "\\"); // TODO: Add more to comply general usage // http://www.utexas.edu/learn/html/spchar.html @@ -158,25 +174,51 @@ std::string utils::text::special_expressions_decode( std::string data ) // http://www.w3schools.com/tags/ref_entities.asp // http://www.natural-innovations.com/wa/doc-charset.html // http://webdesign.about.com/library/bl_htmlcodes.htm - - return data; + + std::string new_string = ""; + for (std::string::size_type i = 0; i < data.length(); i++) + { + if (data.at(i) == '&' && (i+1) < data.length() && data.at(i+1) == '#') + { + unsigned int udn; + std::string::size_type comma = data.find(";", i); + if (comma != std::string::npos) { + bool hexa = false; + if ((i+2) < data.length() && data.at(i+2) == 'x') { + hexa = true; + i += 3; + } else { + i += 2; + } + udn = strtol(data.substr(i, comma-i).c_str(), NULL, hexa ? 16 : 10); + i = comma; + } + + utils::text::append_ordinal(udn, &new_string); + continue; + } + + new_string += data.at(i); + } + + return new_string; } -std::string utils::text::edit_html( std::string data ) +std::string utils::text::edit_html(std::string data) { std::string::size_type end = 0; std::string::size_type start = 0; std::string new_string = ""; - while ( end != std::string::npos ) + while (end != std::string::npos) { - end = data.find( "", start ); - if ( end != std::string::npos ) + end = data.find("translate_story_link\\\">", start); + if (end != std::string::npos) { - new_string += data.substr( start, end - start ); - start = data.find( "<\\/div", end ); + new_string += data.substr(start, end - start); + start = data.find("<\\/div", end); } else { - new_string += data.substr( start, data.length() - start ); + new_string += data.substr(start, data.length() - start); } } // Append newline after attachement title - start = new_string.find( "class=\\\"uiAttachmentTitle", 0 ); - if ( start != std::string::npos ) + start = new_string.find("class=\\\"uiAttachmentTitle", 0); + if (start != std::string::npos) { - data = new_string.substr( 0, start ); - data = utils::text::trim( data ); + data = new_string.substr(0, start); + data = utils::text::trim(data); - start = new_string.find( ">", start ); - if ( start != std::string::npos ) + start = new_string.find(">", start); + if (start != std::string::npos) new_string.insert(start+1, "\n\n"); - start = new_string.find( "<\\/div>", start ); - if ( start != std::string::npos ) + start = new_string.find("<\\/div>", start); + if (start != std::string::npos) new_string.insert(start, "\n"); } // Append newline between attachement link and description - start = new_string.find( "uiAttachmentDesc", 0 ); - if ( start != std::string::npos ) + start = new_string.find("uiAttachmentDesc", 0); + if (start != std::string::npos) { - start = new_string.find( ">", start ); - if ( start != std::string::npos ) + start = new_string.find(">", start); + if (start != std::string::npos) new_string.insert(start+1, "\n"); - start = new_string.find( "<\\/div>", start ); - if ( start != std::string::npos ) + start = new_string.find("<\\/div>", start); + if (start != std::string::npos) new_string.insert(start, "\n"); } - utils::text::replace_all( &new_string, "
", "\n" ); - utils::text::replace_all( &new_string, "\n\n\n", "\n\n" ); - //utils::text::replace_all( &new_string, "\\t", "" ); - //utils::text::replace_all( &new_string, "\\n", "" ); + utils::text::replace_all(&new_string, "
", "\n"); + utils::text::replace_all(&new_string, "\n\n\n", "\n\n"); + //utils::text::replace_all(&new_string, "\\t", ""); + //utils::text::replace_all(&new_string, "\\n", ""); return new_string; } -std::string utils::text::remove_html( std::string data ) +std::string utils::text::remove_html(std::string data) { std::string new_string = ""; - for ( std::string::size_type i = 0; i < data.length( ); i++ ) + for (std::string::size_type i = 0; i < data.length(); i++) { - if ( data.at(i) == '<' && data.at(i+1) != ' ' ) + if (data.at(i) == '<' && data.at(i+1) != ' ') { - i = data.find( ">", i ); + i = data.find(">", i); if (i == std::string::npos) break; @@ -270,32 +312,16 @@ std::string utils::text::remove_html( std::string data ) return new_string; } -std::string utils::text::slashu_to_utf8( std::string data ) +std::string utils::text::slashu_to_utf8(std::string data) { std::string new_string = ""; - for ( std::string::size_type i = 0; i < data.length( ); i++ ) + for (std::string::size_type i = 0; i < data.length(); i++) { - if ( data.at(i) == '\\' && (i+1) < data.length( ) && data.at(i+1) == 'u' ) + if (data.at(i) == '\\' && (i+1) < data.length() && data.at(i+1) == 'u') { - unsigned int udn = strtol( data.substr( i + 2, 4 ).c_str(), NULL, 16 ); - - if ( udn >= 128 && udn <= 2047 ) - { // U+0080 .. U+07FF - new_string += ( char )( 192 + ( udn / 64 )); - new_string += ( char )( 128 + ( udn % 64 )); - } - else if ( udn >= 2048 && udn <= 65535 ) - { // U+0800 .. U+FFFF - new_string += ( char )( 224 + ( udn / 4096 )); - new_string += ( char )( 128 + ( ( udn / 64 ) % 64 )); - new_string += ( char )( 128 + ( udn % 64 )); - } - else if ( udn <= 127 ) - { // U+0000 .. U+007F (should not appear) - new_string += ( char )udn; - } - + unsigned int udn = strtol(data.substr(i + 2, 4).c_str(), NULL, 16); + append_ordinal(udn, &new_string); i += 5; continue; } @@ -306,13 +332,13 @@ std::string utils::text::slashu_to_utf8( std::string data ) return new_string; } -std::string utils::text::trim( std::string data ) +std::string utils::text::trim(std::string data) { std::string spaces = " \t\r\n"; // TODO: include "nbsp"? - std::string::size_type begin = data.find_first_not_of( spaces ); - std::string::size_type end = data.find_last_not_of( spaces ) + 1; + std::string::size_type begin = data.find_first_not_of(spaces); + std::string::size_type end = data.find_last_not_of(spaces) + 1; - return (begin != std::string::npos) ? data.substr( begin, end - begin ) : ""; + return (begin != std::string::npos) ? data.substr(begin, end - begin) : ""; } void utils::text::explode(std::string str, std::string separator, std::vector* results) @@ -331,36 +357,36 @@ void utils::text::explode(std::string str, std::string separator, std::vector 0; i-- ) + for (unsigned int i = argument_count; i > 0; i--) { - if ( i == 1 ) + if (i == 1) { - end = data->find( va_arg( arg, char* ), start ); - if ( start == std::string::npos || end == std::string::npos ) + end = data->find(va_arg(arg, char*), start); + if (start == std::string::npos || end == std::string::npos) break; - ret = data->substr( start, end - start ); + ret = data->substr(start, end - start); } else { - std::string term = va_arg( arg, char* ); - start = data->find( term, start ); - if ( start == std::string::npos ) + std::string term = va_arg(arg, char*); + start = data->find(term, start); + if (start == std::string::npos) break; start += term.length(); } } - va_end( arg ); + va_end(arg); return ret; } -std::string utils::text::source_get_value2( std::string* data, const char *term, const char *endings) +std::string utils::text::source_get_value2(std::string* data, const char *term, const char *endings) { std::string::size_type start = 0, end = 0; std::string ret; @@ -371,17 +397,17 @@ std::string utils::text::source_get_value2( std::string* data, const char *term, end = data->find_first_of(endings, start); if (end != std::string::npos) { - ret = data->substr( start, end - start ); + ret = data->substr(start, end - start); } } return ret; } -int utils::number::random( ) +int utils::number::random() { - srand( ::time( NULL )); - return rand( ); + srand(::time(NULL)); + return rand(); } int utils::debug::log(std::string file_name, std::string text) @@ -389,23 +415,23 @@ int utils::debug::log(std::string file_name, std::string text) char szFile[MAX_PATH]; GetModuleFileNameA(g_hInstance, szFile, SIZEOF(szFile)); std::string path = szFile; - path = path.substr( 0, path.rfind( "\\" )); - path = path.substr( 0, path.rfind( "\\" ) + 1 ); + path = path.substr(0, path.rfind("\\")); + path = path.substr(0, path.rfind("\\") + 1); path = path + file_name.c_str() + ".txt"; SYSTEMTIME time; - GetLocalTime( &time ); + GetLocalTime(&time); - std::ofstream out( path.c_str(), std::ios_base::out | std::ios_base::app | std::ios_base::ate ); + std::ofstream out(path.c_str(), std::ios_base::out | std::ios_base::app | std::ios_base::ate); out << "[" << (time.wHour < 10 ? "0" : "") << time.wHour << ":" << (time.wMinute < 10 ? "0" : "") << time.wMinute << ":" << (time.wSecond < 10 ? "0" : "") << time.wSecond << "] " << text << std::endl; - out.close( ); + out.close(); return EXIT_SUCCESS; } -void __fastcall utils::mem::detract(char** str ) +void __fastcall utils::mem::detract(char** str) { - utils::mem::detract( ( void** )str ); + utils::mem::detract((void**)str); } void __fastcall utils::mem::detract(void** p) diff --git a/protocols/FacebookRM/src/utils.h b/protocols/FacebookRM/src/utils.h index 75712d6a2d..b9cb11a369 100644 --- a/protocols/FacebookRM/src/utils.h +++ b/protocols/FacebookRM/src/utils.h @@ -82,17 +82,18 @@ namespace utils namespace text { - void replace_first( std::string* data, std::string from, std::string to ); - void replace_all( std::string* data, std::string from, std::string to ); - unsigned int count_all( std::string* data, std::string term ); - std::string special_expressions_decode( std::string data ); - std::string edit_html( std::string data ); - std::string remove_html( std::string data ); - std::string slashu_to_utf8( std::string data ); - std::string trim( std::string data ); - std::string source_get_value( std::string* data, unsigned int argument_count, ... ); - std::string source_get_value2( std::string* data, const char *term, const char *endings); + void replace_first(std::string* data, std::string from, std::string to); + void replace_all(std::string* data, std::string from, std::string to); + unsigned int count_all(std::string* data, std::string term); + std::string special_expressions_decode( std::string data); + std::string edit_html(std::string data); + std::string remove_html(std::string data); + std::string slashu_to_utf8(std::string data); + std::string trim(std::string data); + std::string source_get_value(std::string* data, unsigned int argument_count, ...); + std::string source_get_value2(std::string* data, const char *term, const char *endings); void explode(std::string str, std::string separator, std::vector* results); + void append_ordinal(unsigned int value, std::string* data); }; namespace conversion -- cgit v1.2.3