diff options
Diffstat (limited to 'protocols/WebView/src/webview_cleanup.cpp')
-rw-r--r-- | protocols/WebView/src/webview_cleanup.cpp | 797 |
1 files changed, 797 insertions, 0 deletions
diff --git a/protocols/WebView/src/webview_cleanup.cpp b/protocols/WebView/src/webview_cleanup.cpp new file mode 100644 index 0000000000..af620f7351 --- /dev/null +++ b/protocols/WebView/src/webview_cleanup.cpp @@ -0,0 +1,797 @@ +/* +* A plugin for Miranda IM which displays web page text in a window Copyright +* (C) 2005 Vincent Joyce. +* +* Miranda IM: the free icq client for MS Windows Copyright (C) 2000-2 +* Richard Hughes, Roland Rabien & Tristan Van de Vreede +* +* This program is free software; you can redistribute it and/or modify it +* under the terms of the GNU General Public License as published by the Free +* Software Foundation; either version 2 of the License, or (at your option) +* any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +* for more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place - Suite 330, Boston, MA 02111-1307, USA. +*/ + +#include "stdafx.h" +#include "webview.h" + +// /////////////////////// +// characters and symbols// +// /////////////////////// + +#define AMOUNT3 164 + +char*CharacterCodes[AMOUNT3] = +{ + """, + "&", + "<", + ">", + " ", + "¡", + "¢", + "£", + "¤", + "¥", + "¦", + "§", + "¨", + "©", + "ª", + "«", + "¬", + "­", + "®", + "¯", + "°", + "±", + "²", + "³", + "´", + "µ", + "¶", + "·", + "¸", + "¹", + "º", + "»", + "¼", + "½", + "¾", + "¿", + "À", + "Á", + "Â", + "Ã", + "Ä", + "Å", + "Æ", + "Ç", + "È", + "É", + "Ê", + "Ë", + "Ì", + "Í", + "Î", + "Ï", + "Ð", + "Ñ", + "Ò", + "Ó", + "Ô", + "Õ", + "Ö", + "×", + "Ø", + "Ù", + "Ú", + "Û", + "Ü", + "Ý", + "Þ", + "ß", + "à", + "á", + "â", + "ã", + "ä", + "å", + "æ", + "ç", + "è", + "é", + "ê", + "ë", + "ì", + "í", + "î", + "ï", + "ð", + "ñ", + "ò", + "ó", + "ô", + "õ", + "ö", + "÷", + "ø", + "ù", + "ú", + "û", + "ü", + "ý", + "þ", + "&yumil;", + "Œ", // greater that 255, extra latin characters + "œ", + "Š", + "š", + "Ÿ", + "ƒ", + "ˆ", + "˜", + "Œ", + "œ", + "Š", + "š", + "Ÿ", + "ƒ", + "ˆ", + "˜", + "–", // Misc other characters + "—", + "‘", + "’", + "‚", + "“", + "”", + "„", + "†", + "‡", + "•", + "…", + "‰", + "‹", + "›", + "€", + "ℑ", + "ℜ", + "™", + "–", + "—", + "‘", + "’", + "‚", + "“", + "”", + "„", + "†", + "‡", + "•", + "…", + "‰", + "‹", + "›", + "€", + "ℑ", + "ℜ", + "™", + " ", + " ", + " ", + " ", + " ", + " ", + "õ", // symbols without numeric code + "¿", + "¦", + "¯"}; + +char Characters[AMOUNT3] = +{ + '\"', + '&', + '<', + '>', + ' ', + '¡', + '¢', + '£', + '¤', + '¥', + '¦', + '§', + '¨', + '©', + 'ª', + '«', + '¬', + '', + '®', + '¯', + '°', + '±', + '²', + '³', + '´', + 'µ', + '¶', + '·', + '¸', + '¹', + 'º', + '»', + '¼', + '½', + '¾', + '¿', + 'À', + 'Á', + 'Â', + 'Ã', + 'Ä', + 'Å', + 'Æ', + 'Ç', + 'È', + 'É', + 'Ê', + 'Ë', + 'Ì', + 'Í', + 'Î', + 'Ï', + 'Ð', + 'Ñ', + 'Ò', + 'Ó', + 'Ô', + 'Õ', + 'Ö', + '×', + 'Ø', + 'Ù', + 'Ú', + 'Û', + 'Ü', + 'Ý', + 'Þ', + 'ß', + 'à', + 'á', + 'â', + 'ã', + 'ä', + 'å', + 'æ', + 'ç', + 'è', + 'é', + 'ê', + 'ë', + 'ì', + 'í', + 'î', + 'ï', + 'ð', + 'ñ', + 'ò', + 'ó', + 'ô', + 'õ', + 'ö', + '÷', + 'ø', + 'ù', + 'ú', + 'û', + 'ü', + 'ý', + 'þ', + 'ÿ', + 'Œ', // greater than 255 extra latin characters + 'œ', + 'Š', + 'š', + 'Ÿ', + 'ƒ', + 'ˆ', + '˜', + 'Œ', + 'œ', + 'Š', + 'š', + 'Ÿ', + 'ƒ', + 'ˆ', + '˜', + '–', + '—', // misc other characters + '‘', + '’', + '‚', + '“', + '”', + '„', + '†', + '‡', + '•', + '…', + '‰', + '‹', + '›', + '€', + 'I', + 'R', + '™', + '–', + '—', + '‘', + '’', + '‚', + '“', + '”', + '„', + '†', + '‡', + '•', + '…', + '‰', + '‹', + '›', + '€', + 'I', + 'R', + '™', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + 'õ', + '¿', + '¦', + '¯'}; + +/*****************************************************************************/ +void CodetoSymbol(char *truncated) +{ + int counter = 0; + int position = 0, recpos = 0; + static char *stringfrompos; + + for (int n = 0; n < AMOUNT3; n++) { + while (true) { // loop forever + Sleep(1); // avoid 100% CPU + + if ( strstr(truncated, CharacterCodes[n]) != nullptr) { // does character code exist? + stringfrompos = strstr(truncated, CharacterCodes[n]); + + position = stringfrompos - truncated; + counter = 0; + while (counter != mir_strlen(CharacterCodes[n])) { + truncated[position + counter] = ' '; + counter++; + } + + truncated[(position + counter) - 1] = Characters[n]; + strncpy(&truncated[position], &truncated[position + mir_strlen(CharacterCodes[n])] - 1, mir_strlen(&truncated[position]) - 1); + } // end does character code exist? + + if (recpos == position) + break; // break out of loop if doesn't find new character code + + recpos = position; + } // end loop forever + } // for +} + +/*****************************************************************************/ +void EraseBlock(char *truncated) +{ + int counter = 0; + int positionStart = 0, positionEnd = 0; + char *stringfrompos; + int BlockLength = 0; + + char* tempraw = (char*)malloc(MAXSIZE1); + if (truncated) + strncpy(tempraw, truncated, MAXSIZE1); + + // /////////////////////////// + + while (true) { + Sleep(1); // avoid 100% CPU + // /get start and end of block + + if (strstr(tempraw, "<!--") != nullptr) // does tag exist? + { + stringfrompos = strstr(tempraw, "<!--"); + positionStart = stringfrompos - tempraw; + } + + if (strstr(tempraw, "-->") != nullptr) // does tag exist? + { + stringfrompos = strstr(tempraw, "-->"); + positionEnd = stringfrompos - tempraw; + } + BlockLength = (positionEnd - positionStart) + 3; + + if ((strstr(tempraw, "<!--") == nullptr) || (strstr(tempraw, "-->") == nullptr)) + break; + + ///////////////////////////////////////// + + if (strstr(tempraw, "<!--") != nullptr) + for (counter = 0; counter < BlockLength; counter++) + tempraw[positionStart + counter] = ' '; + + if ((positionStart == 0) && (positionEnd == 0)) + break; + if (positionStart > positionEnd) + break; + + positionStart = 0; + positionEnd = 0; + } + + // /////////////////////////// + positionStart = 0; + positionEnd = 0; + + // 2// + while (true) { + Sleep(1); // avoid 100% CPU + // /get start and end of block + + if (((strstr(tempraw, "<script")) != nullptr) || (strstr(tempraw, "<SCRIPT") != nullptr)) // does + { + if (strstr(tempraw, "<script") != nullptr) + stringfrompos = strstr(tempraw, "<script"); + else + stringfrompos = strstr(tempraw, "<SCRIPT"); + + positionStart = stringfrompos - tempraw; + } + + if (((strstr(tempraw, "</script")) != nullptr) || (strstr(tempraw, "</SCRIPT") != nullptr)) // does + { + if (strstr(tempraw, "<script") != nullptr) + stringfrompos = strstr(tempraw, "</script"); + else + stringfrompos = strstr(tempraw, "</SCRIPT"); + + positionEnd = stringfrompos - tempraw; + } + BlockLength = (positionEnd - positionStart) + 9; + + if ((strstr(tempraw, "<script") != nullptr) && (strstr(tempraw, "</script") == nullptr)) + break; + + if ((strstr(tempraw, "<SCRIPT") != nullptr) && (strstr(tempraw, "</SCRIPT") == nullptr)) + break; + + /////////////////////////////////////// + + if (((strstr(tempraw, "<script")) != nullptr) || ((strstr(tempraw, "<SCRIPT")) != nullptr)) + for (counter = 0; counter < BlockLength; counter++) + tempraw[positionStart + counter] = ' '; + + if ((positionStart == 0) && (positionEnd == 0)) + break; + if (positionStart > positionEnd) + break; + + positionStart = 0; + positionEnd = 0; + } + + // //// + // 3// + while (true) + { + Sleep(1); // avoid 100% CPU + // /get start and end of block + + if (((strstr(tempraw, "<style")) != nullptr) || (strstr(tempraw, "<STYLE") != nullptr)) // does + { + if (strstr(tempraw, "<style") != nullptr) + stringfrompos = strstr(tempraw, "<style"); + else + stringfrompos = strstr(tempraw, "<STYLE"); + + positionStart = stringfrompos - tempraw; + } + + if (((strstr(tempraw, "</style")) != nullptr) || (strstr(tempraw, "</STYLE") != nullptr)) // does + { + if (strstr(tempraw, "<style") != nullptr) + stringfrompos = strstr(tempraw, "</style"); + else + stringfrompos = strstr(tempraw, "</STYLE"); + + positionEnd = stringfrompos - tempraw; + } + BlockLength = (positionEnd - positionStart) + 8; + + if ((strstr(tempraw, "<style") != nullptr) && (strstr(tempraw, "</style") == nullptr)) + break; + + if ((strstr(tempraw, "<STYLE") != nullptr) && (strstr(tempraw, "</STYLE") == nullptr)) + break; + + /////////////////////////////////////// + if (((strstr(tempraw, "<style")) != nullptr) || ((strstr(tempraw, "<STYLE")) != nullptr)) + for (counter = 0; counter < BlockLength; counter++) + tempraw[positionStart + counter] = ' '; + + if ((positionStart == 0) && (positionEnd == 0)) + break; + if (positionStart > positionEnd) + break; + + positionStart = 0; + positionEnd = 0; + } + + // 4// + + while (true) { + Sleep(1); // avoid 100% CPU + // /get start and end of block + if (strstr(tempraw, "{") != nullptr) // does tag exist? + { + stringfrompos = strstr(tempraw, "{"); + positionStart = stringfrompos - tempraw; + } + + if (strstr(tempraw, "}") != nullptr) // does tag exist? + { + stringfrompos = strstr(tempraw, "}"); + positionEnd = stringfrompos - tempraw; + } + BlockLength = (positionEnd - positionStart) + 1; + + if ((strstr(tempraw, "}") == nullptr) || (strstr(tempraw, "{") == nullptr)) + break; + + ///////////////////////////////////////// + if (strstr(tempraw, "{") != nullptr) + for (counter = 0; counter < BlockLength; counter++) + tempraw[positionStart + counter] = ' '; + + if ((positionStart == 0) && (positionEnd == 0)) + break; + if (positionStart > positionEnd) + break; + + positionStart = 0; + positionEnd = 0; + } + + // /////////////////////////// + positionStart = 0; + positionEnd = 0; + + strncpy(truncated, tempraw, mir_strlen(truncated)); + free(tempraw); +} + +/*****************************************************************************/ +void EraseSymbols(char *truncated) +{ + int counter = 0; + int position = 0, recpos = 0; + char *stringfrompos; + + char *tempraw = (char*)malloc(MAXSIZE1); + if (truncated) + strncpy(tempraw, truncated, MAXSIZE1); + + // ////// + while (true) { + Sleep(1); // avoid 100% CPU + + /**/ + counter = 0; + if ((strstr(tempraw, "&#")) != nullptr) { + stringfrompos = strstr(tempraw, "&#"); + position = stringfrompos - tempraw; + + while (true) { + tempraw[position + counter] = ' '; + counter++; + if (counter > 20) + break; + if (tempraw[position + counter] == ' ') + break; + } + if (tempraw[position + counter] == ';') + tempraw[position + counter] = ' '; + } + if (recpos == position) + break; + recpos = position; + } + + strncpy(truncated, tempraw, mir_strlen(truncated)); + free(tempraw); +} + +/*****************************************************************************/ +void NumSymbols(char *truncated) +{ + int counter = 0; + int position = 0, recpos = 0; + char*stringfrompos; + char symbol[20]; + int character; + + char *tempraw = (char*)malloc(MAXSIZE1); + if (truncated) + strncpy(tempraw, truncated, MAXSIZE1); + + while (true) { + Sleep(1); // avoid 100% CPU + + counter = 0; + + if ((strstr(tempraw, "&#")) != nullptr) { + stringfrompos = strstr(tempraw, "&#"); + position = stringfrompos - tempraw; + + while (true) { + if (counter > 1) + symbol[counter - 2] = tempraw[position + counter]; + + tempraw[position + counter] = ' '; + counter++; + if (counter > 20) + break; + + if ((tempraw[position + counter] == ';')) { + symbol[counter - 2] = '\0'; + character = atoi(symbol); + + if (character > 0 && character < 256) + memset(&tempraw[position], character, 1); + break; + } + } + + if (tempraw[position + counter] == ';') + tempraw[position + counter] = ' '; + } + if (recpos == position) + break; + recpos = position; + } + + strncpy(truncated, tempraw, mir_strlen(truncated)); + free(tempraw); +} + +/*****************************************************************************/ +void FastTagFilter(char *truncated) +{ + char *tempraw = (char*)malloc(MAXSIZE1); + if (truncated) + strncpy(tempraw, truncated, MAXSIZE1); + + for (int counter = 0; counter < mir_strlen(tempraw); counter++) { + if (tempraw[counter] == '<') { + while (tempraw[counter] != '>') { + if (counter >= mir_strlen(tempraw)) + break; + + tempraw[counter] = ' '; + counter++; + } + if (tempraw[counter] == '>') + tempraw[counter] = ' '; + } + } + + strncpy(truncated, tempraw, mir_strlen(truncated)); + free(tempraw); +} + +/*****************************************************************************/ +void RemoveInvis(char *truncated, int AmountWspcRem) +{ + int erase = 0; + int RemovalLevel = 0; + + char *tempraw = (char*)malloc(MAXSIZE1); + if (truncated) + strncpy(tempraw, truncated, MAXSIZE1); + + switch (AmountWspcRem) { + case 1: + RemovalLevel = 80; // small + break; + case 2: + RemovalLevel = 30; // medium + break; + case 3: + RemovalLevel = 10; // large + break; + } + + for (int counter = 0; counter < mir_strlen(tempraw); counter++) { + if (AmountWspcRem != 0 && AmountWspcRem != 4) { + if ((tempraw[counter] == '\n') || (tempraw[counter] == ' ') || (tempraw[counter] == '\r')) + erase = erase + 1; + else + erase = 0; + + if (erase > RemovalLevel) + tempraw[counter] = ' '; + } + + if (AmountWspcRem == 4) + if ((tempraw[counter] == '\n') || (tempraw[counter] == ' ') || (tempraw[counter] == '\r')) + tempraw[counter] = ' '; + } // end for + + strncpy(truncated, tempraw, mir_strlen(truncated)); + free(tempraw); +} + +/*****************************************************************************/ +void RemoveTabs(char *truncated) +{ + char *tempraw = (char*)malloc(MAXSIZE1); + if (truncated) + strncpy(tempraw, truncated, MAXSIZE1); + + for (int counter = 0; counter < mir_strlen(tempraw); counter++) + if (tempraw[counter] == '\t') + tempraw[counter] = ' '; + + strncpy(truncated, tempraw, mir_strlen(truncated)); + free(tempraw); +} + +/*****************************************************************************/ +void Removewhitespace(char *truncated) +{ + int counter2 = 0; + int pos1 = 0, pos2 = 0; + + for (int counter = 0; counter < mir_strlen(truncated); counter++) { + if (truncated[counter] == ' ' && truncated[counter + 1] == ' ') { + pos1 = counter + 1; + counter2 = counter; + + while (truncated[counter2] == ' ') + counter2++; + + pos2 = counter2; + strncpy(&truncated[pos1], &truncated[pos2], mir_strlen(&truncated[pos1]) - 1); + } // end if + } // end for +} + +/*****************************************************************************/ +void Filter(char *truncated) +{ + char tempraw[MAXSIZE1]; + strncpy(tempraw, truncated, _countof(tempraw)); + + for (int counter = 0; counter < mir_strlen(tempraw); counter++) + if ((tempraw[counter] == '\n') || (tempraw[counter] == '\r') || (tempraw[counter] == '\t')) + strncpy(&tempraw[counter], &tempraw[counter + 1], mir_strlen(&tempraw[counter]) - 1); + + strncpy(truncated, tempraw, mir_strlen(truncated)); +} |