diff options
Diffstat (limited to 'protocols/NewsAggregator/Src/Utils.cpp')
-rw-r--r-- | protocols/NewsAggregator/Src/Utils.cpp | 445 |
1 files changed, 445 insertions, 0 deletions
diff --git a/protocols/NewsAggregator/Src/Utils.cpp b/protocols/NewsAggregator/Src/Utils.cpp new file mode 100644 index 0000000000..80248ba48f --- /dev/null +++ b/protocols/NewsAggregator/Src/Utils.cpp @@ -0,0 +1,445 @@ +/* +Copyright (C) 2012 Mataes + +This is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public +License as published by the Free Software Foundation; either +version 2 of the License, or (at your option) any later version. + +This is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with this file; see the file license.txt. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. +*/ + +#include "stdafx.h" + +HNETLIBUSER hNetlibUser = nullptr; +HNETLIBCONN hNetlibHttp; +bool UpdateListFlag = FALSE; + +bool IsMyContact(MCONTACT hContact) +{ + const char *szProto = GetContactProto(hContact); + return szProto != nullptr && mir_strcmp(MODULENAME, szProto) == 0; +} + +void NetlibInit() +{ + NETLIBUSER nlu = {}; + nlu.flags = NUF_OUTGOING | NUF_INCOMING | NUF_HTTPCONNS | NUF_UNICODE; + nlu.szDescriptiveName.w = TranslateT("NewsAggregator HTTP connections"); + nlu.szSettingsModule = MODULENAME; + hNetlibUser = Netlib_RegisterUser(&nlu); +} + +void NetlibUnInit() +{ + Netlib_CloseHandle(hNetlibUser); + hNetlibUser = nullptr; +} + +void GetNewsData(wchar_t *tszUrl, char **szData, MCONTACT hContact, CFeedEditor *pEditDlg) +{ + Netlib_LogfW(hNetlibUser, L"Getting feed data %s.", tszUrl); + NETLIBHTTPREQUEST nlhr = { 0 }; + + // initialize the netlib request + nlhr.cbSize = sizeof(nlhr); + nlhr.requestType = REQUEST_GET; + nlhr.flags = NLHRF_DUMPASTEXT | NLHRF_HTTP11 | NLHRF_REDIRECT; + if (wcsstr(tszUrl, L"https://") != nullptr) + nlhr.flags |= NLHRF_SSL; + char *szUrl = mir_u2a(tszUrl); + nlhr.szUrl = szUrl; + nlhr.nlc = hNetlibHttp; + + // change the header so the plugin is pretended to be IE 6 + WinXP + NETLIBHTTPHEADER headers[5]; + nlhr.headersCount = 4; + nlhr.headers = headers; + nlhr.headers[0].szName = "User-Agent"; + nlhr.headers[0].szValue = NETLIB_USER_AGENT; + nlhr.headers[1].szName = "Cache-Control"; + nlhr.headers[1].szValue = "no-cache"; + nlhr.headers[2].szName = "Pragma"; + nlhr.headers[2].szValue = "no-cache"; + nlhr.headers[3].szName = "Connection"; + nlhr.headers[3].szValue = "close"; + char auth[256]; + if (g_plugin.getByte(hContact, "UseAuth", 0) || (pEditDlg && pEditDlg->m_useauth.IsChecked()) /*IsDlgButtonChecked(hwndDlg, IDC_USEAUTH)*/) { + nlhr.headersCount++; + nlhr.headers[4].szName = "Authorization"; + + CreateAuthString(auth, hContact, pEditDlg); + nlhr.headers[4].szValue = auth; + } + + // download the page + NETLIBHTTPREQUEST *nlhrReply = Netlib_HttpTransaction(hNetlibUser, &nlhr); + if (nlhrReply) { + // if the recieved code is 200 OK + if (nlhrReply->resultCode == 200 && nlhrReply->dataLength > 0) { + Netlib_LogfW(hNetlibUser, L"Code 200: Succeeded getting feed data %s.", tszUrl); + // allocate memory and save the retrieved data + *szData = (char *)mir_alloc((size_t)(nlhrReply->dataLength + 2)); + memcpy(*szData, nlhrReply->pData, (size_t)nlhrReply->dataLength); + (*szData)[nlhrReply->dataLength] = 0; + } + else if (nlhrReply->resultCode == 401) { + Netlib_LogfW(hNetlibUser, L"Code 401: feed %s needs auth data.", tszUrl); + + if (CAuthRequest(pEditDlg, hContact).DoModal() == IDOK) + GetNewsData(tszUrl, szData, hContact, pEditDlg); + } + else Netlib_LogfW(hNetlibUser, L"Code %d: Failed getting feed data %s.", nlhrReply->resultCode, tszUrl); + + Netlib_FreeHttpRequest(nlhrReply); + } + else Netlib_LogfW(hNetlibUser, L"Failed getting feed data %s, no response.", tszUrl); + + mir_free(szUrl); +} + +time_t DateToUnixTime(const char *stamp, bool FeedType) +{ + struct tm timestamp; + char date[9]; + int i, y; + time_t t; + + if (stamp == nullptr) + return 0; + + char *p = NEWSTR_ALLOCA(stamp); + + if (FeedType) { + // skip '-' chars + int si = 0, sj = 0; + while (true) { + if (p[si] == '-') + si++; + else if (!(p[sj++] = p[si++])) + break; + } + } + else { + char monthstr[4], timezonesign[2]; + int day, month = 0, year, hour, min, sec, timezoneh, timezonem; + if (strchr(p, ',')) { + strtok(p, ","); + p = strtok(nullptr, ","); + sscanf(p + 1, "%d %3s %d %d:%d:%d %1s%02d%02d", &day, &monthstr, &year, &hour, &min, &sec, &timezonesign, &timezoneh, &timezonem); + if (!mir_strcmpi(monthstr, "Jan")) + month = 1; + if (!mir_strcmpi(monthstr, "Feb")) + month = 2; + if (!mir_strcmpi(monthstr, "Mar")) + month = 3; + if (!mir_strcmpi(monthstr, "Apr")) + month = 4; + if (!mir_strcmpi(monthstr, "May")) + month = 5; + if (!mir_strcmpi(monthstr, "Jun")) + month = 6; + if (!mir_strcmpi(monthstr, "Jul")) + month = 7; + if (!mir_strcmpi(monthstr, "Aug")) + month = 8; + if (!mir_strcmpi(monthstr, "Sep")) + month = 9; + if (!mir_strcmpi(monthstr, "Oct")) + month = 10; + if (!mir_strcmpi(monthstr, "Nov")) + month = 11; + if (!mir_strcmpi(monthstr, "Dec")) + month = 12; + if (year < 2000) + year += 2000; + if (!mir_strcmp(timezonesign, "+")) + mir_snprintf(p, 4 + 2 + 2 + 1 + 2 + 1 + 2 + 1 + 2 + 1, "%04d%02d%02dT%02d:%02d:%02d", year, month, day, hour - timezoneh, min - timezonem, sec); + else if (!mir_strcmp(timezonesign, "-")) + mir_snprintf(p, 4 + 2 + 2 + 1 + 2 + 1 + 2 + 1 + 2 + 1, "%04d%02d%02dT%02d:%02d:%02d", year, month, day, hour + timezoneh, min + timezonem, sec); + else + mir_snprintf(p, 4 + 2 + 2 + 1 + 2 + 1 + 2 + 1 + 2 + 1, "%04d%02d%02dT%02d:%02d:%02d", year, month, day, hour, min, sec); + } + else if (strchr(p, 'T')) { + sscanf(p, "%d-%d-%dT%d:%d:%d", &year, &month, &day, &hour, &min, &sec); + mir_snprintf(p, 4 + 2 + 2 + 1 + 2 + 1 + 2 + 1 + 2 + 1, "%04d%02d%02dT%02d:%02d:%02d", year, month, day, hour, min, sec); + } + else { + sscanf(p, "%d-%d-%d %d:%d:%d %1s%02d%02d", &year, &month, &day, &hour, &min, &sec, &timezonesign, &timezoneh, &timezonem); + if (!mir_strcmp(timezonesign, "+")) + mir_snprintf(p, 4 + 2 + 2 + 1 + 2 + 1 + 2 + 1 + 2 + 1, "%04d%02d%02dT%02d:%02d:%02d", year, month, day, hour - timezoneh, min - timezonem, sec); + else if (!mir_strcmp(timezonesign, "-")) + mir_snprintf(p, 4 + 2 + 2 + 1 + 2 + 1 + 2 + 1 + 2 + 1, "%04d%02d%02dT%02d:%02d:%02d", year, month, day, hour + timezoneh, min + timezonem, sec); + else + mir_snprintf(p, 4 + 2 + 2 + 1 + 2 + 1 + 2 + 1 + 2 + 1, "%04d%02d%02dT%02d:%02d:%02d", year, month, day, hour, min, sec); + } + } + // Get the date part + for (i = 0; *p != '\0' && i < 8 && isdigit(*p); p++, i++) + date[i] = *p; + + // Parse year + if (i == 6) { + // 2-digit year ( 1970-2069 ) + y = (date[0] - '0') * 10 + (date[1] - '0'); + if (y < 70) + y += 100; + } + else if (i == 8) { + // 4-digit year + y = (date[0] - '0') * 1000 + (date[1] - '0') * 100 + (date[2] - '0') * 10 + date[3] - '0'; + y -= 1900; + } + else return 0; + + timestamp.tm_year = y; + // Parse month + timestamp.tm_mon = (date[i - 4] - '0') * 10 + date[i - 3] - '0' - 1; + // Parse date + timestamp.tm_mday = (date[i - 2] - '0') * 10 + date[i - 1] - '0'; + + // Skip any date/time delimiter + for (; *p != '\0' && !isdigit(*p); p++); + + // Parse time + if (sscanf(p, "%d:%d:%d", ×tamp.tm_hour, ×tamp.tm_min, ×tamp.tm_sec) != 3) + return 0; + + timestamp.tm_isdst = 0; // DST is already present in _timezone below + t = mktime(×tamp); + + _tzset(); + t -= (time_t)_timezone; + return (t >= 0) ? t : 0; +} + +bool DownloadFile(LPCTSTR tszURL, LPCTSTR tszLocal) +{ + NETLIBHTTPREQUEST nlhr = { 0 }; + nlhr.cbSize = sizeof(nlhr); + nlhr.requestType = REQUEST_GET; + nlhr.flags = NLHRF_DUMPASTEXT | NLHRF_HTTP11; + char *szUrl = mir_u2a(tszURL); + nlhr.szUrl = szUrl; + NETLIBHTTPHEADER headers[4]; + nlhr.headersCount = 4; + nlhr.headers = headers; + nlhr.headers[0].szName = "User-Agent"; + nlhr.headers[0].szValue = NETLIB_USER_AGENT; + nlhr.headers[1].szName = "Connection"; + nlhr.headers[1].szValue = "close"; + nlhr.headers[2].szName = "Cache-Control"; + nlhr.headers[2].szValue = "no-cache"; + nlhr.headers[3].szName = "Pragma"; + nlhr.headers[3].szValue = "no-cache"; + + bool ret = false; + NETLIBHTTPREQUEST *pReply = Netlib_HttpTransaction(hNetlibUser, &nlhr); + if (pReply) { + if ((200 == pReply->resultCode) && (pReply->dataLength > 0)) { + char *date = nullptr, *size = nullptr; + for (int i = 0; i < pReply->headersCount; i++) { + if (!mir_strcmpi(pReply->headers[i].szName, "Last-Modified")) { + date = pReply->headers[i].szValue; + continue; + } + else if (!mir_strcmpi(pReply->headers[i].szName, "Content-Length")) { + size = pReply->headers[i].szValue; + continue; + } + } + if (date != nullptr && size != nullptr) { + wchar_t *tsize = mir_a2u(size); + struct _stat buf; + + int fh = _wopen(tszLocal, _O_RDONLY); + if (fh != -1) { + _fstat(fh, &buf); + time_t modtime = DateToUnixTime(date, 0); + time_t filemodtime = mktime(localtime(&buf.st_atime)); + if (modtime > filemodtime && buf.st_size != _wtoi(tsize)) { + DWORD dwBytes; + HANDLE hFile = CreateFile(tszLocal, GENERIC_READ | GENERIC_WRITE, NULL, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr); + WriteFile(hFile, pReply->pData, (DWORD)pReply->dataLength, &dwBytes, nullptr); + ret = true; + if (hFile) + CloseHandle(hFile); + } + _close(fh); + } + else { + DWORD dwBytes; + HANDLE hFile = CreateFile(tszLocal, GENERIC_READ | GENERIC_WRITE, NULL, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr); + WriteFile(hFile, pReply->pData, (DWORD)pReply->dataLength, &dwBytes, nullptr); + ret = true; + if (hFile) + CloseHandle(hFile); + } + mir_free(tsize); + } + else { + DWORD dwBytes; + HANDLE hFile = CreateFile(tszLocal, GENERIC_READ | GENERIC_WRITE, NULL, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr); + WriteFile(hFile, pReply->pData, (DWORD)pReply->dataLength, &dwBytes, nullptr); + ret = true; + if (hFile) + CloseHandle(hFile); + } + } + Netlib_FreeHttpRequest(pReply); + } + + mir_free(szUrl); + + return ret; +} + +typedef HRESULT(MarkupCallback)(IHTMLDocument3 *, BSTR &message); + +HRESULT TestMarkupServices(BSTR bstrHtml, MarkupCallback *pCallback, BSTR &message) +{ + IHTMLDocument3 *pHtmlDocRoot = nullptr; + + // Create the root document -- a "workspace" for parsing. + HRESULT hr = CoCreateInstance(CLSID_HTMLDocument, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pHtmlDocRoot)); + if (SUCCEEDED(hr) && pHtmlDocRoot) { + IPersistStreamInit *pPersistStreamInit = nullptr; + + hr = pHtmlDocRoot->QueryInterface(IID_PPV_ARGS(&pPersistStreamInit)); + if (SUCCEEDED(hr)) { + // Initialize the root document to a default state -- ready for parsing. + pPersistStreamInit->InitNew(); + + IMarkupServices *pMarkupServices = nullptr; + hr = pHtmlDocRoot->QueryInterface(IID_PPV_ARGS(&pMarkupServices)); + if (SUCCEEDED(hr)) { + IMarkupPointer *pMarkupBegin = nullptr; + IMarkupPointer *pMarkupEnd = nullptr; + + // These markup pointers indicate the insertion point. + hr = pMarkupServices->CreateMarkupPointer(&pMarkupBegin); + if (SUCCEEDED(hr)) + hr = pMarkupServices->CreateMarkupPointer(&pMarkupEnd); + + if (SUCCEEDED(hr) && pMarkupBegin && pMarkupEnd) { + IMarkupContainer *pMarkupContainer = nullptr; + + // Parse the string -- the markup container contains the parsed HTML. + // Markup pointers are updated to point to begining and end of new container. + hr = pMarkupServices->ParseString(bstrHtml, 0, &pMarkupContainer, pMarkupBegin, pMarkupEnd); + if (SUCCEEDED(hr) && pMarkupContainer) { + IHTMLDocument3 *pHtmlDoc = nullptr; + + // Retrieve the document interface to the markup container. + hr = pMarkupContainer->QueryInterface(IID_PPV_ARGS(&pHtmlDoc)); + if (SUCCEEDED(hr) && pHtmlDoc) { + // Invoke the user-defined action for this new fragment. + hr = pCallback(pHtmlDoc, message); + + // Clean up. + pHtmlDoc->Release(); + } + pMarkupContainer->Release(); + } + pMarkupEnd->Release(); + } + if (pMarkupBegin) + pMarkupBegin->Release(); + pMarkupServices->Release(); + } + pPersistStreamInit->Release(); + } + pHtmlDocRoot->Release(); + } + return hr; +} + +HRESULT TestDocumentText(IHTMLDocument3 *pHtmlDoc, BSTR &message) +{ + IHTMLDocument2 *pDoc = nullptr; + IHTMLElement *pElem = nullptr; + BSTR bstrId = SysAllocString(L"test"); + + HRESULT hr = pHtmlDoc->QueryInterface(IID_PPV_ARGS(&pDoc)); + if (SUCCEEDED(hr) && pDoc) { + hr = pDoc->get_body(&pElem); + if (SUCCEEDED(hr) && pElem) { + BSTR bstrText = nullptr; + pElem->get_innerText(&bstrText); + message = SysAllocString(bstrText); + SysFreeString(bstrText); + pElem->Release(); + } + + pDoc->Release(); + } + + SysFreeString(bstrId); + return hr; +} + +LPCTSTR ClearText(CMStringW &result, const wchar_t *message) +{ + BSTR bstrHtml = SysAllocString(message), bstrRes = SysAllocString(L""); + HRESULT hr = TestMarkupServices(bstrHtml, &TestDocumentText, bstrRes); + if (SUCCEEDED(hr)) + result = bstrRes; + else + result = message; + SysFreeString(bstrHtml); + SysFreeString(bstrRes); + + result.Replace(L"£", L"£"); //pound + result.Replace(L"²", L"²"); //sup2 + result.Replace(L"ä", L"ä"); //auml + result.Replace(L"é", L"é"); //latin small letter e with acute + result.Replace(L"ë", L"ë"); //euml + result.Replace(L"ö", L"ö"); //ouml + result.Replace(L"ž", L"ž"); //Latin Small Letter Z With Caron + result.Replace(L"ʙ", L"ʙ"); //latin letter small capital b + result.Replace(L"̆", L"˘"); //Combining Breve + result.Replace(L"́", L"´"); //Combining Acute Accent острое ударение + result.Replace(L"ο", L"ό"); // greek small letter omicron with tonos + result.Replace(L"ѣ", L"ѣ"); //Cyrillic Small Letter Yat + result.Replace(L"Ҝ", L"Ҝ"); //cyrillic capital letter ka with vertical stroke + result.Replace(L"​", L""); + result.Replace(L"‎", L""); //lrm + result.Replace(L"‏", L""); //rlm + result.Replace(L"‑", L"‑"); //Non-Breaking Hyphen + result.Replace(L"‣", L"‣"); //Triangular Bullet + result.Replace(L"−", L"−"); //minus + result.Replace(L"☺", L"☺"); //White Smiling Face + result.Replace(L"", L""); + + result.Trim(); + + return result; +} + +MCONTACT GetContactByNick(const wchar_t *nick) +{ + for (auto &hContact : Contacts(MODULENAME)) { + ptrW contactNick(g_plugin.getWStringA(hContact, "Nick")); + if (!mir_wstrcmpi(contactNick, nick)) + return hContact; + } + return 0; +} + +MCONTACT GetContactByURL(const wchar_t *url) +{ + for (auto &hContact : Contacts(MODULENAME)) { + ptrW contactURL(g_plugin.getWStringA(hContact, "URL")); + if (!mir_wstrcmpi(contactURL, url)) + return hContact; + } + return 0; +} |