From 0ece30dc7c0e34b4c5911969b8fa99c33c6d023c Mon Sep 17 00:00:00 2001 From: George Hazan Date: Wed, 30 Nov 2022 17:48:47 +0300 Subject: Telegram: update for TDLIB --- protocols/Telegram/tdlib/td/tdutils/test/gzip.cpp | 188 ++++++++++++++++++---- 1 file changed, 161 insertions(+), 27 deletions(-) (limited to 'protocols/Telegram/tdlib/td/tdutils/test/gzip.cpp') diff --git a/protocols/Telegram/tdlib/td/tdutils/test/gzip.cpp b/protocols/Telegram/tdlib/td/tdutils/test/gzip.cpp index e4bd81eb0d..32d75474e8 100644 --- a/protocols/Telegram/tdlib/td/tdutils/test/gzip.cpp +++ b/protocols/Telegram/tdlib/td/tdutils/test/gzip.cpp @@ -1,49 +1,59 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // +#include "td/utils/algorithm.h" #include "td/utils/buffer.h" #include "td/utils/ByteFlow.h" +#include "td/utils/common.h" #include "td/utils/Gzip.h" #include "td/utils/GzipByteFlow.h" #include "td/utils/logging.h" +#include "td/utils/port/thread_local.h" +#include "td/utils/Slice.h" #include "td/utils/Status.h" #include "td/utils/tests.h" +#include "td/utils/Time.h" -static void encode_decode(td::string s) { +static void encode_decode(const td::string &s) { auto r = td::gzencode(s, 2); ASSERT_TRUE(!r.empty()); - if (r.empty()) { - return; - } - auto new_s = td::gzdecode(r.as_slice()); - ASSERT_TRUE(!new_s.empty()); - if (new_s.empty()) { - return; - } - ASSERT_EQ(s, new_s.as_slice().str()); + ASSERT_EQ(s, td::gzdecode(r.as_slice())); } TEST(Gzip, gzencode_gzdecode) { - auto str = td::rand_string(0, 127, 1000); - encode_decode(str); - str = td::rand_string('a', 'z', 1000000); - encode_decode(str); - str = td::string(1000000, 'a'); - encode_decode(str); + 
encode_decode(td::rand_string(0, 255, 1000)); + encode_decode(td::rand_string('a', 'z', 1000000)); + encode_decode(td::string(1000000, 'a')); +} + +static void test_gzencode(const td::string &s) { + auto begin_time = td::Time::now(); + auto r = td::gzencode(s, td::max(2, static_cast<int>(100 / s.size()))); + ASSERT_TRUE(!r.empty()); + LOG(INFO) << "Encoded string of size " << s.size() << " in " << (td::Time::now() - begin_time) + << " with compression ratio " << static_cast<double>(r.size()) / static_cast<double>(s.size()); +} + +TEST(Gzip, gzencode) { + for (size_t len = 1; len <= 10000000; len *= 10) { + test_gzencode(td::rand_string('a', 'a', len)); + test_gzencode(td::rand_string('a', 'z', len)); + test_gzencode(td::rand_string(0, 255, len)); + } } TEST(Gzip, flow) { auto str = td::rand_string('a', 'z', 1000000); auto parts = td::rand_split(str); - auto input_writer = td::ChainBufferWriter::create_empty(); + td::ChainBufferWriter input_writer; auto input = input_writer.extract_reader(); td::ByteFlowSource source(&input); - td::GzipByteFlow gzip_flow(td::Gzip::Encode); - gzip_flow = td::GzipByteFlow(td::Gzip::Encode); + td::GzipByteFlow gzip_flow(td::Gzip::Mode::Encode); + gzip_flow = td::GzipByteFlow(td::Gzip::Mode::Encode); td::ByteFlowSink sink; source >> gzip_flow >> sink; @@ -63,14 +73,15 @@ TEST(Gzip, flow) { } TEST(Gzip, flow_error) { auto str = td::rand_string('a', 'z', 1000000); - auto zip = td::gzencode(str).as_slice().str(); + auto zip = td::gzencode(str, 0.9).as_slice().str(); + ASSERT_TRUE(!zip.empty()); zip.resize(zip.size() - 1); auto parts = td::rand_split(zip); auto input_writer = td::ChainBufferWriter(); auto input = input_writer.extract_reader(); td::ByteFlowSource source(&input); - td::GzipByteFlow gzip_flow(td::Gzip::Decode); + td::GzipByteFlow gzip_flow(td::Gzip::Mode::Decode); td::ByteFlowSink sink; source >> gzip_flow >> sink; @@ -89,13 +100,13 @@ TEST(Gzip, flow_error) { TEST(Gzip, encode_decode_flow) { auto str = td::rand_string('a', 'z', 1000000); auto 
parts = td::rand_split(str); - auto input_writer = td::ChainBufferWriter::create_empty(); + td::ChainBufferWriter input_writer; auto input = input_writer.extract_reader(); td::ByteFlowSource source(&input); - td::GzipByteFlow gzip_encode_flow(td::Gzip::Encode); - td::GzipByteFlow gzip_decode_flow(td::Gzip::Decode); - td::GzipByteFlow gzip_encode_flow2(td::Gzip::Encode); - td::GzipByteFlow gzip_decode_flow2(td::Gzip::Decode); + td::GzipByteFlow gzip_encode_flow(td::Gzip::Mode::Encode); + td::GzipByteFlow gzip_decode_flow(td::Gzip::Mode::Decode); + td::GzipByteFlow gzip_encode_flow2(td::Gzip::Mode::Encode); + td::GzipByteFlow gzip_decode_flow2(td::Gzip::Mode::Decode); td::ByteFlowSink sink; source >> gzip_encode_flow >> gzip_decode_flow >> gzip_encode_flow2 >> gzip_decode_flow2 >> sink; @@ -111,3 +122,126 @@ TEST(Gzip, encode_decode_flow) { ASSERT_TRUE(sink.status().is_ok()); ASSERT_EQ(str, sink.result()->move_as_buffer_slice().as_slice().str()); } + +TEST(Gzip, encode_decode_flow_big) { + td::clear_thread_locals(); + auto start_mem = td::BufferAllocator::get_buffer_mem(); + { + auto str = td::string(200000, 'a'); + td::ChainBufferWriter input_writer; + auto input = input_writer.extract_reader(); + td::ByteFlowSource source(&input); + td::GzipByteFlow gzip_encode_flow(td::Gzip::Mode::Encode); + td::GzipByteFlow gzip_decode_flow(td::Gzip::Mode::Decode); + td::GzipByteFlow gzip_encode_flow2(td::Gzip::Mode::Encode); + td::GzipByteFlow gzip_decode_flow2(td::Gzip::Mode::Decode); + td::ByteFlowSink sink; + source >> gzip_encode_flow >> gzip_decode_flow >> gzip_encode_flow2 >> gzip_decode_flow2 >> sink; + + ASSERT_TRUE(!sink.is_ready()); + size_t n = 200; + size_t left_size = n * str.size(); + auto validate = [&](td::Slice chunk) { + CHECK(chunk.size() <= left_size); + left_size -= chunk.size(); + ASSERT_TRUE(td::all_of(chunk, [](auto c) { return c == 'a'; })); + }; + + for (size_t i = 0; i < n; i++) { + input_writer.append(str); + source.wakeup(); + auto extra_mem = 
td::BufferAllocator::get_buffer_mem() - start_mem; + // limit means nothing. just check that we do not use 200Mb or so + CHECK(extra_mem < (10 << 20)); + + auto size = sink.get_output()->size(); + validate(sink.get_output()->cut_head(size).move_as_buffer_slice().as_slice()); + } + ASSERT_TRUE(!sink.is_ready()); + source.close_input(td::Status::OK()); + ASSERT_TRUE(sink.is_ready()); + LOG_IF(ERROR, sink.status().is_error()) << sink.status(); + ASSERT_TRUE(sink.status().is_ok()); + validate(sink.result()->move_as_buffer_slice().as_slice()); + ASSERT_EQ(0u, left_size); + } + td::clear_thread_locals(); + ASSERT_EQ(start_mem, td::BufferAllocator::get_buffer_mem()); +} + +TEST(Gzip, decode_encode_flow_bomb) { + td::string gzip_bomb_str; + size_t N = 200; + { + td::ChainBufferWriter input_writer; + auto input = input_writer.extract_reader(); + td::GzipByteFlow gzip_flow(td::Gzip::Mode::Encode); + td::ByteFlowSource source(&input); + td::ByteFlowSink sink; + source >> gzip_flow >> sink; + + td::string s(1 << 16, 'a'); + for (size_t i = 0; i < N; i++) { + input_writer.append(s); + source.wakeup(); + } + source.close_input(td::Status::OK()); + ASSERT_TRUE(sink.is_ready()); + LOG_IF(ERROR, sink.status().is_error()) << sink.status(); + ASSERT_TRUE(sink.status().is_ok()); + gzip_bomb_str = sink.result()->move_as_buffer_slice().as_slice().str(); + } + + td::clear_thread_locals(); + auto start_mem = td::BufferAllocator::get_buffer_mem(); + { + td::ChainBufferWriter input_writer; + auto input = input_writer.extract_reader(); + td::ByteFlowSource source(&input); + td::GzipByteFlow::Options decode_options; + decode_options.write_watermark.low = 2 << 20; + decode_options.write_watermark.high = 4 << 20; + td::GzipByteFlow::Options encode_options; + encode_options.read_watermark.low = 2 << 20; + encode_options.read_watermark.high = 4 << 20; + td::GzipByteFlow gzip_decode_flow(td::Gzip::Mode::Decode); + gzip_decode_flow.set_options(decode_options); + td::GzipByteFlow 
gzip_encode_flow(td::Gzip::Mode::Encode); + gzip_encode_flow.set_options(encode_options); + td::GzipByteFlow gzip_decode_flow2(td::Gzip::Mode::Decode); + gzip_decode_flow2.set_options(decode_options); + td::GzipByteFlow gzip_encode_flow2(td::Gzip::Mode::Encode); + gzip_encode_flow2.set_options(encode_options); + td::GzipByteFlow gzip_decode_flow3(td::Gzip::Mode::Decode); + gzip_decode_flow3.set_options(decode_options); + td::ByteFlowSink sink; + source >> gzip_decode_flow >> gzip_encode_flow >> gzip_decode_flow2 >> gzip_encode_flow2 >> gzip_decode_flow3 >> + sink; + + ASSERT_TRUE(!sink.is_ready()); + size_t left_size = N * (1 << 16); + auto validate = [&](td::Slice chunk) { + CHECK(chunk.size() <= left_size); + left_size -= chunk.size(); + ASSERT_TRUE(td::all_of(chunk, [](auto c) { return c == 'a'; })); + }; + + input_writer.append(gzip_bomb_str); + source.close_input(td::Status::OK()); + + do { + gzip_decode_flow3.wakeup(); + gzip_decode_flow2.wakeup(); + gzip_decode_flow.wakeup(); + source.wakeup(); + auto extra_mem = td::BufferAllocator::get_buffer_mem() - start_mem; + // limit means nothing. just check that we do not use 15Mb or so + CHECK(extra_mem < (5 << 20)); + auto size = sink.get_output()->size(); + validate(sink.get_output()->cut_head(size).move_as_buffer_slice().as_slice()); + } while (!sink.is_ready()); + ASSERT_EQ(0u, left_size); + } + td::clear_thread_locals(); + ASSERT_EQ(start_mem, td::BufferAllocator::get_buffer_mem()); +} -- cgit v1.2.3