diff options
Diffstat (limited to 'protocols/Telegram/tdlib/td/tddb/td/db/binlog')
13 files changed, 607 insertions, 249 deletions
diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/Binlog.cpp b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/Binlog.cpp index 5d76028dba..5b14eed69d 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/Binlog.cpp +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/Binlog.cpp @@ -1,5 +1,5 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -11,14 +11,16 @@ #include "td/utils/buffer.h" #include "td/utils/format.h" -#include "td/utils/logging.h" #include "td/utils/misc.h" #include "td/utils/port/Clocks.h" -#include "td/utils/port/Fd.h" +#include "td/utils/port/FileFd.h" #include "td/utils/port/path.h" +#include "td/utils/port/PollFlags.h" +#include "td/utils/port/sleep.h" #include "td/utils/port/Stat.h" #include "td/utils/Random.h" #include "td/utils/ScopeGuard.h" +#include "td/utils/SliceBuilder.h" #include "td/utils/Status.h" #include "td/utils/Time.h" #include "td/utils/tl_helpers.h" @@ -53,7 +55,7 @@ struct AesCtrEncryptionEvent { BufferSlice iv_; BufferSlice key_hash_; - BufferSlice generate_key(const DbKey &db_key) { + BufferSlice generate_key(const DbKey &db_key) const { CHECK(!db_key.is_empty()); BufferSlice key(key_size()); size_t iteration_count = kdf_iteration_count(); @@ -63,7 +65,8 @@ struct AesCtrEncryptionEvent { pbkdf2_sha256(db_key.data(), key_salt_.as_slice(), narrow_cast<int>(iteration_count), key.as_slice()); return key; } - BufferSlice generate_hash(Slice key) { + + static BufferSlice generate_hash(Slice key) { BufferSlice hash(hash_size()); hmac_sha256(key, "cucumbers everywhere", hash.as_slice()); return hash; @@ -91,18 +94,23 @@ struct AesCtrEncryptionEvent { class BinlogReader { public: - BinlogReader() = default; explicit BinlogReader(ChainBufferReader *input) : input_(input) { } - void set_input(ChainBufferReader *input) { + void set_input(ChainBufferReader *input, bool is_encrypted, int64 expected_size) { input_ = input; + is_encrypted_ = is_encrypted; + expected_size_ = expected_size; + } + + ChainBufferReader *input() { + return input_; } - int64 offset() { + int64 offset() const { return offset_; } Result<size_t> read_next(BinlogEvent *event) { - if (state_ == ReadLength) { + if (state_ == State::ReadLength) { if (input_->size() < 4) { return 4; } @@ -112,35 +120,52 @@ class BinlogReader { it.advance(4, MutableSlice(buf, 4)); size_ = static_cast<size_t>(TlParser(Slice(buf, 4)).fetch_int()); - if (size_ > MAX_EVENT_SIZE) { + if (size_ > BinlogEvent::MAX_SIZE) { return Status::Error(PSLICE() << "Too big event " << tag("size", size_)); } - if (size_ < MIN_EVENT_SIZE) { + if (size_ < BinlogEvent::MIN_SIZE) { return Status::Error(PSLICE() << "Too small event " << tag("size", size_)); } - state_ = ReadEvent; + if (size_ % 4 != 0) { + return Status::Error(-2, PSLICE() << "Event of size " << size_ << " at offset " << offset() << " out of " + << expected_size_ << ' ' << tag("is_encrypted", is_encrypted_) + << format::as_hex_dump<4>(Slice(input_->prepare_read().truncate(28)))); + } + state_ = State::ReadEvent; } if (input_->size() < size_) { return size_; } + event->debug_info_ = BinlogDebugInfo{__FILE__, __LINE__}; TRY_STATUS(event->init(input_->cut_head(size_).move_as_buffer_slice())); offset_ += size_; event->offset_ = offset_; - state_ = ReadLength; + state_ = State::ReadLength; return 0; } private: ChainBufferReader *input_; - enum { ReadLength, ReadEvent } state_ = ReadLength; + enum class State { ReadLength, ReadEvent }; + State state_ = State::ReadLength; size_t size_{0}; int64 offset_{0}; + int64 expected_size_{0}; + bool is_encrypted_{false}; }; + +static int64 file_size(CSlice path) { + auto r_stat = stat(path); + if (r_stat.is_error()) { + return 0; + } + return r_stat.ok().size_; +} } // namespace detail -bool Binlog::IGNORE_ERASE_HACK = false; +int32 VERBOSITY_NAME(binlog) = VERBOSITY_NAME(DEBUG) + 8; Binlog::Binlog() = default; @@ -148,9 +173,9 @@ Binlog::~Binlog() { close().ignore(); } -Result<FileFd> Binlog::open_binlog(CSlice path, int32 flags) { +Result<FileFd> Binlog::open_binlog(const string &path, int32 flags) { TRY_RESULT(fd, FileFd::open(path, flags)); - TRY_STATUS(fd.lock(FileFd::LockFlags::Write, 100)); + TRY_STATUS(fd.lock(FileFd::LockFlags::Write, path, 100)); return std::move(fd); } @@ -161,9 +186,9 @@ Status Binlog::init(string path, const Callback &callback, DbKey db_key, DbKey o db_key_ = std::move(db_key); old_db_key_ = std::move(old_db_key); - processor_ = std::make_unique<detail::BinlogEventsProcessor>(); + processor_ = make_unique<detail::BinlogEventsProcessor>(); // Turn off BinlogEventsBuffer - // events_buffer_ = std::make_unique<detail::BinlogEventsBuffer>(); + // events_buffer_ = make_unique<detail::BinlogEventsBuffer>(); // try to restore binlog from regenerated version if (stat(path).is_error()) { @@ -200,6 +225,10 @@ Status Binlog::init(string path, const Callback &callback, DbKey db_key, DbKey o } void Binlog::add_event(BinlogEvent &&event) { + if (event.size_ % 4 != 0) { + LOG(FATAL) << "Trying to add event with bad size " << event.public_to_string(); + } + if (!events_buffer_) { do_add_event(std::move(event)); } else { @@ -215,7 +244,7 @@ void Binlog::add_event(BinlogEvent &&event) { auto need_reindex = [&](int64 min_size, int rate) { return fd_size > min_size && fd_size / rate > processor_->total_raw_events_size(); }; - if (need_reindex(100000, 5) || need_reindex(500000, 2)) { + if (need_reindex(50000, 5) || need_reindex(100000, 4) || need_reindex(300000, 3) || need_reindex(500000, 2)) { LOG(INFO) << tag("fd_size", format::as_size(fd_size)) << tag("total events size", format::as_size(processor_->total_raw_events_size())); do_reindex(); @@ -254,20 +283,25 @@ Status Binlog::close(bool need_sync) { if (fd_.empty()) { return Status::OK(); } - SCOPE_EXIT { - path_ = ""; - info_.is_opened = false; - fd_.close(); - need_sync_ = false; - }; if (need_sync) { sync(); } else { flush(); } + + fd_.lock(FileFd::LockFlags::Unlock, path_, 1).ensure(); + fd_.close(); + path_.clear(); + info_.is_opened = false; + need_sync_ = false; return Status::OK(); } +void Binlog::close(Promise<> promise) { + TRY_STATUS_PROMISE(promise, close()); + promise.set_value({}); +} + void Binlog::change_key(DbKey new_db_key) { db_key_ = std::move(new_db_key); aes_ctr_key_salt_ = BufferSlice(); @@ -280,18 +314,24 @@ Status Binlog::close_and_destroy() { destroy(path).ignore(); return close_status; } + Status Binlog::destroy(Slice path) { + unlink(PSLICE() << path << ".new").ignore(); // delete regenerated version first to avoid it becoming main version unlink(PSLICE() << path).ignore(); - unlink(PSLICE() << path << ".new").ignore(); return Status::OK(); } void Binlog::do_event(BinlogEvent &&event) { - fd_events_++; - fd_size_ += event.raw_event_.size(); + auto event_size = event.raw_event_.size(); if (state_ == State::Run || state_ == State::Reindex) { - VLOG(binlog) << "Write binlog event: " << format::cond(state_ == State::Reindex, "[reindex] ") << event; + auto validate_status = event.validate(); + if (validate_status.is_error()) { + LOG(FATAL) << "Failed to validate binlog event " << validate_status << " " + << format::as_hex_dump<4>(Slice(event.raw_event_.as_slice().truncate(28))); + } + VLOG(binlog) << "Write binlog event: " << format::cond(state_ == State::Reindex, "[reindex] ") + << event.public_to_string(); switch (encryption_type_) { case EncryptionType::None: { buffer_writer_.append(event.raw_event_.clone()); @@ -311,16 +351,18 @@ void Binlog::do_event(BinlogEvent &&event) { BufferSlice key; if (aes_ctr_key_salt_.as_slice() == encryption_event.key_salt_.as_slice()) { - key = BufferSlice(Slice(aes_ctr_key_.raw, sizeof(aes_ctr_key_.raw))); + key = BufferSlice(as_slice(aes_ctr_key_)); } else if (!db_key_.is_empty()) { key = encryption_event.generate_key(db_key_); } - if (encryption_event.generate_hash(key.as_slice()).as_slice() != encryption_event.key_hash_.as_slice()) { + if (detail::AesCtrEncryptionEvent::generate_hash(key.as_slice()).as_slice() != + encryption_event.key_hash_.as_slice()) { CHECK(state_ == State::Load); if (!old_db_key_.is_empty()) { key = encryption_event.generate_key(old_db_key_); - if (encryption_event.generate_hash(key.as_slice()).as_slice() != encryption_event.key_hash_.as_slice()) { + if (detail::AesCtrEncryptionEvent::generate_hash(key.as_slice()).as_slice() != + encryption_event.key_hash_.as_slice()) { info_.wrong_password = true; } } else { @@ -344,13 +386,31 @@ void Binlog::do_event(BinlogEvent &&event) { update_write_encryption(); //LOG(INFO) << format::cond(state_ == State::Run, "Run", "Reindex") << ": init encryption"; } - return; } } if (state_ != State::Reindex) { - processor_->add_event(std::move(event)); + auto status = processor_->add_event(std::move(event)); + if (status.is_error()) { + auto old_size = detail::file_size(path_); + auto data = debug_get_binlog_data(fd_size_, old_size); + if (state_ == State::Load) { + fd_.seek(fd_size_).ensure(); + fd_.truncate_to_current_position(fd_size_).ensure(); + + if (data.empty()) { + return; + } + } + + LOG(FATAL) << "Truncate binlog \"" << path_ << "\" from size " << old_size << " to size " << fd_size_ + << " in state " << static_cast<int32>(state_) << " due to error: " << status << " after reading " + << data; + } } + + fd_events_++; + fd_size_ += event_size; } void Binlog::sync() { @@ -396,7 +456,9 @@ void Binlog::update_read_encryption() { CHECK(binlog_reader_ptr_); switch (encryption_type_) { case EncryptionType::None: { - binlog_reader_ptr_->set_input(&buffer_reader_); + auto r_file_size = fd_.get_size(); + r_file_size.ensure(); + binlog_reader_ptr_->set_input(&buffer_reader_, false, r_file_size.ok()); byte_flow_flag_ = false; break; } @@ -407,7 +469,9 @@ void Binlog::update_read_encryption() { byte_flow_sink_ = ByteFlowSink(); byte_flow_source_ >> aes_xcode_byte_flow_ >> byte_flow_sink_; byte_flow_flag_ = true; - binlog_reader_ptr_->set_input(byte_flow_sink_.get_output()); + auto r_file_size = fd_.get_size(); + r_file_size.ensure(); + binlog_reader_ptr_->set_input(byte_flow_sink_.get_output(), true, r_file_size.ok()); break; } } @@ -439,67 +503,70 @@ Status Binlog::load_binlog(const Callback &callback, const Callback &debug_callb buffer_writer_ = ChainBufferWriter(); buffer_reader_ = buffer_writer_.extract_reader(); fd_.set_input_writer(&buffer_writer_); - detail::BinlogReader reader; + detail::BinlogReader reader{nullptr}; binlog_reader_ptr_ = &reader; update_read_encryption(); - bool ready_flag = false; - fd_.update_flags(Fd::Flag::Read); + fd_.get_poll_info().add_flags(PollFlags::Read()); info_.wrong_password = false; while (true) { BinlogEvent event; auto r_need_size = reader.read_next(&event); if (r_need_size.is_error()) { + if (r_need_size.error().code() == -2) { + auto old_size = detail::file_size(path_); + auto offset = reader.offset(); + auto data = debug_get_binlog_data(offset, old_size); + fd_.seek(offset).ensure(); + fd_.truncate_to_current_position(offset).ensure(); + if (data.empty()) { + break; + } + LOG(FATAL) << "Truncate binlog \"" << path_ << "\" from size " << old_size << " to size " << offset + << " due to error: " << r_need_size.error() << " after reading " << data; + } LOG(ERROR) << r_need_size.error(); break; } auto need_size = r_need_size.move_as_ok(); - // LOG(ERROR) << "need size = " << need_size; + // LOG(ERROR) << "Need size = " << need_size; if (need_size == 0) { - if (IGNORE_ERASE_HACK && event.type_ == BinlogEvent::ServiceTypes::Empty && - (event.flags_ & BinlogEvent::Flags::Rewrite) != 0) { - // skip erase - } else { - if (debug_callback) { - debug_callback(event); - } - do_add_event(std::move(event)); - if (info_.wrong_password) { - return Status::OK(); - } + if (debug_callback) { + debug_callback(event); } - ready_flag = false; - } else { - // TODO(now): fix bug - if (ready_flag) { - break; + do_add_event(std::move(event)); + if (info_.wrong_password) { + return Status::OK(); } + } else { TRY_STATUS(fd_.flush_read(max(need_size, static_cast<size_t>(4096)))); buffer_reader_.sync_with_writer(); if (byte_flow_flag_) { byte_flow_source_.wakeup(); } - ready_flag = true; + if (reader.input()->size() < need_size) { + break; + } } } auto offset = processor_->offset(); processor_->for_each([&](BinlogEvent &event) { - VLOG(binlog) << "Replay binlog event: " << event; + VLOG(binlog) << "Replay binlog event: " << event.public_to_string(); if (callback) { callback(event); } }); - auto fd_size = fd_.get_size(); + TRY_RESULT(fd_size, fd_.get_size()); if (offset != fd_size) { LOG(ERROR) << "Truncate " << tag("path", path_) << tag("old_size", fd_size) << tag("new_size", offset); fd_.seek(offset).ensure(); fd_.truncate_to_current_position(offset).ensure(); db_key_used_ = false; // force reindex } - CHECK(IGNORE_ERASE_HACK || fd_size_ == offset) << fd_size << " " << fd_size_ << " " << offset; + LOG_CHECK(fd_size_ == offset) << fd_size << " " << fd_size_ << " " << offset; binlog_reader_ptr_ = nullptr; state_ = State::Run; @@ -515,19 +582,11 @@ Status Binlog::load_binlog(const Callback &callback, const Callback &debug_callb return Status::OK(); } -static int64 file_size(CSlice path) { - auto r_stat = stat(path); - if (r_stat.is_error()) { - return 0; - } - return r_stat.ok().size_; -} - void Binlog::update_encryption(Slice key, Slice iv) { - MutableSlice(aes_ctr_key_.raw, sizeof(aes_ctr_key_.raw)).copy_from(key); + as_slice(aes_ctr_key_).copy_from(key); UInt128 aes_ctr_iv; - MutableSlice(aes_ctr_iv.raw, sizeof(aes_ctr_iv.raw)).copy_from(iv); - aes_ctr_state_.init(aes_ctr_key_, aes_ctr_iv); + as_slice(aes_ctr_iv).copy_from(iv); + aes_ctr_state_.init(as_slice(aes_ctr_key_), as_slice(aes_ctr_iv)); } void Binlog::reset_encryption() { @@ -550,15 +609,16 @@ void Binlog::reset_encryption() { BufferSlice key; if (aes_ctr_key_salt_.as_slice() == event.key_salt_.as_slice()) { - key = BufferSlice(Slice(aes_ctr_key_.raw, sizeof(aes_ctr_key_.raw))); + key = BufferSlice(as_slice(aes_ctr_key_)); } else { key = event.generate_key(db_key_); } - event.key_hash_ = event.generate_hash(key.as_slice()); + event.key_hash_ = EncryptionEvent::generate_hash(key.as_slice()); do_event(BinlogEvent( - BinlogEvent::create_raw(0, BinlogEvent::ServiceTypes::AesCtrEncryption, 0, create_default_storer(event)))); + BinlogEvent::create_raw(0, BinlogEvent::ServiceTypes::AesCtrEncryption, 0, create_default_storer(event)), + BinlogDebugInfo{__FILE__, __LINE__})); } void Binlog::do_reindex() { @@ -571,7 +631,7 @@ void Binlog::do_reindex() { }; auto start_time = Clocks::monotonic(); - auto start_size = file_size(path_); + auto start_size = detail::file_size(path_); auto start_events = fd_events_; string new_path = path_ + ".new"; @@ -581,7 +641,7 @@ void Binlog::do_reindex() { LOG(ERROR) << "Can't open new binlog for regenerate: " << r_opened_file.error(); return; } - fd_.close(); + auto old_fd = std::move(fd_); // can't close fd_ now, because it will release file lock fd_ = BufferedFdBase<FileFd>(r_opened_file.move_as_ok()); buffer_writer_ = ChainBufferWriter(); @@ -594,27 +654,49 @@ void Binlog::do_reindex() { fd_events_ = 0; reset_encryption(); processor_->for_each([&](BinlogEvent &event) { + event.realloc(); do_event(std::move(event)); // NB: no move is actually happens }); - need_sync_ = true; // must sync creation of the file + need_sync_ = start_size != 0; // must sync creation of the file if it is non-empty sync(); // finish_reindex auto status = unlink(path_); LOG_IF(FATAL, status.is_error()) << "Failed to unlink old binlog: " << status; + old_fd.close(); // now we can close old file and release the system lock status = rename(new_path, path_); + FileFd::remove_local_lock(new_path); // now we can release local lock for temporary file LOG_IF(FATAL, status.is_error()) << "Failed to rename binlog: " << status; auto finish_time = Clocks::monotonic(); auto finish_size = fd_size_; auto finish_events = fd_events_; - CHECK(fd_size_ == file_size(path_)); + for (int left_tries = 10; left_tries > 0; left_tries--) { + auto r_stat = stat(path_); + if (r_stat.is_error()) { + if (left_tries != 1) { + usleep_for(200000 / left_tries); + continue; + } + LOG(FATAL) << "Failed to rename binlog of size " << fd_size_ << " to " << path_ << ": " << r_stat.error() + << ". Temp file size is " << detail::file_size(new_path) << ", new size " << detail::file_size(path_); + } + LOG_CHECK(fd_size_ == r_stat.ok().size_) << fd_size_ << ' ' << r_stat.ok().size_ << ' ' + << detail::file_size(new_path) << ' ' << fd_events_ << ' ' << path_; + break; + } - // TODO: print warning only if time or ratio is suspicious - double ratio = static_cast<double>(start_size) / static_cast<double>(finish_size + 1); - LOG(INFO) << "regenerate index " << tag("name", path_) << tag("time", format::as_time(finish_time - start_time)) - << tag("before_size", format::as_size(start_size)) << tag("after_size", format::as_size(finish_size)) - << tag("ratio", ratio) << tag("before_events", start_events) << tag("after_events", finish_events); + auto ratio = static_cast<double>(start_size) / static_cast<double>(finish_size + 1); + + [&](Slice msg) { + if (start_size > (10 << 20) || finish_time - start_time > 1) { + LOG(WARNING) << "Slow " << msg; + } else { + LOG(INFO) << msg; + } + }(PSLICE() << "Regenerate index " << tag("name", path_) << tag("time", format::as_time(finish_time - start_time)) + << tag("before_size", format::as_size(start_size)) << tag("after_size", format::as_size(finish_size)) + << tag("ratio", ratio) << tag("before_events", start_events) << tag("after_events", finish_events)); buffer_writer_ = ChainBufferWriter(); buffer_reader_ = buffer_writer_.extract_reader(); @@ -626,4 +708,58 @@ void Binlog::do_reindex() { update_write_encryption(); } +string Binlog::debug_get_binlog_data(int64 begin_offset, int64 end_offset) { + if (begin_offset > end_offset) { + return "Begin offset is bigger than end_offset"; + } + if (begin_offset == end_offset) { + return string(); + } + + static int64 MAX_DATA_LENGTH = 512; + if (end_offset - begin_offset > MAX_DATA_LENGTH) { + end_offset = begin_offset + MAX_DATA_LENGTH; + } + + auto r_fd = FileFd::open(path_, FileFd::Flags::Read); + if (r_fd.is_error()) { + return PSTRING() << "Failed to open binlog: " << r_fd.error(); + } + auto fd = r_fd.move_as_ok(); + + fd_.lock(FileFd::LockFlags::Unlock, path_, 1).ignore(); + SCOPE_EXIT { + fd_.lock(FileFd::LockFlags::Write, path_, 1).ensure(); + }; + auto expected_data_length = narrow_cast<size_t>(end_offset - begin_offset); + string data(expected_data_length, '\0'); + auto r_data_size = fd.pread(data, begin_offset); + if (r_data_size.is_error()) { + return PSTRING() << "Failed to read binlog: " << r_data_size.error(); + } + + if (r_data_size.ok() < expected_data_length) { + data.resize(r_data_size.ok()); + data = PSTRING() << format::as_hex_dump<4>(Slice(data)) << " | with " << expected_data_length - r_data_size.ok() + << " missed bytes"; + } else { + if (encryption_type_ == EncryptionType::AesCtr) { + bool is_zero = true; + for (auto &c : data) { + if (c != '\0') { + is_zero = false; + } + } + // very often we have '\0' bytes written to disk instead of a real log event + // this is clearly impossible content for a real encrypted log event, so just ignore it + if (is_zero) { + return string(); + } + } + + data = PSTRING() << format::as_hex_dump<4>(Slice(data)); + } + return data; +} + } // namespace td diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/Binlog.h b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/Binlog.h index cdda868b4e..88dbacf58f 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/Binlog.h +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/Binlog.h @@ -1,5 +1,5 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -15,16 +15,23 @@ #include "td/utils/ByteFlow.h" #include "td/utils/common.h" #include "td/utils/crypto.h" +#include "td/utils/logging.h" #include "td/utils/port/FileFd.h" +#include "td/utils/Promise.h" #include "td/utils/Slice.h" #include "td/utils/Status.h" +#include "td/utils/StorerBase.h" +#include "td/utils/UInt.h" #include <functional> namespace td { + +extern int32 VERBOSITY_NAME(binlog); + struct BinlogInfo { - bool was_created; - uint64 last_id; + bool was_created{false}; + uint64 last_id{0}; bool is_encrypted{false}; bool wrong_password{false}; bool is_opened{false}; @@ -34,12 +41,11 @@ namespace detail { class BinlogReader; class BinlogEventsProcessor; class BinlogEventsBuffer; -}; // namespace detail +} // namespace detail class Binlog { public: enum Error : int { WrongPassword = -1 }; - static bool IGNORE_ERASE_HACK; Binlog(); Binlog(const Binlog &other) = delete; Binlog &operator=(const Binlog &other) = delete; @@ -67,8 +73,28 @@ class Binlog { return fd_.empty(); } - void add_raw_event(BufferSlice &&raw_event) { - add_event(BinlogEvent(std::move(raw_event))); + uint64 add(int32 type, const Storer &storer) { + auto log_event_id = next_id(); + add_raw_event(BinlogEvent::create_raw(log_event_id, type, 0, storer), {}); + return log_event_id; + } + + uint64 rewrite(uint64 log_event_id, int32 type, const Storer &storer) { + auto seq_no = next_id(); + add_raw_event(BinlogEvent::create_raw(log_event_id, type, BinlogEvent::Flags::Rewrite, storer), {}); + return seq_no; + } + + uint64 erase(uint64 log_event_id) { + auto seq_no = next_id(); + add_raw_event(BinlogEvent::create_raw(log_event_id, BinlogEvent::ServiceTypes::Empty, BinlogEvent::Flags::Rewrite, + EmptyStorer()), + {}); + return seq_no; + } + + void add_raw_event(BufferSlice &&raw_event, BinlogDebugInfo info) { + add_event(BinlogEvent(std::move(raw_event), info)); } void add_event(BinlogEvent &&event); @@ -81,6 +107,7 @@ class Binlog { void change_key(DbKey new_db_key); Status close(bool need_sync = true) TD_WARN_UNUSED_RESULT; + void close(Promise<> promise); Status close_and_destroy() TD_WARN_UNUSED_RESULT; static Status destroy(Slice path) TD_WARN_UNUSED_RESULT; @@ -96,7 +123,7 @@ class Binlog { BufferedFdBase<FileFd> fd_; ChainBufferWriter buffer_writer_; ChainBufferReader buffer_reader_; - detail::BinlogReader *binlog_reader_ptr_; + detail::BinlogReader *binlog_reader_ptr_ = nullptr; BinlogInfo info_; DbKey db_key_; @@ -118,17 +145,15 @@ class Binlog { uint64 fd_events_{0}; string path_; std::vector<BinlogEvent> pending_events_; - std::unique_ptr<detail::BinlogEventsProcessor> processor_; - std::unique_ptr<detail::BinlogEventsBuffer> events_buffer_; + unique_ptr<detail::BinlogEventsProcessor> processor_; + unique_ptr<detail::BinlogEventsBuffer> events_buffer_; bool in_flush_events_buffer_{false}; uint64 last_id_{0}; double need_flush_since_ = 0; bool need_sync_{false}; enum class State { Empty, Load, Reindex, Run } state_{State::Empty}; - static constexpr uint32 MAX_EVENT_SIZE = 65536; - - Result<FileFd> open_binlog(CSlice path, int32 flags); + static Result<FileFd> open_binlog(const string &path, int32 flags); size_t flush_events_buffer(bool force); void do_add_event(BinlogEvent &&event); void do_event(BinlogEvent &&event); @@ -139,5 +164,8 @@ class Binlog { void reset_encryption(); void update_read_encryption(); void update_write_encryption(); + + string debug_get_binlog_data(int64 begin_offset, int64 end_offset); }; + } // namespace td diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogEvent.cpp b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogEvent.cpp index e4584e920e..09f9fc40e3 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogEvent.cpp +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogEvent.cpp @@ -1,38 +1,78 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #include "td/db/binlog/BinlogEvent.h" +#include "td/utils/crypto.h" +#include "td/utils/logging.h" +#include "td/utils/misc.h" #include "td/utils/tl_parsers.h" +#include "td/utils/tl_storers.h" namespace td { -int32 VERBOSITY_NAME(binlog) = VERBOSITY_NAME(DEBUG) + 8; Status BinlogEvent::init(BufferSlice &&raw_event, bool check_crc) { TlParser parser(raw_event.as_slice()); size_ = parser.fetch_int(); - CHECK(size_ == raw_event.size()); + LOG_CHECK(size_ == raw_event.size()) << size_ << " " << raw_event.size() << debug_info_; id_ = parser.fetch_long(); type_ = parser.fetch_int(); flags_ = parser.fetch_int(); extra_ = parser.fetch_long(); - CHECK(size_ >= MIN_EVENT_SIZE); - auto slice_data = parser.fetch_string_raw<Slice>(size_ - MIN_EVENT_SIZE); + CHECK(size_ >= MIN_SIZE); + auto slice_data = parser.fetch_string_raw<Slice>(size_ - MIN_SIZE); data_ = MutableSlice(const_cast<char *>(slice_data.begin()), slice_data.size()); crc32_ = static_cast<uint32>(parser.fetch_int()); if (check_crc) { - CHECK(size_ >= EVENT_TAIL_SIZE); - auto calculated_crc = crc32(raw_event.as_slice().truncate(size_ - EVENT_TAIL_SIZE)); + auto calculated_crc = crc32(raw_event.as_slice().substr(0, size_ - TAIL_SIZE)); if (calculated_crc != crc32_) { return Status::Error(PSLICE() << "crc mismatch " << tag("actual", format::as_hex(calculated_crc)) - << tag("expected", format::as_hex(crc32_))); + << tag("expected", format::as_hex(crc32_)) << public_to_string()); } } raw_event_ = std::move(raw_event); return Status::OK(); } +Status BinlogEvent::validate() const { + BinlogEvent event; + if (raw_event_.size() < 4) { + return Status::Error("Too small event"); + } + uint32 size = TlParser(raw_event_.as_slice().substr(0, 4)).fetch_int(); + if (size_ != size) { + return Status::Error(PSLICE() << "Size of event changed: " << tag("was", size_) << tag("now", size)); + } + return event.init(raw_event_.clone(), true); +} + +BufferSlice BinlogEvent::create_raw(uint64 id, int32 type, int32 flags, const Storer &storer) { + auto raw_event = BufferSlice{storer.size() + MIN_SIZE}; + + TlStorerUnsafe tl_storer(raw_event.as_slice().ubegin()); + tl_storer.store_int(narrow_cast<int32>(raw_event.size())); + tl_storer.store_long(id); + tl_storer.store_int(type); + tl_storer.store_int(flags); + tl_storer.store_long(0); + + CHECK(tl_storer.get_buf() == raw_event.as_slice().ubegin() + HEADER_SIZE); + tl_storer.store_storer(storer); + + CHECK(tl_storer.get_buf() == raw_event.as_slice().uend() - TAIL_SIZE); + tl_storer.store_int(crc32(raw_event.as_slice().truncate(raw_event.size() - TAIL_SIZE))); + + return raw_event; +} + +void BinlogEvent::realloc() { + auto data_offset = data_.begin() - raw_event_.as_slice().begin(); + auto data_size = data_.size(); + raw_event_ = raw_event_.copy(); + data_ = raw_event_.as_slice().substr(data_offset, data_size); +} + } // namespace td diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogEvent.h b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogEvent.h index 1874543dff..8fab08da98 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogEvent.h +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogEvent.h @@ -1,5 +1,5 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -8,17 +8,16 @@ #include "td/utils/buffer.h" #include "td/utils/common.h" -#include "td/utils/crypto.h" #include "td/utils/format.h" -#include "td/utils/logging.h" -#include "td/utils/misc.h" #include "td/utils/Slice.h" +#include "td/utils/SliceBuilder.h" #include "td/utils/Status.h" #include "td/utils/Storer.h" +#include "td/utils/StorerBase.h" #include "td/utils/StringBuilder.h" -#include "td/utils/tl_storers.h" namespace td { + struct EmptyStorerImpl { EmptyStorerImpl() { } @@ -33,15 +32,27 @@ inline auto EmptyStorer() { return create_default_storer(impl); } -static constexpr size_t MAX_EVENT_SIZE = 1 << 24; -static constexpr size_t EVENT_HEADER_SIZE = 4 + 8 + 4 + 4 + 8; -static constexpr size_t EVENT_TAIL_SIZE = 4; -static constexpr size_t MIN_EVENT_SIZE = EVENT_HEADER_SIZE + EVENT_TAIL_SIZE; +struct BinlogDebugInfo { + BinlogDebugInfo() = default; + BinlogDebugInfo(const char *file, int line) : file(file), line(line) { + } + const char *file{""}; + int line{0}; +}; -extern int32 VERBOSITY_NAME(binlog); +inline StringBuilder &operator<<(StringBuilder &sb, const BinlogDebugInfo &info) { + if (info.line == 0) { + return sb; + } + return sb << "[" << info.file << ":" << info.line << "]"; +} -// TODO: smaller BinlogEvent struct BinlogEvent { + static constexpr size_t MAX_SIZE = 1 << 24; + static constexpr size_t HEADER_SIZE = 4 + 8 + 4 + 4 + 8; + static constexpr size_t TAIL_SIZE = 4; + static constexpr size_t MIN_SIZE = HEADER_SIZE + TAIL_SIZE; + int64 offset_; uint32 size_; @@ -54,6 +65,8 @@ struct BinlogEvent { BufferSlice raw_event_; + BinlogDebugInfo debug_info_; + enum ServiceTypes { Header = -1, Empty = -2, AesCtrEncryption = -3, NoEncryption = -4 }; enum Flags { Rewrite = 1, Partial = 2 }; @@ -65,6 +78,7 @@ struct BinlogEvent { } BinlogEvent clone() const { BinlogEvent result; + result.debug_info_ = BinlogDebugInfo{__FILE__, __LINE__}; result.init(raw_event_.clone()).ensure(); return result; } @@ -74,36 +88,32 @@ struct BinlogEvent { } BinlogEvent() = default; - explicit BinlogEvent(BufferSlice &&raw_event) { + //explicit BinlogEvent(BufferSlice &&raw_event) { + //init(std::move(raw_event), false).ensure(); + //} + BinlogEvent(BufferSlice &&raw_event, BinlogDebugInfo info) { + debug_info_ = info; init(std::move(raw_event), false).ensure(); } + Status init(BufferSlice &&raw_event, bool check_crc = true) TD_WARN_UNUSED_RESULT; static BufferSlice create_raw(uint64 id, int32 type, int32 flags, const Storer &storer); + + std::string public_to_string() const { + return PSTRING() << "LogEvent[" << tag("id", format::as_hex(id_)) << tag("type", type_) << tag("flags", flags_) + << tag("data", data_.size()) << "]" << debug_info_; + } + + Status validate() const; + + void realloc(); }; inline StringBuilder &operator<<(StringBuilder &sb, const BinlogEvent &event) { return sb << "LogEvent[" << tag("id", format::as_hex(event.id_)) << tag("type", event.type_) - << tag("flags", event.flags_) << tag("data", format::as_hex_dump<4>(event.data_)) << "]"; + << tag("flags", event.flags_) << tag("data", format::as_hex_dump<4>(event.data_)) << "]" + << event.debug_info_; } -// Implementation -inline BufferSlice BinlogEvent::create_raw(uint64 id, int32 type, int32 flags, const Storer &storer) { - auto raw_event = BufferSlice{storer.size() + MIN_EVENT_SIZE}; - - TlStorerUnsafe tl_storer(raw_event.as_slice().begin()); - tl_storer.store_int(narrow_cast<int32>(raw_event.size())); - tl_storer.store_long(id); - tl_storer.store_int(type); - tl_storer.store_int(flags); - tl_storer.store_long(0); - - CHECK(tl_storer.get_buf() == raw_event.as_slice().begin() + EVENT_HEADER_SIZE); - tl_storer.store_storer(storer); - - CHECK(tl_storer.get_buf() == raw_event.as_slice().end() - EVENT_TAIL_SIZE); - tl_storer.store_int(::td::crc32(raw_event.as_slice().truncate(raw_event.size() - EVENT_TAIL_SIZE))); - - return raw_event; -} } // namespace td diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogHelper.h b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogHelper.h index 1224ac4ac2..66a65e060d 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogHelper.h +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogHelper.h @@ -1,45 +1,32 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #pragma once -#include "td/actor/PromiseFuture.h" - #include "td/db/binlog/BinlogEvent.h" +#include "td/db/binlog/BinlogInterface.h" #include "td/utils/common.h" -#include "td/utils/Storer.h" +#include "td/utils/Promise.h" +#include "td/utils/StorerBase.h" namespace td { -class BinlogHelper { - public: - template <class BinlogT, class StorerT> - static uint64 add(const BinlogT &binlog_ptr, int32 type, const StorerT &storer, Promise<> promise = Promise<>()) { - auto logevent_id = binlog_ptr->next_id(); - binlog_ptr->add_raw_event(logevent_id, BinlogEvent::create_raw(logevent_id, type, 0, storer), std::move(promise)); - return logevent_id; - } - template <class BinlogT, class StorerT> - static uint64 rewrite(const BinlogT &binlog_ptr, uint64 logevent_id, int32 type, const StorerT &storer, - Promise<> promise = Promise<>()) { - auto seq_no = binlog_ptr->next_id(); - binlog_ptr->add_raw_event(seq_no, BinlogEvent::create_raw(logevent_id, type, BinlogEvent::Flags::Rewrite, storer), - std::move(promise)); - return seq_no; - } +inline uint64 binlog_add(BinlogInterface *binlog_ptr, int32 type, const Storer &storer, + Promise<> promise = Promise<>()) { + return binlog_ptr->add(type, storer, std::move(promise)); +} + +inline uint64 binlog_rewrite(BinlogInterface *binlog_ptr, uint64 log_event_id, int32 type, const Storer &storer, + Promise<> promise = Promise<>()) { + return binlog_ptr->rewrite(log_event_id, type, storer, std::move(promise)); +} + +inline uint64 binlog_erase(BinlogInterface *binlog_ptr, uint64 log_event_id, Promise<> promise = Promise<>()) { + return binlog_ptr->erase(log_event_id, std::move(promise)); +} - template <class BinlogT> - static uint64 erase(const BinlogT &binlog_ptr, uint64 logevent_id, Promise<> promise = Promise<>()) { - auto seq_no = binlog_ptr->next_id(); - binlog_ptr->add_raw_event(seq_no, - BinlogEvent::create_raw(logevent_id, BinlogEvent::ServiceTypes::Empty, - BinlogEvent::Flags::Rewrite, EmptyStorer()), - std::move(promise)); - return seq_no; - } -}; } // namespace td diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogInterface.h b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogInterface.h index 2360f3c480..b3ec7b119f 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogInterface.h +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/BinlogInterface.h @@ -1,20 +1,21 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #pragma once -#include "td/actor/PromiseFuture.h" - #include "td/db/binlog/BinlogEvent.h" #include "td/db/DbKey.h" #include "td/utils/buffer.h" #include "td/utils/common.h" +#include "td/utils/Promise.h" +#include "td/utils/StorerBase.h" namespace td { + class BinlogInterface { public: BinlogInterface() = default; @@ -30,15 +31,41 @@ class BinlogInterface { void close_and_destroy(Promise<> promise = {}) { close_and_destroy_impl(std::move(promise)); } + void add_raw_event(BinlogDebugInfo info, uint64 id, BufferSlice &&raw_event, Promise<> promise = Promise<>()) { + add_raw_event_impl(id, std::move(raw_event), std::move(promise), info); + } void add_raw_event(uint64 id, BufferSlice &&raw_event, Promise<> promise = Promise<>()) { - add_raw_event_impl(id, std::move(raw_event), std::move(promise)); + add_raw_event_impl(id, std::move(raw_event), std::move(promise), {}); } void lazy_sync(Promise<> promise = Promise<>()) { - add_raw_event_impl(next_id(), BufferSlice(), std::move(promise)); + add_raw_event_impl(next_id(), BufferSlice(), std::move(promise), {}); } + + uint64 add(int32 type, const Storer &storer, Promise<> promise = Promise<>()) { + auto log_event_id = next_id(); + add_raw_event_impl(log_event_id, BinlogEvent::create_raw(log_event_id, type, 0, storer), std::move(promise), {}); + return log_event_id; + } + + uint64 rewrite(uint64 log_event_id, int32 type, const Storer &storer, Promise<> promise = Promise<>()) { + auto seq_no = next_id(); + add_raw_event_impl(seq_no, BinlogEvent::create_raw(log_event_id, type, BinlogEvent::Flags::Rewrite, storer), + std::move(promise), {}); + return seq_no; + } + + uint64 erase(uint64 log_event_id, Promise<> promise = Promise<>()) { + auto seq_no = next_id(); + add_raw_event_impl(seq_no, + BinlogEvent::create_raw(log_event_id, BinlogEvent::ServiceTypes::Empty, + BinlogEvent::Flags::Rewrite, EmptyStorer()), + std::move(promise), {}); + return seq_no; + } + virtual void force_sync(Promise<> promise) = 0; virtual void force_flush() = 0; - virtual void change_key(DbKey db_key, Promise<> promise = Promise<>()) = 0; + virtual void change_key(DbKey db_key, Promise<> promise) = 0; virtual uint64 next_id() = 0; virtual uint64 next_id(int32 shift) = 0; @@ -46,6 +73,7 @@ class BinlogInterface { protected: virtual void close_impl(Promise<> promise) = 0; virtual void close_and_destroy_impl(Promise<> promise) = 0; - virtual void add_raw_event_impl(uint64 id, BufferSlice &&raw_event, Promise<> promise) = 0; + virtual void add_raw_event_impl(uint64 id, BufferSlice &&raw_event, Promise<> promise, BinlogDebugInfo info) = 0; }; + } // namespace td diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/ConcurrentBinlog.cpp b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/ConcurrentBinlog.cpp index aaf07f2967..16a4e2c010 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/ConcurrentBinlog.cpp +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/ConcurrentBinlog.cpp @@ -1,5 +1,5 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -8,37 +8,41 @@ #include "td/utils/logging.h" #include "td/utils/OrderedEventsProcessor.h" +#include "td/utils/SliceBuilder.h" #include "td/utils/Time.h" #include <map> namespace td { namespace detail { -class BinlogActor : public Actor { +class BinlogActor final : public Actor { public: - BinlogActor(std::unique_ptr<Binlog> binlog, uint64 seq_no) : binlog_(std::move(binlog)), processor_(seq_no) { + BinlogActor(unique_ptr<Binlog> binlog, uint64 seq_no) : binlog_(std::move(binlog)), processor_(seq_no) { } void close(Promise<> promise) { binlog_->close().ensure(); - promise.set_value(Unit()); - LOG(INFO) << "close: done"; + LOG(INFO) << "Finished to close binlog"; stop(); + + promise.set_value(Unit()); // setting promise can complete closing and destroy the current actor context } void close_and_destroy(Promise<> promise) { binlog_->close_and_destroy().ensure(); - promise.set_value(Unit()); - LOG(INFO) << "close_and_destroy: done"; + LOG(INFO) << "Finished to destroy binlog"; stop(); + + promise.set_value(Unit()); // setting promise can complete closing and destroy the current actor context } struct Event { BufferSlice raw_event; Promise<> sync_promise; + BinlogDebugInfo debug_info; }; - void add_raw_event(uint64 seq_no, BufferSlice &&raw_event, Promise<> &&promise) { - processor_.add(seq_no, Event{std::move(raw_event), std::move(promise)}, [&](uint64 id, Event &&event) { + void add_raw_event(uint64 seq_no, BufferSlice &&raw_event, Promise<> &&promise, BinlogDebugInfo info) { + processor_.add(seq_no, Event{std::move(raw_event), std::move(promise), info}, [&](uint64 id, Event &&event) { if (!event.raw_event.empty()) { - do_add_raw_event(std::move(event.raw_event)); + do_add_raw_event(std::move(event.raw_event), event.debug_info); } do_lazy_sync(std::move(event.sync_promise)); }); @@ -67,7 +71,7 @@ class BinlogActor : public Actor { } private: - std::unique_ptr<Binlog> binlog_; + unique_ptr<Binlog> binlog_; OrderedEventsProcessor<Event> processor_; @@ -78,7 +82,7 @@ class BinlogActor : public Actor { bool flush_flag_ = false; double wakeup_at_ = 0; - static constexpr int32 FLUSH_TIMEOUT = 1; // 1s + static constexpr double FLUSH_TIMEOUT = 0.001; // 1ms void wakeup_after(double after) { auto now = Time::now_cached(); @@ -92,8 +96,8 @@ class BinlogActor : public Actor { } } - void do_add_raw_event(BufferSlice &&raw_event) { - binlog_->add_raw_event(std::move(raw_event)); + void do_add_raw_event(BufferSlice &&raw_event, BinlogDebugInfo info) { + binlog_->add_raw_event(std::move(raw_event), info); } void try_flush() { @@ -138,7 +142,7 @@ class BinlogActor : public Actor { } } - void timeout_expired() override { + void timeout_expired() final { bool need_sync = lazy_sync_flag_ || force_sync_flag_; lazy_sync_flag_ = false; force_sync_flag_ = false; @@ -148,10 +152,7 @@ class BinlogActor : public Actor { if (need_sync) { binlog_->sync(); // LOG(ERROR) << "BINLOG SYNC"; - for (auto &promise : sync_promises_) { - promise.set_value(Unit()); - } - sync_promises_.clear(); + set_promises(sync_promises_); } else if (need_flush) { try_flush(); // LOG(ERROR) << "BINLOG FLUSH"; @@ -162,24 +163,24 @@ class BinlogActor : public Actor { ConcurrentBinlog::ConcurrentBinlog() = default; ConcurrentBinlog::~ConcurrentBinlog() = default; -ConcurrentBinlog::ConcurrentBinlog(std::unique_ptr<Binlog> binlog, int scheduler_id) { +ConcurrentBinlog::ConcurrentBinlog(unique_ptr<Binlog> binlog, int scheduler_id) { init_impl(std::move(binlog), scheduler_id); } Result<BinlogInfo> ConcurrentBinlog::init(string path, const Callback &callback, DbKey db_key, DbKey old_db_key, int scheduler_id) { - auto binlog = std::make_unique<Binlog>(); + auto binlog = make_unique<Binlog>(); TRY_STATUS(binlog->init(std::move(path), callback, std::move(db_key), std::move(old_db_key))); auto info = binlog->get_info(); init_impl(std::move(binlog), scheduler_id); return info; } -void ConcurrentBinlog::init_impl(std::unique_ptr<Binlog> binlog, int32 scheduler_id) { +void ConcurrentBinlog::init_impl(unique_ptr<Binlog> binlog, int32 scheduler_id) { path_ = binlog->get_path().str(); last_id_ = binlog->peek_next_id(); - binlog_actor_ = - create_actor_on_scheduler<detail::BinlogActor>("Binlog " + path_, scheduler_id, std::move(binlog), last_id_); + binlog_actor_ = create_actor_on_scheduler<detail::BinlogActor>(PSLICE() << "Binlog " << path_, scheduler_id, + std::move(binlog), last_id_); } void ConcurrentBinlog::close_impl(Promise<> promise) { @@ -188,8 +189,8 @@ void ConcurrentBinlog::close_impl(Promise<> promise) { void ConcurrentBinlog::close_and_destroy_impl(Promise<> promise) { send_closure(std::move(binlog_actor_), &detail::BinlogActor::close_and_destroy, std::move(promise)); } -void ConcurrentBinlog::add_raw_event_impl(uint64 id, BufferSlice &&raw_event, Promise<> promise) { - send_closure(binlog_actor_, &detail::BinlogActor::add_raw_event, id, std::move(raw_event), std::move(promise)); +void ConcurrentBinlog::add_raw_event_impl(uint64 id, BufferSlice &&raw_event, Promise<> promise, BinlogDebugInfo info) { + send_closure(binlog_actor_, &detail::BinlogActor::add_raw_event, id, std::move(raw_event), std::move(promise), info); } void ConcurrentBinlog::force_sync(Promise<> promise) { send_closure(binlog_actor_, &detail::BinlogActor::force_sync, std::move(promise)); diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/ConcurrentBinlog.h b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/ConcurrentBinlog.h index ce77a78a84..ad6c9bc5c3 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/ConcurrentBinlog.h +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/ConcurrentBinlog.h @@ -1,19 +1,20 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #pragma once -#include "td/actor/actor.h" -#include "td/actor/PromiseFuture.h" - #include "td/db/binlog/Binlog.h" #include "td/db/binlog/BinlogInterface.h" +#include "td/db/DbKey.h" + +#include "td/actor/actor.h" #include "td/utils/buffer.h" #include "td/utils/common.h" +#include "td/utils/Promise.h" #include "td/utils/Slice.h" #include "td/utils/Status.h" @@ -26,28 +27,28 @@ namespace detail { class BinlogActor; } // namespace detail -class ConcurrentBinlog : public BinlogInterface { +class ConcurrentBinlog final : public BinlogInterface { public: using Callback = std::function<void(const BinlogEvent &)>; Result<BinlogInfo> init(string path, const Callback &callback, DbKey db_key = DbKey::empty(), DbKey old_db_key = DbKey::empty(), int scheduler_id = -1) TD_WARN_UNUSED_RESULT; ConcurrentBinlog(); - explicit ConcurrentBinlog(std::unique_ptr<Binlog> binlog, int scheduler_id = -1); + explicit ConcurrentBinlog(unique_ptr<Binlog> binlog, int scheduler_id = -1); ConcurrentBinlog(const ConcurrentBinlog &other) = delete; ConcurrentBinlog &operator=(const ConcurrentBinlog &other) = delete; ConcurrentBinlog(ConcurrentBinlog &&other) = delete; ConcurrentBinlog &operator=(ConcurrentBinlog &&other) = delete; - ~ConcurrentBinlog() override; + ~ConcurrentBinlog() final; - void force_sync(Promise<> promise) override; - void force_flush() override; - void change_key(DbKey db_key, Promise<> promise) override; + void force_sync(Promise<> promise) final; + void force_flush() final; + void change_key(DbKey db_key, Promise<> promise) final; - uint64 next_id() override { + uint64 next_id() final { return last_id_.fetch_add(1, std::memory_order_relaxed); } - uint64 next_id(int32 shift) override { + uint64 next_id(int32 shift) final { return last_id_.fetch_add(shift, std::memory_order_relaxed); } @@ -56,13 +57,14 @@ class ConcurrentBinlog : public BinlogInterface { } private: - void init_impl(std::unique_ptr<Binlog> binlog, int scheduler_id); - void close_impl(Promise<> promise) override; - void close_and_destroy_impl(Promise<> promise) override; - void add_raw_event_impl(uint64 id, BufferSlice &&raw_event, Promise<> promise) override; + void init_impl(unique_ptr<Binlog> binlog, int scheduler_id); + void close_impl(Promise<> promise) final; + void close_and_destroy_impl(Promise<> promise) final; + void add_raw_event_impl(uint64 id, BufferSlice &&raw_event, Promise<> promise, BinlogDebugInfo info) final; ActorOwn<detail::BinlogActor> binlog_actor_; string path_; - std::atomic<uint64> last_id_; + std::atomic<uint64> last_id_{0}; }; + } // namespace td diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp index a8b8bf9e1b..f3984062fc 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp @@ -1,52 +1,156 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #include "td/db/binlog/Binlog.h" +#include "td/db/DbKey.h" + #include "td/utils/common.h" #include "td/utils/format.h" #include "td/utils/logging.h" +#include "td/utils/misc.h" +#include "td/utils/port/Stat.h" +#include "td/utils/Slice.h" +#include "td/utils/SliceBuilder.h" +#include "td/utils/StringBuilder.h" +#include "td/utils/tl_parsers.h" -#include <cstdio> #include <map> +struct Trie { + Trie() { + nodes_.resize(1); + } + + void add(td::Slice value) { + do_add(0, PSLICE() << value << '\0'); + } + + void dump() { + if (nodes_[0].sum == 0) { // division by zero + return; + } + LOG(PLAIN) << "TOTAL: " << nodes_[0].sum; + do_dump("", 0); + } + + private: + struct FullNode { + int next[256] = {}; + int sum = 0; + }; + td::vector<FullNode> nodes_; + + void do_add(int id, td::Slice value) { + nodes_[id].sum++; + if (value.empty()) { + return; + } + + auto c = static_cast<td::uint8>(value[0]); + auto next_id = nodes_[id].next[c]; + if (next_id == 0) { + next_id = static_cast<int>(nodes_.size()); + nodes_.emplace_back(); + nodes_[id].next[c] = next_id; + } + do_add(next_id, value.substr(1)); + } + + void do_dump(td::string path, int v) { + bool is_word_end = !path.empty() && path.back() == '\0'; + + bool need_stop = false; + int next_count = 0; + for (int c = 0; c < 256; c++) { + if (nodes_[v].next[c] != 0) { + need_stop |= c >= 128 || !(td::is_alpha(static_cast<char>(c)) || c == '.' || c == '_'); + next_count++; + } + } + need_stop |= next_count == 0 || (next_count >= 2 && nodes_[v].sum <= nodes_[0].sum / 100); + + if (is_word_end || need_stop) { + if (is_word_end) { + path.pop_back(); + } else if (next_count != 1 || nodes_[v].next[0] == 0) { + path.push_back('*'); + } + LOG(PLAIN) << nodes_[v].sum << " " << td::StringBuilder::FixedDouble(nodes_[v].sum * 100.0 / nodes_[0].sum, 2) + << "% [" << td::format::escaped(path) << "]"; + return; + } + for (int c = 0; c < 256; c++) { + auto next_id = nodes_[v].next[c]; + if (next_id == 0) { + continue; + } + do_dump(path + static_cast<char>(c), next_id); + } + } +}; + +enum Magic { ConfigPmcMagic = 0x1f18, BinlogPmcMagic = 0x4327 }; + int main(int argc, char *argv[]) { if (argc < 2) { - std::fprintf(stderr, "Usage: binlog_dump <binlog_file_name>\n"); + LOG(PLAIN) << "Usage: binlog_dump <binlog_file_name>"; + return 1; + } + td::string binlog_file_name = argv[1]; + auto r_stat = td::stat(binlog_file_name); + if (r_stat.is_error() || r_stat.ok().size_ == 0 || !r_stat.ok().is_reg_) { + LOG(PLAIN) << "Wrong binlog file name specified"; + LOG(PLAIN) << "Usage: binlog_dump <binlog_file_name>"; return 1; } struct Info { std::size_t full_size = 0; std::size_t compressed_size = 0; + Trie trie; + Trie compressed_trie; }; std::map<td::uint64, Info> info; SET_VERBOSITY_LEVEL(VERBOSITY_NAME(ERROR)); td::Binlog binlog; binlog - .init(argv[1], - [&](auto &event) { - info[0].compressed_size += event.raw_event_.size(); - info[event.type_].compressed_size += event.raw_event_.size(); - }, - td::DbKey::empty(), td::DbKey::empty(), -1, - [&](auto &event) mutable { - info[0].full_size += event.raw_event_.size(); - info[event.type_].full_size += event.raw_event_.size(); - LOG(PLAIN) << "LogEvent[" << td::tag("id", td::format::as_hex(event.id_)) << td::tag("type", event.type_) - << td::tag("flags", event.flags_) << td::tag("data", td::format::escaped(event.data_)) - << "]\n"; - }) + .init( + binlog_file_name, + [&](auto &event) { + info[0].compressed_size += event.raw_event_.size(); + info[event.type_].compressed_size += event.raw_event_.size(); + if (event.type_ == ConfigPmcMagic || event.type_ == BinlogPmcMagic) { + auto key = td::TlParser(event.data_).fetch_string<td::Slice>(); + info[event.type_].compressed_trie.add(key); + } + }, + td::DbKey::raw_key("cucumber"), td::DbKey::empty(), -1, + [&](auto &event) mutable { + info[0].full_size += event.raw_event_.size(); + info[event.type_].full_size += event.raw_event_.size(); + if (event.type_ == ConfigPmcMagic || event.type_ == BinlogPmcMagic) { + auto key = td::TlParser(event.data_).fetch_string<td::Slice>(); + info[event.type_].trie.add(key); + } + LOG(PLAIN) << "LogEvent[" << td::tag("id", td::format::as_hex(event.id_)) << td::tag("type", event.type_) + << td::tag("flags", event.flags_) << td::tag("size", event.data_.size()) + << td::tag("data", td::format::escaped(event.data_)) << "]\n"; + }) .ensure(); for (auto &it : info) { - LOG(ERROR) << td::tag("handler", td::format::as_hex(it.first)) + LOG(PLAIN) << td::tag("handler", td::format::as_hex(it.first)) << td::tag("full_size", td::format::as_size(it.second.full_size)) << td::tag("compressed_size", td::format::as_size(it.second.compressed_size)); + it.second.trie.dump(); + if (it.second.full_size != it.second.compressed_size) { + it.second.compressed_trie.dump(); + } } return 0; diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsBuffer.cpp b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsBuffer.cpp index b7ddc98ff1..cb4e4817c5 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsBuffer.cpp +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsBuffer.cpp @@ -1,5 +1,5 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,6 +10,7 @@ namespace td { namespace detail { + void BinlogEventsBuffer::add_event(BinlogEvent &&event) { total_events_++; if ((event.flags_ & BinlogEvent::Flags::Partial) == 0) { @@ -26,14 +27,17 @@ void BinlogEventsBuffer::add_event(BinlogEvent &&event) { size_ += event.size_; events_.push_back(std::move(event)); } + bool BinlogEventsBuffer::need_flush() const { return total_events_ > 5000 || ids_.size() > 100; } + void BinlogEventsBuffer::clear() { ids_.clear(); events_.clear(); total_events_ = 0; size_ = 0; } + } // namespace detail } // namespace td diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsBuffer.h b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsBuffer.h index dcd6d7c1b3..5b7cab8212 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsBuffer.h +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsBuffer.h @@ -1,16 +1,18 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #pragma once + #include "td/db/binlog/BinlogEvent.h" #include "td/utils/common.h" namespace td { namespace detail { + class BinlogEventsBuffer { public: void add_event(BinlogEvent &&event); @@ -23,7 +25,8 @@ class BinlogEventsBuffer { auto &event = events_[i]; if (i + 1 != ids_.size() && (event.flags_ & BinlogEvent::Flags::Partial) == 0) { callback(BinlogEvent(BinlogEvent::create_raw(event.id_, event.type_, event.flags_ | BinlogEvent::Flags::Partial, - create_storer(event.data_)))); + create_storer(event.data_)), + BinlogDebugInfo{__FILE__, __LINE__})); } else { callback(std::move(event)); } @@ -43,5 +46,6 @@ class BinlogEventsBuffer { void do_event(BinlogEvent &&event); void clear(); }; + } // namespace detail } // namespace td diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsProcessor.cpp b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsProcessor.cpp index 50ad91bff8..7b2832b39a 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsProcessor.cpp +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsProcessor.cpp @@ -1,25 +1,26 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #include "td/db/binlog/detail/BinlogEventsProcessor.h" -#include "td/utils/logging.h" +#include "td/utils/SliceBuilder.h" +#include "td/utils/Status.h" #include <algorithm> namespace td { namespace detail { -void BinlogEventsProcessor::do_event(BinlogEvent &&event) { + +Status BinlogEventsProcessor::do_event(BinlogEvent &&event) { offset_ = event.offset_; auto fixed_id = event.id_ * 2; if ((event.flags_ & BinlogEvent::Flags::Rewrite) && !ids_.empty() && ids_.back() >= fixed_id) { auto it = std::lower_bound(ids_.begin(), ids_.end(), fixed_id); if (it == ids_.end() || *it != fixed_id) { - LOG(FATAL) << "Ignore rewrite logevent"; - return; + return Status::Error(PSLICE() << "Ignore rewrite log event " << event.public_to_string()); } auto pos = it - ids_.begin(); total_raw_events_size_ -= static_cast<int64>(events_[pos].raw_event_.size()); @@ -32,10 +33,14 @@ void BinlogEventsProcessor::do_event(BinlogEvent &&event) { total_raw_events_size_ += static_cast<int64>(event.raw_event_.size()); events_[pos] = std::move(event); } - } else if (event.type_ == BinlogEvent::ServiceTypes::Empty) { - // just skip this event + } else if (event.type_ < 0) { + // just skip service events } else { - CHECK(ids_.empty() || ids_.back() < fixed_id); + if (!(ids_.empty() || ids_.back() < fixed_id)) { + return Status::Error(PSLICE() << offset_ << " " << ids_.size() << " " << ids_.back() << " " << fixed_id << " " + << event.public_to_string() << " " << total_events_ << " " + << total_raw_events_size_); + } last_id_ = event.id_; total_raw_events_size_ += static_cast<int64>(event.raw_event_.size()); total_events_++; @@ -46,6 +51,7 @@ void BinlogEventsProcessor::do_event(BinlogEvent &&event) { if (total_events_ > 10 && empty_events_ * 4 > total_events_ * 3) { compactify(); } + return Status::OK(); } void BinlogEventsProcessor::compactify() { @@ -66,5 +72,6 @@ void BinlogEventsProcessor::compactify() { empty_events_ = 0; CHECK(ids_.size() == events_.size()); } + } // namespace detail } // namespace td diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsProcessor.h b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsProcessor.h index 645f8c50ab..8b276aef03 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsProcessor.h +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/detail/BinlogEventsProcessor.h @@ -1,25 +1,31 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #pragma once + #include "td/db/binlog/BinlogEvent.h" #include "td/utils/common.h" +#include "td/utils/logging.h" +#include "td/utils/Status.h" namespace td { namespace detail { + class BinlogEventsProcessor { public: - void add_event(BinlogEvent &&event) { - do_event(std::move(event)); + Status add_event(BinlogEvent &&event) TD_WARN_UNUSED_RESULT { + return do_event(std::move(event)); } template <class CallbackT> void for_each(CallbackT &&callback) { for (size_t i = 0; i < ids_.size(); i++) { + LOG_CHECK(i == 0 || ids_[i - 1] < ids_[i]) << ids_[i - 1] << " " << events_[i - 1].public_to_string() << " " + << ids_[i] << " " << events_[i].public_to_string(); if ((ids_[i] & 1) == 0) { callback(events_[i]); } @@ -46,8 +52,9 @@ class BinlogEventsProcessor { int64 offset_{0}; int64 total_raw_events_size_{0}; - void do_event(BinlogEvent &&event); + Status do_event(BinlogEvent &&event); void compactify(); }; + } // namespace detail } // namespace td |