diff options
Diffstat (limited to 'protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp')
-rw-r--r-- | protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp | 138 |
1 files changed, 121 insertions, 17 deletions
diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp index a8b8bf9e1b..f3984062fc 100644 --- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp +++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp @@ -1,52 +1,156 @@ // -// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018 +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #include "td/db/binlog/Binlog.h" +#include "td/db/DbKey.h" + #include "td/utils/common.h" #include "td/utils/format.h" #include "td/utils/logging.h" +#include "td/utils/misc.h" +#include "td/utils/port/Stat.h" +#include "td/utils/Slice.h" +#include "td/utils/SliceBuilder.h" +#include "td/utils/StringBuilder.h" +#include "td/utils/tl_parsers.h" -#include <cstdio> #include <map> +struct Trie { + Trie() { + nodes_.resize(1); + } + + void add(td::Slice value) { + do_add(0, PSLICE() << value << '\0'); + } + + void dump() { + if (nodes_[0].sum == 0) { // division by zero + return; + } + LOG(PLAIN) << "TOTAL: " << nodes_[0].sum; + do_dump("", 0); + } + + private: + struct FullNode { + int next[256] = {}; + int sum = 0; + }; + td::vector<FullNode> nodes_; + + void do_add(int id, td::Slice value) { + nodes_[id].sum++; + if (value.empty()) { + return; + } + + auto c = static_cast<td::uint8>(value[0]); + auto next_id = nodes_[id].next[c]; + if (next_id == 0) { + next_id = static_cast<int>(nodes_.size()); + nodes_.emplace_back(); + nodes_[id].next[c] = next_id; + } + do_add(next_id, value.substr(1)); + } + + void do_dump(td::string path, int v) { + bool is_word_end = !path.empty() && path.back() == '\0'; + + bool need_stop = false; + int next_count = 0; + for (int c = 0; c < 256; c++) { + if (nodes_[v].next[c] != 0) { + need_stop |= c >= 128 || !(td::is_alpha(static_cast<char>(c)) || c == '.' || c == '_'); + next_count++; + } + } + need_stop |= next_count == 0 || (next_count >= 2 && nodes_[v].sum <= nodes_[0].sum / 100); + + if (is_word_end || need_stop) { + if (is_word_end) { + path.pop_back(); + } else if (next_count != 1 || nodes_[v].next[0] == 0) { + path.push_back('*'); + } + LOG(PLAIN) << nodes_[v].sum << " " << td::StringBuilder::FixedDouble(nodes_[v].sum * 100.0 / nodes_[0].sum, 2) + << "% [" << td::format::escaped(path) << "]"; + return; + } + for (int c = 0; c < 256; c++) { + auto next_id = nodes_[v].next[c]; + if (next_id == 0) { + continue; + } + do_dump(path + static_cast<char>(c), next_id); + } + } +}; + +enum Magic { ConfigPmcMagic = 0x1f18, BinlogPmcMagic = 0x4327 }; + int main(int argc, char *argv[]) { if (argc < 2) { - std::fprintf(stderr, "Usage: binlog_dump <binlog_file_name>\n"); + LOG(PLAIN) << "Usage: binlog_dump <binlog_file_name>"; + return 1; + } + td::string binlog_file_name = argv[1]; + auto r_stat = td::stat(binlog_file_name); + if (r_stat.is_error() || r_stat.ok().size_ == 0 || !r_stat.ok().is_reg_) { + LOG(PLAIN) << "Wrong binlog file name specified"; + LOG(PLAIN) << "Usage: binlog_dump <binlog_file_name>"; return 1; } struct Info { std::size_t full_size = 0; std::size_t compressed_size = 0; + Trie trie; + Trie compressed_trie; }; std::map<td::uint64, Info> info; SET_VERBOSITY_LEVEL(VERBOSITY_NAME(ERROR)); td::Binlog binlog; binlog - .init(argv[1], - [&](auto &event) { - info[0].compressed_size += event.raw_event_.size(); - info[event.type_].compressed_size += event.raw_event_.size(); - }, - td::DbKey::empty(), td::DbKey::empty(), -1, - [&](auto &event) mutable { - info[0].full_size += event.raw_event_.size(); - info[event.type_].full_size += event.raw_event_.size(); - LOG(PLAIN) << "LogEvent[" << td::tag("id", td::format::as_hex(event.id_)) << td::tag("type", event.type_) - << td::tag("flags", event.flags_) << td::tag("data", td::format::escaped(event.data_)) - << "]\n"; - }) + .init( + binlog_file_name, + [&](auto &event) { + info[0].compressed_size += event.raw_event_.size(); + info[event.type_].compressed_size += event.raw_event_.size(); + if (event.type_ == ConfigPmcMagic || event.type_ == BinlogPmcMagic) { + auto key = td::TlParser(event.data_).fetch_string<td::Slice>(); + info[event.type_].compressed_trie.add(key); + } + }, + td::DbKey::raw_key("cucumber"), td::DbKey::empty(), -1, + [&](auto &event) mutable { + info[0].full_size += event.raw_event_.size(); + info[event.type_].full_size += event.raw_event_.size(); + if (event.type_ == ConfigPmcMagic || event.type_ == BinlogPmcMagic) { + auto key = td::TlParser(event.data_).fetch_string<td::Slice>(); + info[event.type_].trie.add(key); + } + LOG(PLAIN) << "LogEvent[" << td::tag("id", td::format::as_hex(event.id_)) << td::tag("type", event.type_) + << td::tag("flags", event.flags_) << td::tag("size", event.data_.size()) + << td::tag("data", td::format::escaped(event.data_)) << "]\n"; + }) .ensure(); for (auto &it : info) { - LOG(ERROR) << td::tag("handler", td::format::as_hex(it.first)) + LOG(PLAIN) << td::tag("handler", td::format::as_hex(it.first)) << td::tag("full_size", td::format::as_size(it.second.full_size)) << td::tag("compressed_size", td::format::as_size(it.second.compressed_size)); + it.second.trie.dump(); + if (it.second.full_size != it.second.compressed_size) { + it.second.compressed_trie.dump(); + } } return 0; |