summaryrefslogtreecommitdiff
path: root/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp
diff options
context:
space:
mode:
author George Hazan <ghazan@miranda.im> 2022-11-30 17:48:47 +0300
committer George Hazan <ghazan@miranda.im> 2022-11-30 17:48:47 +0300
commit 0ece30dc7c0e34b4c5911969b8fa99c33c6d023c (patch)
tree 671325d3fec09b999411e4e3ab84ef8259261818 /protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp
parent 46c53ffc6809c67e4607e99951a2846c382b63b2 (diff)
Telegram: update for TDLIB
Diffstat (limited to 'protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp')
-rw-r--r-- protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp 138
1 files changed, 121 insertions, 17 deletions
diff --git a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp
index a8b8bf9e1b..f3984062fc 100644
--- a/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp
+++ b/protocols/Telegram/tdlib/td/tddb/td/db/binlog/binlog_dump.cpp
@@ -1,52 +1,156 @@
//
-// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2018
+// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#include "td/db/binlog/Binlog.h"
+#include "td/db/DbKey.h"
+
#include "td/utils/common.h"
#include "td/utils/format.h"
#include "td/utils/logging.h"
+#include "td/utils/misc.h"
+#include "td/utils/port/Stat.h"
+#include "td/utils/Slice.h"
+#include "td/utils/SliceBuilder.h"
+#include "td/utils/StringBuilder.h"
+#include "td/utils/tl_parsers.h"
-#include <cstdio>
#include <map>
+struct Trie {  // byte-wise prefix tree counting how many added values pass through each prefix
+ Trie() {
+ nodes_.resize(1);  // node 0 is the root
+ }
+
+ void add(td::Slice value) {  // records one occurrence of value
+ do_add(0, PSLICE() << value << '\0');  // the appended '\0' lets do_dump recognise the end of a complete value
+ }
+
+ void dump() {  // logs the total number of added values, then a per-prefix breakdown
+ if (nodes_[0].sum == 0) { // nothing was added; the percentage in do_dump would divide by zero
+ return;
+ }
+ LOG(PLAIN) << "TOTAL: " << nodes_[0].sum;
+ do_dump("", 0);
+ }
+
+ private:
+ struct FullNode {  // one trie node
+ int next[256] = {};  // child node index per byte value; 0 means "no child" (the root is never a child)
+ int sum = 0;  // number of added values whose path passes through this node
+ };
+ td::vector<FullNode> nodes_;  // all nodes, addressed by index; nodes_[0] is the root
+
+ void do_add(int id, td::Slice value) {  // counts value at node id, then recurses into the child for its first byte
+ nodes_[id].sum++;
+ if (value.empty()) {
+ return;
+ }
+
+ auto c = static_cast<td::uint8>(value[0]);  // unsigned byte value used as index into next[]
+ auto next_id = nodes_[id].next[c];
+ if (next_id == 0) {  // child does not exist yet
+ next_id = static_cast<int>(nodes_.size());  // index the appended node will get
+ nodes_.emplace_back();
+ nodes_[id].next[c] = next_id;
+ }
+ do_add(next_id, value.substr(1));
+ }
+
+ void do_dump(td::string path, int v) {  // logs counts for the subtree of node v; path is the byte string leading to v
+ bool is_word_end = !path.empty() && path.back() == '\0';  // path ends with the terminator appended in add()
+
+ bool need_stop = false;
+ int next_count = 0;
+ for (int c = 0; c < 256; c++) {  // scan the children of v
+ if (nodes_[v].next[c] != 0) {
+ need_stop |= c >= 128 || !(td::is_alpha(static_cast<char>(c)) || c == '.' || c == '_');  // stop on any child byte that is not td::is_alpha, '.' or '_'
+ next_count++;
+ }
+ }
+ need_stop |= next_count == 0 || (next_count >= 2 && nodes_[v].sum <= nodes_[0].sum / 100);  // also stop at leaves and at branching nodes with at most total/100 values (integer division)
+
+ if (is_word_end || need_stop) {  // print one summary line (count, percentage of total, escaped path) instead of descending
+ if (is_word_end) {
+ path.pop_back();  // drop the terminator: path is a complete value
+ } else if (next_count != 1 || nodes_[v].next[0] == 0) {  // skipped only when the sole child is the '\0' terminator
+ path.push_back('*');  // marks path as a prefix of several values
+ }
+ LOG(PLAIN) << nodes_[v].sum << " " << td::StringBuilder::FixedDouble(nodes_[v].sum * 100.0 / nodes_[0].sum, 2)
+ << "% [" << td::format::escaped(path) << "]";
+ return;
+ }
+ for (int c = 0; c < 256; c++) {  // otherwise descend into every existing child, in byte order
+ auto next_id = nodes_[v].next[c];
+ if (next_id == 0) {
+ continue;
+ }
+ do_dump(path + static_cast<char>(c), next_id);
+ }
+ }
+};
+
+enum Magic { ConfigPmcMagic = 0x1f18, BinlogPmcMagic = 0x4327 };  // event types whose data main() parses for a leading string key to feed the tries
+
int main(int argc, char *argv[]) {
if (argc < 2) {
- std::fprintf(stderr, "Usage: binlog_dump <binlog_file_name>\n");
+ LOG(PLAIN) << "Usage: binlog_dump <binlog_file_name>";
+ return 1;
+ }
+ td::string binlog_file_name = argv[1];
+ auto r_stat = td::stat(binlog_file_name);
+ if (r_stat.is_error() || r_stat.ok().size_ == 0 || !r_stat.ok().is_reg_) {
+ LOG(PLAIN) << "Wrong binlog file name specified";
+ LOG(PLAIN) << "Usage: binlog_dump <binlog_file_name>";
return 1;
}
struct Info {
std::size_t full_size = 0;
std::size_t compressed_size = 0;
+ Trie trie;
+ Trie compressed_trie;
};
std::map<td::uint64, Info> info;
SET_VERBOSITY_LEVEL(VERBOSITY_NAME(ERROR));
td::Binlog binlog;
binlog
- .init(argv[1],
- [&](auto &event) {
- info[0].compressed_size += event.raw_event_.size();
- info[event.type_].compressed_size += event.raw_event_.size();
- },
- td::DbKey::empty(), td::DbKey::empty(), -1,
- [&](auto &event) mutable {
- info[0].full_size += event.raw_event_.size();
- info[event.type_].full_size += event.raw_event_.size();
- LOG(PLAIN) << "LogEvent[" << td::tag("id", td::format::as_hex(event.id_)) << td::tag("type", event.type_)
- << td::tag("flags", event.flags_) << td::tag("data", td::format::escaped(event.data_))
- << "]\n";
- })
+ .init(
+ binlog_file_name,
+ [&](auto &event) {
+ info[0].compressed_size += event.raw_event_.size();
+ info[event.type_].compressed_size += event.raw_event_.size();
+ if (event.type_ == ConfigPmcMagic || event.type_ == BinlogPmcMagic) {
+ auto key = td::TlParser(event.data_).fetch_string<td::Slice>();
+ info[event.type_].compressed_trie.add(key);
+ }
+ },
+ td::DbKey::raw_key("cucumber"), td::DbKey::empty(), -1,
+ [&](auto &event) mutable {
+ info[0].full_size += event.raw_event_.size();
+ info[event.type_].full_size += event.raw_event_.size();
+ if (event.type_ == ConfigPmcMagic || event.type_ == BinlogPmcMagic) {
+ auto key = td::TlParser(event.data_).fetch_string<td::Slice>();
+ info[event.type_].trie.add(key);
+ }
+ LOG(PLAIN) << "LogEvent[" << td::tag("id", td::format::as_hex(event.id_)) << td::tag("type", event.type_)
+ << td::tag("flags", event.flags_) << td::tag("size", event.data_.size())
+ << td::tag("data", td::format::escaped(event.data_)) << "]\n";
+ })
.ensure();
for (auto &it : info) {
- LOG(ERROR) << td::tag("handler", td::format::as_hex(it.first))
+ LOG(PLAIN) << td::tag("handler", td::format::as_hex(it.first))
<< td::tag("full_size", td::format::as_size(it.second.full_size))
<< td::tag("compressed_size", td::format::as_size(it.second.compressed_size));
+ it.second.trie.dump();
+ if (it.second.full_size != it.second.compressed_size) {
+ it.second.compressed_trie.dump();
+ }
}
return 0;