From 682f7fafcc29c39f52ff1f0713a7f34136758754 Mon Sep 17 00:00:00 2001 From: silverweed Date: Tue, 10 Sep 2024 16:59:22 +0200 Subject: [PATCH] properly read field and column desc --- src/hover.cpp | 600 ++++++++++++++++++++++++++++++++++++++++++++++ src/rntuple.cpp | 563 ------------------------------------------- src/rntviewer.cpp | 1 + 3 files changed, 601 insertions(+), 563 deletions(-) create mode 100644 src/hover.cpp diff --git a/src/hover.cpp b/src/hover.cpp new file mode 100644 index 0000000..d673837 --- /dev/null +++ b/src/hover.cpp @@ -0,0 +1,600 @@ +using ROOT::Experimental::Internal::RNTupleSerializer; + +struct Sec_Hover_Info { + Byte_Range rng; + // A string tree where children are more indented than parents + String8_Node *desc; +}; + +template T bswap_if_needed(T x) { return x; } +template <> u16 bswap_if_needed(u16 x) { return bswap(x); } +template <> u32 bswap_if_needed(u32 x) { return bswap(x); } +template <> u64 bswap_if_needed(u64 x) { return bswap(x); } + +template +String8_Node *hover_display_val_be(Arena *arena, String8_Node *prev, const char *fmt, T val) +{ + val = bswap_if_needed(val); + return push_str8_node_child(arena, prev, fmt, val); +} + +template <> +String8_Node *hover_display_val_be(Arena *arena, String8_Node *prev, const char *fmt, String8 val) +{ + return push_str8_node_child(arena, prev, fmt, val.c()); +} + +template +String8_Node *hover_display_val_le(Arena *arena, String8_Node *prev, const char *fmt, T val) +{ + return push_str8_node_child(arena, prev, fmt, val); +} + +template +String8_Node *hover_display_val_le_abs(Arena *arena, String8_Node *prev, const char *fmt, T val) +{ + return push_str8_node_child(arena, prev, fmt, std::abs(val)); +} + +internal +String8_Node *hover_display_datetime_str(Arena *arena, String8_Node *prev, const char *fmt_pre, u32 datetime) +{ + datetime = bswap(datetime); + + // datetime: + // year (6b) | month (4b) | day (5b) | hour (5b) | min (6b) | sec (6b) + u32 year = (datetime >> 26) + 1995; + u32 month = ((datetime & 0x3ff'ffff) >> 22) - 1; + u32 day = (datetime & 0x3f'ffff) >> 17; + u32 hour = (datetime & 0x1'ffff) >> 12; + u32 min = (datetime & 0xfff) >> 6; + u32 sec = datetime & 0x3f; + return push_str8_node_child(arena, prev, "%s%u/%02u/%02u %02u:%02u:%02u", fmt_pre, year, month, day, hour, min, sec); +} + +internal +String8_Node *display_val_rootzip(Arena *arena, String8_Node *prev, const char *fmt, const u8 *src) +{ + const u8 Z_DEFLATED = 8; + + String8 zip_method; + if (src[0] == 'Z' && src[1] == 'L' && src[2] == Z_DEFLATED) { + zip_method = str8("ZLIB"); + } else if (src[0] == 'C' && src[1] == 'S' && src[2] == Z_DEFLATED) { + zip_method = str8("Old"); + } else if (src[0] == 'X' && src[1] == 'Z' && src[2] == 0) { + zip_method = str8("LZMA"); + } else if (src[0] == 'L' && src[1] == '4') { + zip_method = str8("LZ4"); + } else if (src[0] == 'Z' && src[1] == 'S' && src[2] == 1) { + zip_method = str8("ZSTD"); + } else { + return nullptr; + } + u32 comp_size = src[3] | (src[4] << 8) | (src[5] << 16); + u32 uncomp_size = src[6] | (src[7] << 8) | (src[8] << 16); + + String8_Node *sn = push_str8_node_child(arena, prev, "%s", fmt); + sn = push_str8_node_child(arena, sn, "Zip method: %s", zip_method.c()); + sn = push_str8_node(arena, sn, "Compressed size: %s", to_pretty_size(arena, comp_size).c()); + sn = push_str8_node(arena, sn, "Uncompressed size: %s", to_pretty_size(arena, uncomp_size).c()); + sn = push_str8_node(arena, sn, "Comp. ratio: %.2f", (f32)comp_size / uncomp_size); + + return sn; +} + +template +using Display_Fn = String8_Node *(*)(Arena *, String8_Node *, const char *, T); + +// Functor used by get_section_hover_info to describe the structure of a section and print data about it. +struct Sec_Hover_Fn { + u64 start; + u64 roff; + const u8 *data; + Arena *arena; + Sec_Hover_Info &info; + u64 &cur_field_off; + + template + b8 titled_section(const char *title, F &&fn) const + { + String8_Node *prev_desc = info.desc; + info.desc = push_str8_node_child(arena, prev_desc, title); + + b8 ok = fn(); + + if (!ok) { + pop_str8_node_child(prev_desc, info.desc); + info.desc = prev_desc; + } + return ok; + } + + template + b8 field(const char *desc_fmt, Display_Fn display_val) const + { + static_assert(!std::is_same_v, "use field_str8 instead."); + u64 field_len = sizeof(T); + if (roff < cur_field_off + field_len) { + info.rng = { start + cur_field_off, field_len }; + T val; + memcpy(&val, data + info.rng.start, info.rng.len); + display_val(arena, info.desc, desc_fmt, val); + return true; + } + cur_field_off += field_len; + return false; + } + + template + b8 field_be(const char *desc_fmt) const + { + return field(desc_fmt, hover_display_val_be); + } + + template + b8 field_le(const char *desc_fmt) const + { + return field(desc_fmt, hover_display_val_le); + } + + template + b8 field_str8(const char *desc_fmt, Display_Fn display_val = hover_display_val_be) const + { + // String size can be stored as different types, like u8 (by ROOT I/O) or u32 (by RNTuple). + TStrSize str_size; + memcpy(&str_size, data + start + cur_field_off, sizeof(TStrSize)); + // TEMP DEBUG + if (str_size > 1000) + return false; + if (roff < cur_field_off + sizeof(TStrSize) + str_size) { + info.rng = { start + cur_field_off, sizeof(TStrSize) + (u64)str_size }; + u8 *buf = arena_push_array_nozero(arena, str_size + 1); + memcpy(buf, data + start + cur_field_off + sizeof(TStrSize), str_size); + buf[str_size] = 0; + String8 s = { buf, str_size }; + display_val(arena, info.desc, desc_fmt, s); + return true; + } + cur_field_off += sizeof(TStrSize) + str_size; + return false; + } + + b8 range(const char *desc, u64 range_len) const + { + if (roff < cur_field_off + range_len) { + info.rng = { start + cur_field_off, range_len }; + push_str8_node_child(arena, info.desc, "%s", desc); + return true; + } + cur_field_off += range_len; + return false; + } + + b8 range_data(const char *desc, u64 range_len, Display_Fn display_val) const + { + if (roff < cur_field_off + range_len) { + info.rng = { start + cur_field_off, range_len }; + display_val(arena, info.desc, desc, data + start + cur_field_off); + return true; + } + cur_field_off += range_len; + return false; + } + + b8 maybe_rootzip(b8 *was_zipped = nullptr) const + { + // TODO boundary checks + const u64 range_len = 9; + if (display_val_rootzip(arena, info.desc, "Zipped Block", data + start + cur_field_off)) { + if (was_zipped) *was_zipped = true; + if (roff < cur_field_off + range_len) { + info.rng = { start + cur_field_off, range_len }; + hover_display_val_be(arena, info.desc, "", data + start + cur_field_off); + return true; + } + // discard the description (it's fine since it's allocated in the scratch arena) + if (info.desc->first_child == info.desc->last_child) { + info.desc->first_child = info.desc->last_child = nullptr; + } else { + info.desc->last_child = info.desc->last_child->prev; + } + cur_field_off += range_len; + } else if (was_zipped) { + *was_zipped = false; + } + return false; + } + + b8 tkey() const + { + return titled_section("TKey", [this] { + u16 version_be; + memcpy(&version_be, data + start + 4, sizeof(u16)); + u32 version = bswap(version_be); + b8 is_big = version > 1000; + + if (is_big) { + return field_be("NBytes: %u") + || field("Version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 x) { + x = bswap(x); + x -= 1000; + return push_str8_node_child(arena, prev, fmt, x); + }) + || field_be("Obj Len: %u") + || field("Datetime: ", hover_display_datetime_str) + || field_be("Key Len: %u") + || field_be("Cycle: %u") + || field_be("Seek Key: 0x%" PRIX64) + || field_be("Seek Pdir: 0x%" PRIX64) + || field_str8("Class Name: %s") + || field_str8("Obj Name: %s") + || field_str8("Obj Title: %s") + ; + } else { + return field_be("NBytes: %u") + || field("Version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 x) { + x = bswap(x); + return push_str8_node_child(arena, prev, fmt, x); + }) + || field_be("Obj Len: %u") + || field("Datetime: ", hover_display_datetime_str) + || field_be("Key Len: %u") + || field_be("Cycle: %u") + || field_be("Seek Key: 0x%" PRIX64) + || field_be("Seek Pdir: 0x%" PRIX64) + || field_str8("Class Name: %s") + || field_str8("Obj Name: %s") + || field_str8("Obj Title: %s") + ; + } + }); + } + + b8 envelope_preamble() const + { + static const char *const envelope_names[] = { "INVALID", "Header", "Footer", "Page List" }; + return field("Envelope type: %s", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 val) { + const char *name = (val >= countof(envelope_names)) ? "Unknown" : envelope_names[val]; + return push_str8_node_child(arena, prev, fmt, name); + }) + || range_data("Envelope size: %s", 6, [] (Arena *arena, String8_Node *prev, const char *fmt, const u8 *payload) { + u64 size; + memcpy(&size, payload, 6); + return push_str8_node_child(arena, prev, fmt, to_pretty_size(arena, size)); + }); + } + + b8 list_frame_preamble() const + { + return titled_section("Frame Preamble", [this] { + return field("Size: %" PRIi64, hover_display_val_le_abs) + || field_le("N Items: %u") + ; + }); + } + + b8 field_desc() const + { + static const char *const field_struct_names[] = { + "Leaf", "Collection", "Record", "Variant", "Unsplit" + }; + return titled_section("Field", [this] { + u64 flags_off = start + cur_field_off + 22; + u16 flags; + memcpy(&flags, data + flags_off, sizeof(flags)); + b8 ok = field_le("Size: %" PRIi64) + || field_le("Field version: %u") + || field_le("Type version: %u") + || field_le("On-disk parent id: %u") + || field("Field structure: %s", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 type) { + const char *name = (type >= countof(field_struct_names)) ? "Unknown" : field_struct_names[type]; + return push_str8_node_child(arena, prev, fmt, name); + }) + || field_le("Flags: 0b%b") + ; + if (ok) + return ok; + + if ((flags & RNTupleSerializer::kFlagRepetitiveField) && field_le("N Repetitions: %" PRIu64)) + return true; + if ((flags & RNTupleSerializer::kFlagProjectedField) && field_le("On disk proj.src id: %u")) + return true; + if ((flags & RNTupleSerializer::kFlagHasTypeChecksum) && field_le("Checksum: %u")) + return true; + + return field_str8("Name: %s") + || field_str8("Type Name: %s") + || field_str8("Type Alias: %s") + || field_str8("Description: %s") + ; + }); + } + + b8 column_desc() const + { + return titled_section("Column", [this] { + u64 flags_off = start + cur_field_off + 16; + u16 flags; + memcpy(&flags, data + flags_off, sizeof(flags)); + b8 ok = field_le("Size: %" PRIi64) + || field_le("Column Type: 0x%X") // TODO: prettify name + || field_le("Bits on storage: %u") + || field_le("Field ID: %u") + || field_le("Flags: 0b%b") + || field_le("Representation idx: %u") + ; + if (ok) + return ok; + + if ((flags & RNTupleSerializer::kFlagDeferredColumn) && field_le("First element: %" PRIu64)) + return true; + if ((flags & RNTupleSerializer::kFlagHasValueRange) && (field_le("Value Min: %f") || field_le("Value Max: %f"))) + return true; + + return false; + }); + } + + b8 schema_description() const + { + return titled_section("Schema Description", [this] { + // Fields + if (list_frame_preamble()) + return true; + // we need to read back the number of fields to know how long is the next section. + u64 n_fields_off = cur_field_off - sizeof(u32); + u32 n_fields; + memcpy(&n_fields, data + start + n_fields_off, sizeof(n_fields)); + for (u32 i = 0; i < n_fields; ++i) + if (field_desc()) + return true; + + // Columns + if (list_frame_preamble()) + return true; + u64 n_columns_off = cur_field_off - sizeof(u32); + u32 n_columns; + memcpy(&n_columns, data + start + n_columns_off, sizeof(n_columns)); + for (u32 i = 0; i < n_columns; ++i) + if (column_desc()) + return true; + if (field("Size: %" PRIi64, hover_display_val_le_abs)) + return true; + + return false; // TODO + }); + } +}; + +// `off` is the absolute offset into `data`. +internal +Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, const u8 *data) +{ + Sec_Hover_Info info {}; + + // printf("off: 0x%" PRIX64 ", sec start - pre_size: (0x%" PRIX64 " - %" PRIu64 ") = 0x%" PRIX64 "\n", off, section.range.start, section.pre_size, section.range.start - section.pre_size); + assert(off >= section.range.start - section.pre_size); + + // Hover info header + String8 sec_name = section_names[section.id]; + if (section.id == Sec_Page && section.info) { + Page_Info_Node *pinfo = (Page_Info_Node *)section.info; + info.desc = push_str8_node(arena, nullptr, "%s [%s]", + sec_name.c(), pinfo->elem_type_name.c()); + push_str8_node_child(arena, info.desc, "Field: %s", pinfo->owner_field_name.c()); + push_str8_node_child(arena, info.desc, "N. Elems: %d", abs(pinfo->n_elems)); + push_str8_node_child(arena, info.desc, "Bits per elem: %u", pinfo->bits_per_elem); + push_str8_node_child(arena, info.desc, "-----------"); + } else { + info.desc = push_str8_node(arena, nullptr, "%s", sec_name.c()); + } + + u64 start = section.range.start - section.pre_size; + u64 roff = off - start; // offset relative to `section` + u64 cur_field_off = 0; + Sec_Hover_Fn hover { start, roff, data, arena, info, cur_field_off }; + + switch (section.id) { + case Sec_RNTuple_Anchor: { + hover.tkey() + || hover.field("Object len: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u32 x) { + x = bswap(x); + x -= 0x4000'0000; + return push_str8_node_child(arena, prev, fmt, x); + }) + || hover.field_be("Class version: %u") + || hover.field_be("Version Epoch: %u") + || hover.field_be("Version Major: %u") + || hover.field_be("Version Minor: %u") + || hover.field_be("Version Patch: %u") + || hover.field_be("Seek Header: 0x%" PRIX64) + || hover.field_be("NBytes Header: %u") + || hover.field_be("Len Header: %u") + || hover.field_be("Seek Footer: 0x%" PRIX64) + || hover.field_be("NBytes Footer: %u") + || hover.field_be("Len Footer: %u") + || hover.field_be("Max Key Size: %u") + || hover.field_le("Checksum: 0x%" PRIX64) + ; + } break; + + case Sec_TFile_Header: { + u32 root_version_be; + memcpy(&root_version_be, data + start + 4, sizeof(u32)); + u32 root_version = bswap(root_version_be); + b8 is_big = root_version > 1000000; + + if (is_big) { + hover.field_be("ROOT magic number") + || hover.field("ROOT version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u32 x) { + x = bswap(x); + x -= 1000000; + return push_str8_node_child(arena, prev, fmt, x); + }) + || hover.field_be("fBEGIN: 0x%" PRIX64) + || hover.field_be("fEND: 0x%" PRIX64) + || hover.field_be("Seek Free: 0x%" PRIX64) + || hover.field_be("NBytes Free: %u") + || hover.field_be("N Free: %u") + || hover.field_be("NBytes Name: %u") + || hover.field_be("Units: %u") + || hover.field_be("Compression: %u") + || hover.field_be("Seek Info: 0x%" PRIX64) + || hover.field_be("NBytes Info: %u") + || hover.range("Padding", section.post_size) + ; + } else { + hover.field_be("ROOT magic number") + || hover.field_be("ROOT version: %u") + || hover.field_be("fBEGIN: 0x%" PRIX64) + || hover.field_be("fEND: 0x%" PRIX64) + || hover.field_be("Seek Free: 0x%" PRIX64) + || hover.field_be("NBytes Free: %u") + || hover.field_be("N Free: %u") + || hover.field_be("NBytes Name: %u") + || hover.field_be("Units: %u") + || hover.field_be("Compression: %u") + || hover.field_be("Seek Info: 0x%" PRIX64) + || hover.field_be("NBytes Info: %u") + || hover.range("Padding", section.post_size) + ; + } + } break; + + case Sec_TFile_Object: { + if (!hover.tkey()) { + b8 ok = hover.field_str8("File Name: %s") + || hover.field_str8("File Title: %s") + ; + if (!ok) { + u16 version_be; + memcpy(&version_be, data + cur_field_off, sizeof(u16)); + u16 version = bswap(version_be); + b8 is_big = version > 1000; + + if (is_big) { + ok = ok || hover.field("Version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 x) { + x = bswap(x); + x -= 1000; + return push_str8_node_child(arena, prev, fmt, x); + }) + || hover.field("Created: ", hover_display_datetime_str) + || hover.field("Modified: ", hover_display_datetime_str) + || hover.field_be("NBytes Key: %u") + || hover.field_be("NBytes Name: %u") + || hover.field_be("Seek Dir: 0x%" PRIX64) + || hover.field_be("Seek Parent: 0x%" PRIX64) + || hover.field_be("Seek Keys: 0x%" PRIX64) + || hover.field_be("UUID Vers.Class: %u") + || hover.field_le("UUID: %u") + ; + } else { + ok = ok || hover.field_be("Version: %u") + || hover.field("Created: ", hover_display_datetime_str) + || hover.field("Modified: ", hover_display_datetime_str) + || hover.field_be("NBytes Key: %u") + || hover.field_be("NBytes Name: %u") + || hover.field_be("Seek Dir: 0x%" PRIX64) + || hover.field_be("Seek Parent: 0x%" PRIX64) + || hover.field_be("Seek Keys: 0x%" PRIX64) + || hover.field_be("UUID Vers.Class: %u") + || hover.field_le("UUID: %u") + || hover.range("Padding", 3 * sizeof(u32)) + ; + } + } + } + } break; + + case Sec_RNTuple_Header: + if (!hover.tkey()) { + b8 zipped; + if (!hover.maybe_rootzip(&zipped)) { + if (zipped) { + hover.range("Payload", section.range.len - section.post_size) + || hover.field_le("Checksum: 0x%" PRIX64) + ; + } else { + hover.envelope_preamble() + // NOTE: flags in principle require a more complex handling, but for now they are unused, + // so they're always occupying only 8 bytes. + || hover.field_le("Flags: 0x%" PRIX64) + || hover.field_str8("Name: %s") + || hover.field_str8("Description: %s") + || hover.field_str8("ROOT version: %s") + || hover.schema_description() + || hover.range("Payload", section.range.len - hover.cur_field_off) // TODO + || hover.field_le("Checksum: 0x%" PRIX64) + ; + } + } + } + break; + case Sec_RNTuple_Footer: + case Sec_Page_List: { + hover.tkey() + || hover.maybe_rootzip() + || hover.range("Payload", section.range.len - section.post_size) // TODO: improve + || hover.field_le("Checksum: 0x%" PRIX64) + ; + } break; + + case Sec_Page: { + // only try hovering a key if this is the first page of the cluster (<=> pre_size != 0) + b8 ok = section.pre_size && hover.tkey(); + ok = ok || hover.maybe_rootzip() + || hover.range("Payload", section.range.len - section.post_size) // TODO: improve + || hover.field_le("Checksum: 0x%" PRIX64) + ; + } break; + + case Sec_TFile_Info: { + hover.tkey() + || hover.maybe_rootzip() + // || hover.field("Byte Count: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u32 x) { + // x = bswap(x); + // x -= 0x400000000; + // return push_str8_node_child(arena, prev, fmt, x); + // }) + // || hover.field_be("Version: %u") + // || hover_try_object(hover) + // || hover.field_be("Name: %u") + // || hover.field_be("N Objects: %u") + || hover.range("Payload", section.range.len) // TODO: improve + ; + } break; + + case Sec_TFile_FreeList: { + if (!hover.tkey()) { + u16 version_be; + memcpy(&version_be, data + start + hover.cur_field_off, sizeof(u16)); + u32 version = bswap(version_be); + b8 is_big = version > 1000; + + if (is_big) { + hover.field("Version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 x) { + x = bswap(x); + x -= 1000; + return push_str8_node_child(arena, prev, fmt, x); + }) + || hover.field_be("First: 0x%" PRIX64) + || hover.field_be("Last: 0x%" PRIX64) + ; + } else { + hover.field("Version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 x) { + x = bswap(x); + return push_str8_node_child(arena, prev, fmt, x); + }) + || hover.field_be("First: 0x%X") + || hover.field_be("Last: 0x%X") + ; + } + } + } break; + + default:; + } + + return info; +} diff --git a/src/rntuple.cpp b/src/rntuple.cpp index c60d4cc..1b61b5b 100644 --- a/src/rntuple.cpp +++ b/src/rntuple.cpp @@ -623,566 +623,3 @@ Section find_section(App_State &app, u64 off, i64 hilite_cluster = -1) return {}; } -struct Sec_Hover_Info { - Byte_Range rng; - // A string tree where children are more indented than parents - String8_Node *desc; -}; - -template T bswap_if_needed(T x) { return x; } -template <> u16 bswap_if_needed(u16 x) { return bswap(x); } -template <> u32 bswap_if_needed(u32 x) { return bswap(x); } -template <> u64 bswap_if_needed(u64 x) { return bswap(x); } - -template -String8_Node *hover_display_val_be(Arena *arena, String8_Node *prev, const char *fmt, T val) -{ - val = bswap_if_needed(val); - return push_str8_node_child(arena, prev, fmt, val); -} - -template <> -String8_Node *hover_display_val_be(Arena *arena, String8_Node *prev, const char *fmt, String8 val) -{ - return push_str8_node_child(arena, prev, fmt, val.c()); -} - -template -String8_Node *hover_display_val_le(Arena *arena, String8_Node *prev, const char *fmt, T val) -{ - return push_str8_node_child(arena, prev, fmt, val); -} - -template -String8_Node *hover_display_val_le_abs(Arena *arena, String8_Node *prev, const char *fmt, T val) -{ - return push_str8_node_child(arena, prev, fmt, std::abs(val)); -} - -internal -String8_Node *hover_display_datetime_str(Arena *arena, String8_Node *prev, const char *fmt_pre, u32 datetime) -{ - datetime = bswap(datetime); - - // datetime: - // year (6b) | month (4b) | day (5b) | hour (5b) | min (6b) | sec (6b) - u32 year = (datetime >> 26) + 1995; - u32 month = ((datetime & 0x3ff'ffff) >> 22) - 1; - u32 day = (datetime & 0x3f'ffff) >> 17; - u32 hour = (datetime & 0x1'ffff) >> 12; - u32 min = (datetime & 0xfff) >> 6; - u32 sec = datetime & 0x3f; - return push_str8_node_child(arena, prev, "%s%u/%02u/%02u %02u:%02u:%02u", fmt_pre, year, month, day, hour, min, sec); -} - -internal -String8_Node *display_val_rootzip(Arena *arena, String8_Node *prev, const char *fmt, const u8 *src) -{ - const u8 Z_DEFLATED = 8; - - String8 zip_method; - if (src[0] == 'Z' && src[1] == 'L' && src[2] == Z_DEFLATED) { - zip_method = str8("ZLIB"); - } else if (src[0] == 'C' && src[1] == 'S' && src[2] == Z_DEFLATED) { - zip_method = str8("Old"); - } else if (src[0] == 'X' && src[1] == 'Z' && src[2] == 0) { - zip_method = str8("LZMA"); - } else if (src[0] == 'L' && src[1] == '4') { - zip_method = str8("LZ4"); - } else if (src[0] == 'Z' && src[1] == 'S' && src[2] == 1) { - zip_method = str8("ZSTD"); - } else { - return nullptr; - } - u32 comp_size = src[3] | (src[4] << 8) | (src[5] << 16); - u32 uncomp_size = src[6] | (src[7] << 8) | (src[8] << 16); - - String8_Node *sn = push_str8_node_child(arena, prev, "%s", fmt); - sn = push_str8_node_child(arena, sn, "Zip method: %s", zip_method.c()); - sn = push_str8_node(arena, sn, "Compressed size: %s", to_pretty_size(arena, comp_size).c()); - sn = push_str8_node(arena, sn, "Uncompressed size: %s", to_pretty_size(arena, uncomp_size).c()); - sn = push_str8_node(arena, sn, "Comp. ratio: %.2f", (f32)comp_size / uncomp_size); - - return sn; -} - -template -using Display_Fn = String8_Node *(*)(Arena *, String8_Node *, const char *, T); - -// Functor used by get_section_hover_info to describe the structure of a section and print data about it. -struct Sec_Hover_Fn { - u64 start; - u64 roff; - const u8 *data; - Arena *arena; - Sec_Hover_Info &info; - u64 &cur_field_off; - - template - b8 titled_section(const char *title, F &&fn) const - { - String8_Node *prev_desc = info.desc; - info.desc = push_str8_node_child(arena, prev_desc, title); - - b8 ok = fn(); - - if (!ok) { - pop_str8_node_child(prev_desc, info.desc); - info.desc = prev_desc; - } - return ok; - } - - template - b8 field(const char *desc_fmt, Display_Fn display_val) const - { - static_assert(!std::is_same_v, "use field_str8 instead."); - u64 field_len = sizeof(T); - if (roff < cur_field_off + field_len) { - info.rng = { start + cur_field_off, field_len }; - T val; - memcpy(&val, data + info.rng.start, info.rng.len); - display_val(arena, info.desc, desc_fmt, val); - return true; - } - cur_field_off += field_len; - return false; - } - - template - b8 field_be(const char *desc_fmt) const - { - return field(desc_fmt, hover_display_val_be); - } - - template - b8 field_le(const char *desc_fmt) const - { - return field(desc_fmt, hover_display_val_le); - } - - template - b8 field_str8(const char *desc_fmt, Display_Fn display_val = hover_display_val_be) const - { - // String size can be stored as different types, like u8 (by ROOT I/O) or u32 (by RNTuple). - TStrSize str_size; - memcpy(&str_size, data + start + cur_field_off, sizeof(TStrSize)); - // TEMP DEBUG - if (str_size > 1000) - return false; - if (roff < cur_field_off + sizeof(TStrSize) + str_size) { - info.rng = { start + cur_field_off, sizeof(TStrSize) + (u64)str_size }; - u8 *buf = arena_push_array_nozero(arena, str_size + 1); - memcpy(buf, data + start + cur_field_off + sizeof(TStrSize), str_size); - buf[str_size] = 0; - String8 s = { buf, str_size }; - display_val(arena, info.desc, desc_fmt, s); - return true; - } - cur_field_off += sizeof(TStrSize) + str_size; - return false; - } - - b8 range(const char *desc, u64 range_len) const - { - if (roff < cur_field_off + range_len) { - info.rng = { start + cur_field_off, range_len }; - push_str8_node_child(arena, info.desc, "%s", desc); - return true; - } - cur_field_off += range_len; - return false; - } - - b8 range_data(const char *desc, u64 range_len, Display_Fn display_val) const - { - if (roff < cur_field_off + range_len) { - info.rng = { start + cur_field_off, range_len }; - display_val(arena, info.desc, desc, data + start + cur_field_off); - return true; - } - cur_field_off += range_len; - return false; - } - - b8 maybe_rootzip(b8 *was_zipped = nullptr) const - { - // TODO boundary checks - const u64 range_len = 9; - if (display_val_rootzip(arena, info.desc, "Zipped Block", data + start + cur_field_off)) { - if (was_zipped) *was_zipped = true; - if (roff < cur_field_off + range_len) { - info.rng = { start + cur_field_off, range_len }; - hover_display_val_be(arena, info.desc, "", data + start + cur_field_off); - return true; - } - // discard the description (it's fine since it's allocated in the scratch arena) - if (info.desc->first_child == info.desc->last_child) { - info.desc->first_child = info.desc->last_child = nullptr; - } else { - info.desc->last_child = info.desc->last_child->prev; - } - cur_field_off += range_len; - } else if (was_zipped) { - *was_zipped = false; - } - return false; - } - - b8 tkey() const - { - return titled_section("TKey", [this] { - u16 version_be; - memcpy(&version_be, data + start + 4, sizeof(u16)); - u32 version = bswap(version_be); - b8 is_big = version > 1000; - - if (is_big) { - return field_be("NBytes: %u") - || field("Version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 x) { - x = bswap(x); - x -= 1000; - return push_str8_node_child(arena, prev, fmt, x); - }) - || field_be("Obj Len: %u") - || field("Datetime: ", hover_display_datetime_str) - || field_be("Key Len: %u") - || field_be("Cycle: %u") - || field_be("Seek Key: 0x%" PRIX64) - || field_be("Seek Pdir: 0x%" PRIX64) - || field_str8("Class Name: %s") - || field_str8("Obj Name: %s") - || field_str8("Obj Title: %s") - ; - } else { - return field_be("NBytes: %u") - || field("Version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 x) { - x = bswap(x); - return push_str8_node_child(arena, prev, fmt, x); - }) - || field_be("Obj Len: %u") - || field("Datetime: ", hover_display_datetime_str) - || field_be("Key Len: %u") - || field_be("Cycle: %u") - || field_be("Seek Key: 0x%" PRIX64) - || field_be("Seek Pdir: 0x%" PRIX64) - || field_str8("Class Name: %s") - || field_str8("Obj Name: %s") - || field_str8("Obj Title: %s") - ; - } - }); - } - - b8 envelope_preamble() const - { - static const char *const envelope_names[] = { "INVALID", "Header", "Footer", "Page List" }; - return field("Envelope type: %s", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 val) { - const char *name = (val >= countof(envelope_names)) ? "Unknown" : envelope_names[val]; - return push_str8_node_child(arena, prev, fmt, name); - }) - || range_data("Envelope size: %s", 6, [] (Arena *arena, String8_Node *prev, const char *fmt, const u8 *payload) { - u64 size; - memcpy(&size, payload, 6); - return push_str8_node_child(arena, prev, fmt, to_pretty_size(arena, size)); - }); - } - - b8 list_frame_preamble() const - { - return titled_section("Frame Preamble", [this] { - return field("Size: %" PRIi64, hover_display_val_le_abs) - || field_le("N Items: %u") - ; - }); - } - - b8 field_desc() const - { - static const char *const field_struct_names[] = { - "Leaf", "Collection", "Record", "Variant", "Unsplit" - }; - return titled_section("Field", [this] { - u64 flags_off = start + cur_field_off + 22; - u16 flags; - memcpy(&flags, data + flags_off, sizeof(flags)); - b8 ok = field_le("Size: %" PRIi64) - || field_le("Field version: %u") - || field_le("Type version: %u") - || field_le("On-disk parent id: %u") - || field("Field structure: %s", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 type) { - const char *name = (type >= countof(field_struct_names)) ? "Unknown" : field_struct_names[type]; - return push_str8_node_child(arena, prev, fmt, name); - }) - || field_le("Flags: %u") - ; - if (ok) - return ok; - - using ROOT::Experimental::Internal::RNTupleSerializer; - - if ((flags & RNTupleSerializer::kFlagRepetitiveField) && field_le("N Repetitions: %" PRIu64)) - return true; - if ((flags & RNTupleSerializer::kFlagProjectedField) && field_le("On disk proj.src id: %u")) - return true; - if ((flags & RNTupleSerializer::kFlagHasTypeChecksum) && field_le("Checksum: %u")) - return true; - - return field_str8("Name: %s") - || field_str8("Type Name: %s") - || field_str8("Type Alias: %s") - || field_str8("Description: %s") - ; - }); - } - - b8 schema_description() const - { - return titled_section("Schema Description", [this] { - if (list_frame_preamble()) - return true; - // we need to read back the number of fields to know how long is the next section. - u64 n_fields_off = cur_field_off - sizeof(u32); - u32 n_fields; - memcpy(&n_fields, data + start + n_fields_off, sizeof(n_fields)); - for (u32 i = 0; i < n_fields; ++i) - if (field_desc()) - return true; - if (field("Size: %" PRIi64, hover_display_val_le_abs)) - return true; - return false; // TODO - }); - } -}; - -// `off` is the absolute offset into `data`. -internal -Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, const u8 *data) -{ - Sec_Hover_Info info {}; - - // printf("off: 0x%" PRIX64 ", sec start - pre_size: (0x%" PRIX64 " - %" PRIu64 ") = 0x%" PRIX64 "\n", off, section.range.start, section.pre_size, section.range.start - section.pre_size); - assert(off >= section.range.start - section.pre_size); - - // Hover info header - String8 sec_name = section_names[section.id]; - if (section.id == Sec_Page && section.info) { - Page_Info_Node *pinfo = (Page_Info_Node *)section.info; - info.desc = push_str8_node(arena, nullptr, "%s [%s]", - sec_name.c(), pinfo->elem_type_name.c()); - push_str8_node_child(arena, info.desc, "Field: %s", pinfo->owner_field_name.c()); - push_str8_node_child(arena, info.desc, "N. Elems: %d", abs(pinfo->n_elems)); - push_str8_node_child(arena, info.desc, "Bits per elem: %u", pinfo->bits_per_elem); - push_str8_node_child(arena, info.desc, "-----------"); - } else { - info.desc = push_str8_node(arena, nullptr, "%s", sec_name.c()); - } - - u64 start = section.range.start - section.pre_size; - u64 roff = off - start; // offset relative to `section` - u64 cur_field_off = 0; - Sec_Hover_Fn hover { start, roff, data, arena, info, cur_field_off }; - - switch (section.id) { - case Sec_RNTuple_Anchor: { - hover.tkey() - || hover.field("Object len: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u32 x) { - x = bswap(x); - x -= 0x4000'0000; - return push_str8_node_child(arena, prev, fmt, x); - }) - || hover.field_be("Class version: %u") - || hover.field_be("Version Epoch: %u") - || hover.field_be("Version Major: %u") - || hover.field_be("Version Minor: %u") - || hover.field_be("Version Patch: %u") - || hover.field_be("Seek Header: 0x%" PRIX64) - || hover.field_be("NBytes Header: %u") - || hover.field_be("Len Header: %u") - || hover.field_be("Seek Footer: 0x%" PRIX64) - || hover.field_be("NBytes Footer: %u") - || hover.field_be("Len Footer: %u") - || hover.field_be("Max Key Size: %u") - || hover.field_le("Checksum: 0x%" PRIX64) - ; - } break; - - case Sec_TFile_Header: { - u32 root_version_be; - memcpy(&root_version_be, data + start + 4, sizeof(u32)); - u32 root_version = bswap(root_version_be); - b8 is_big = root_version > 1000000; - - if (is_big) { - hover.field_be("ROOT magic number") - || hover.field("ROOT version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u32 x) { - x = bswap(x); - x -= 1000000; - return push_str8_node_child(arena, prev, fmt, x); - }) - || hover.field_be("fBEGIN: 0x%" PRIX64) - || hover.field_be("fEND: 0x%" PRIX64) - || hover.field_be("Seek Free: 0x%" PRIX64) - || hover.field_be("NBytes Free: %u") - || hover.field_be("N Free: %u") - || hover.field_be("NBytes Name: %u") - || hover.field_be("Units: %u") - || hover.field_be("Compression: %u") - || hover.field_be("Seek Info: 0x%" PRIX64) - || hover.field_be("NBytes Info: %u") - || hover.range("Padding", section.post_size) - ; - } else { - hover.field_be("ROOT magic number") - || hover.field_be("ROOT version: %u") - || hover.field_be("fBEGIN: 0x%" PRIX64) - || hover.field_be("fEND: 0x%" PRIX64) - || hover.field_be("Seek Free: 0x%" PRIX64) - || hover.field_be("NBytes Free: %u") - || hover.field_be("N Free: %u") - || hover.field_be("NBytes Name: %u") - || hover.field_be("Units: %u") - || hover.field_be("Compression: %u") - || hover.field_be("Seek Info: 0x%" PRIX64) - || hover.field_be("NBytes Info: %u") - || hover.range("Padding", section.post_size) - ; - } - } break; - - case Sec_TFile_Object: { - if (!hover.tkey()) { - b8 ok = hover.field_str8("File Name: %s") - || hover.field_str8("File Title: %s") - ; - if (!ok) { - u16 version_be; - memcpy(&version_be, data + cur_field_off, sizeof(u16)); - u16 version = bswap(version_be); - b8 is_big = version > 1000; - - if (is_big) { - ok = ok || hover.field("Version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 x) { - x = bswap(x); - x -= 1000; - return push_str8_node_child(arena, prev, fmt, x); - }) - || hover.field("Created: ", hover_display_datetime_str) - || hover.field("Modified: ", hover_display_datetime_str) - || hover.field_be("NBytes Key: %u") - || hover.field_be("NBytes Name: %u") - || hover.field_be("Seek Dir: 0x%" PRIX64) - || hover.field_be("Seek Parent: 0x%" PRIX64) - || hover.field_be("Seek Keys: 0x%" PRIX64) - || hover.field_be("UUID Vers.Class: %u") - || hover.field_le("UUID: %u") - ; - } else { - ok = ok || hover.field_be("Version: %u") - || hover.field("Created: ", hover_display_datetime_str) - || hover.field("Modified: ", hover_display_datetime_str) - || hover.field_be("NBytes Key: %u") - || hover.field_be("NBytes Name: %u") - || hover.field_be("Seek Dir: 0x%" PRIX64) - || hover.field_be("Seek Parent: 0x%" PRIX64) - || hover.field_be("Seek Keys: 0x%" PRIX64) - || hover.field_be("UUID Vers.Class: %u") - || hover.field_le("UUID: %u") - || hover.range("Padding", 3 * sizeof(u32)) - ; - } - } - } - } break; - - case Sec_RNTuple_Header: - if (!hover.tkey()) { - b8 zipped; - if (!hover.maybe_rootzip(&zipped)) { - if (zipped) { - hover.range("Payload", section.range.len - section.post_size) - || hover.field_le("Checksum: 0x%" PRIX64) - ; - } else { - hover.envelope_preamble() - // NOTE: flags in principle require a more complex handling, but for now they are unused, - // so they're always occupying only 8 bytes. - || hover.field_le("Flags: 0x%" PRIX64) - || hover.field_str8("Name: %s") - || hover.field_str8("Description: %s") - || hover.field_str8("ROOT version: %s") - || hover.schema_description() - || hover.range("Payload", section.range.len - hover.cur_field_off) // TODO - || hover.field_le("Checksum: 0x%" PRIX64) - ; - } - } - } - break; - case Sec_RNTuple_Footer: - case Sec_Page_List: { - hover.tkey() - || hover.maybe_rootzip() - || hover.range("Payload", section.range.len - section.post_size) // TODO: improve - || hover.field_le("Checksum: 0x%" PRIX64) - ; - } break; - - case Sec_Page: { - // only try hovering a key if this is the first page of the cluster (<=> pre_size != 0) - b8 ok = section.pre_size && hover.tkey(); - ok = ok || hover.maybe_rootzip() - || hover.range("Payload", section.range.len - section.post_size) // TODO: improve - || hover.field_le("Checksum: 0x%" PRIX64) - ; - } break; - - case Sec_TFile_Info: { - hover.tkey() - || hover.maybe_rootzip() - // || hover.field("Byte Count: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u32 x) { - // x = bswap(x); - // x -= 0x400000000; - // return push_str8_node_child(arena, prev, fmt, x); - // }) - // || hover.field_be("Version: %u") - // || hover_try_object(hover) - // || hover.field_be("Name: %u") - // || hover.field_be("N Objects: %u") - || hover.range("Payload", section.range.len) // TODO: improve - ; - } break; - - case Sec_TFile_FreeList: { - if (!hover.tkey()) { - u16 version_be; - memcpy(&version_be, data + start + hover.cur_field_off, sizeof(u16)); - u32 version = bswap(version_be); - b8 is_big = version > 1000; - - if (is_big) { - hover.field("Version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 x) { - x = bswap(x); - x -= 1000; - return push_str8_node_child(arena, prev, fmt, x); - }) - || hover.field_be("First: 0x%" PRIX64) - || hover.field_be("Last: 0x%" PRIX64) - ; - } else { - hover.field("Version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 x) { - x = bswap(x); - return push_str8_node_child(arena, prev, fmt, x); - }) - || hover.field_be("First: 0x%X") - || hover.field_be("Last: 0x%X") - ; - } - } - } break; - - default:; - } - - return info; -} diff --git a/src/rntviewer.cpp b/src/rntviewer.cpp index 8290d3c..afc8cf0 100644 --- a/src/rntviewer.cpp +++ b/src/rntviewer.cpp @@ -71,6 +71,7 @@ namespace chr = std::chrono; #include "mem.cpp" #include "str.cpp" #include "rntuple.cpp" +#include "hover.cpp" #include "render_term.cpp" #include "argparse.cpp"