From a57f48d5ff422063b912a4c864fbc8b0fb76659a Mon Sep 17 00:00:00 2001 From: silverweed Date: Fri, 27 Sep 2024 15:09:16 +0200 Subject: [PATCH] fix(?) cluster parsing --- src/hover.cpp | 233 ++++++++++++++++++++++++++++++++++------------ src/rntviewer.cpp | 5 +- 2 files changed, 178 insertions(+), 60 deletions(-) diff --git a/src/hover.cpp b/src/hover.cpp index cc10452..18212f2 100644 --- a/src/hover.cpp +++ b/src/hover.cpp @@ -1,14 +1,43 @@ +// C++ fuckery to get the number of lambda arguments +template +struct Signature; + +template +struct Tuple { + static constexpr u32 N_Elems = sizeof...(Args); +}; + +// Oh god, oh fuck +template +struct Signature { + static constexpr u32 N_Args = Tuple::N_Elems; +}; + +// Bruh +template +constexpr u32 n_functor_args = Signature::operator())>::N_Args; +// -------- end C++ fuckery + +// Dummy argument used by Sec_Hover_Fn::frame() to notify it wants to handle the for loop by itself. +// See comment in frame(). +using Frame_List_Special_Handling = const void *; + +// The data returned by get_section_hover_info(), i.e. the only reason why this entire file exists. struct Sec_Hover_Info { + // Highlighted byte range Byte_Range rng; - // A string tree where children are more indented than parents + // A string tree where children are displayed as more indented than parents String8_Node *desc; + // The one line of the tree that gets colored String8_Node *highlighted_desc; }; -template T bswap_if_needed(T x) { return x; } -template <> u16 bswap_if_needed(u16 x) { return bswap(x); } -template <> u32 bswap_if_needed(u32 x) { return bswap(x); } -template <> u64 bswap_if_needed(u64 x) { return bswap(x); } +template T bswap_if_needed(T x) { + if constexpr (sizeof(T) > 1 && std::is_integral_v) + return bswap(x); + else + return x; +} template String8_Node *hover_display_val_be(Arena *arena, String8_Node *prev, const char *fmt, T val) @@ -35,9 +64,9 @@ String8_Node *hover_display_val_le_abs(Arena *arena, String8_Node *prev, const c return push_str8_node_child(arena, prev, fmt, std::abs(val)); } -String8_Node *hover_display_generic_range(Arena *arena, String8_Node *prev, const char *desc, const u8 *) +String8_Node *hover_display_generic_range(Arena *arena, String8_Node *prev, const char *desc, const u8 *, u64 size) { - return push_str8_node_child(arena, prev, desc); + return push_str8_node_child(arena, prev, push_str8f(arena, "%s (%s)", desc, to_pretty_size(arena, size).c()).c()); } internal @@ -91,6 +120,8 @@ String8_Node *display_val_rootzip(Arena *arena, String8_Node *prev, const char * template using Display_Fn = String8_Node *(*)(Arena *, String8_Node *, const char *, T); +using Display_Range_Fn = String8_Node *(*)(Arena *, String8_Node *, const char *, const u8 *, u64); + enum Hover_Section_Flags { HoverSec_None = 0, HoverSec_HideIfNotHovered = 1, @@ -225,13 +256,13 @@ struct Sec_Hover_Fn { } // An unspecified range of bytes - void range(const char *desc, u64 range_len, Display_Fn display_val = hover_display_generic_range) + void range(const char *desc, u64 range_len, Display_Range_Fn display_val = hover_display_generic_range) { // if (ended) // return; b8 hovered = cur_field_off <= roff && roff < cur_field_off + range_len; - String8_Node *dsc = display_val(arena, info.desc, desc, data + start + cur_field_off); + String8_Node *dsc = display_val(arena, info.desc, desc, data + start + cur_field_off, range_len); if (hovered && !display_grouped) info.highlighted_desc = dsc; @@ -307,7 +338,7 @@ struct Sec_Hover_Fn { const char *name = (val >= countof(envelope_names)) ? "Unknown" : envelope_names[val]; return push_str8_node_child(arena, prev, fmt, name); }); - range("Envelope size: %s", 6, [] (Arena *arena, String8_Node *prev, const char *fmt, const u8 *payload) { + range("Envelope size: %s", 6, [] (Arena *arena, String8_Node *prev, const char *fmt, const u8 *payload, u64) { u64 size; memcpy(&size, payload, 6); return push_str8_node_child(arena, prev, fmt, to_pretty_size(arena, size)); @@ -328,6 +359,16 @@ struct Sec_Hover_Fn { titled_section(titlestr.c(), [this, &frame_type, &frame_size = size, n_items] { i64 size; memcpy(&size, data + start + cur_field_off, sizeof(size)); + // Sanity check + if (size > 1000000) { + fprintf(stderr, "Frame size read at 0x%" PRIX64 " looks bogus" + " (is it really %s? Don't think so...); setting it to 0 for good measure.\n", + start + cur_field_off, to_pretty_size(arena, size).c()); + frame_size = 0; + frame_type = Frame_INVALID; + return; + } + if (size >= 0) { frame_type = Frame_Record; field("Record frame size: %" PRIi64 " B", hover_display_val_le_abs); @@ -340,15 +381,14 @@ struct Sec_Hover_Fn { } else { frame_type = Frame_List; memcpy(n_items, data + start + cur_field_off + sizeof(i64), sizeof(u32)); - titled_section("List Frame", [this] { - field("Size: %" PRIi64 " B", hover_display_val_le_abs); - field_le("N Items: %u"); - }); + field("List frame size: %" PRIi64 " B", hover_display_val_le_abs); + field_le("List frame n.items: %u"); } } frame_size = std::abs(size); }); + return frame_type; } @@ -403,34 +443,86 @@ struct Sec_Hover_Fn { if (flags & RNTupleSerializer::kFlagDeferredColumn) { field_le("First element: %" PRIu64); } - // if (flags & RNTupleSerializer::kFlagHasValueRange) { - // field_le("Value Min: %f"); - // field_le("Value Max: %f"); - // } + if (flags & RNTupleSerializer::kFlagHasValueRange) { + field_le("Value Min: %f"); + field_le("Value Max: %f"); + } }, HoverSec_HideIfNotHovered); } void schema_description(const char *title) { titled_section(title, [this] { - // TODO: Columns and alias columns are not the same frame("Fields", [this] (u32 idx) { field_desc(push_str8f(arena, "Field %u", idx).c()); }); frame("Columns", [this] (u32 idx) { column_desc(push_str8f(arena, "Column %u", idx).c()); }); - frame("Alias Columns", [this] (u32 idx) { column_desc(push_str8f(arena, "Alias Column %u", idx).c()); }); - frame("Extra Type Infos", [this] (u32) { - field_le("Content identifier: %lu"); - field_le("Type version from: %lu"); - field_le("Type version to: %lu"); + frame("Alias Columns", [this] (u32 idx) { + frame(push_str8f(arena, "Alias Column %u", idx).c(), [this] { + field_le("Phys Col Id: %u"); + field_le("Field Id: %u"); + }); + }); + frame("Extra Type Infos", [this] (u32 idx) { + frame(push_str8f(arena, "Extra Type Info %u", idx).c(), [this] { + field_le("Content identifier: %lu"); + field_le("Type version from: %lu"); + field_le("Type version to: %lu"); + }); }); }); } void locator(const char *title) { - // TODO - // return titled_section(title, [this] { - // return true; - // }); + titled_section(title, [this] { + i32 head; + b8 ok = field("", [] (Arena *arena, String8_Node *prev, const char *, i32 head) { + if (head < 0) { + head = -head; + i32 type = head >> 24; + switch (type) { + case 0x01: return push_str8_node_child(arena, prev, "Type: Large File"); + case 0x02: return push_str8_node_child(arena, prev, "Type: DAOS"); + default: return push_str8_node_child(arena, prev, "Type: Unknown"); + } + } else { + return push_str8_node_child(arena, prev, "Type: File"); + } + }, &head); + + if (!ok) + return; + + if (head < 0) { + head = -head; + i32 type = head >> 24; + u32 size = (u32(head) & 0xffff) - sizeof(i32); + u32 reserved = (head >> 16) & 0xff; + push_str8_node_child(arena, info.desc, "Size: %u", size); + push_str8_node_child(arena, info.desc, "Reserved: %u", reserved); + switch (type) { + case 0x01: + field_le("N Bytes: %" PRIu64); + field_le("Position: 0x%" PRIX64); + break; + case 0x02: + if (size == 12) { + field_le("N Bytes: %u"); + field_le("Location: 0x%" PRIX64); + } else if (size == 16) { + field_le("N Bytes: %" PRIu64); + field_le("Location: 0x%" PRIX64); + } else { + range("Unknown payload", size); + } + break; + default: + range("Unknown locator", size); + } + } else { + push_str8_node_child(arena, info.desc, "N Bytes: %" PRIu64, head); + field_le("Position: 0x%" PRIX64); + } + }); } void cluster_group() @@ -459,31 +551,39 @@ struct Sec_Hover_Fn { void cluster() { - frame("Cluster", [this] (u32 col_idx) { + frame("Cluster", [this] (u32 col_idx) { // outer list of columns titled_section(push_str8f(arena, "Column %u", col_idx).c(), [this] { - frame("Pages", [this] (u32 page_idx) { - titled_section(push_str8f(arena, "Page %u", page_idx).c(), [this] { - field("", [] (Arena *arena, String8_Node *prev, const char *, i32 n_elems) { + // Inner list of pages. NOTE this is a mischievous list frame who needs special handling! + // See the comment in frame() for more details. + frame("Pages", [this] (u32 n_items, Frame_List_Special_Handling) { + for (u32 page_idx = 0; page_idx < n_items; ++page_idx) { + titled_section(push_str8f(arena, "Page %u", page_idx).c(), [this] { + i32 n_elems; + if (!field("N Elements: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, i32 n_elems) { + return push_str8_node_child(arena, prev, fmt, (u32)std::abs(n_elems)); + }, &n_elems)) + { + return; + } b8 has_checksum = n_elems < 0; - String8_Node *sn = push_str8_node_child(arena, prev, "N Elements: %u", std::abs(n_elems)); - return push_str8_node(arena, sn, "Has Checksum: %s", has_checksum ? "yes" : "no"); - }); - locator("Element Locator"); - }, HoverSec_HideIfNotHovered); - }); - - i64 n_cols; - if (!field("", [] (Arena *arena, String8_Node *prev, const char *, i64 n_cols) { - if (n_cols < 0) { - return push_str8_node_child(arena, prev, "Element Offset: "); + push_str8_node_child(arena, info.desc, "Has Checksum: %s", has_checksum ? "yes" : "no"); + locator("Element Locator"); + }, HoverSec_HideIfNotHovered); } - return push_str8_node_child(arena, prev, "Element Offset: %" PRIi64, n_cols); - }, &n_cols)) - { - return; - } - if (n_cols >= 0) - field_le("Compression Settings: %d"); + + i64 n_cols; + if (!field("", [] (Arena *arena, String8_Node *prev, const char *, i64 n_cols) { + if (n_cols < 0) { + return push_str8_node_child(arena, prev, "Element Offset: "); + } + return push_str8_node_child(arena, prev, "Element Offset: %" PRIi64, n_cols); + }, &n_cols)) + { + return; + } + if (n_cols >= 0) + field_le("Compression Settings: %d"); + }); }, HoverSec_HideIfNotHovered); }); } @@ -501,8 +601,27 @@ struct Sec_Hover_Fn { return; if constexpr (FType == Frame_List) { - for (u32 i = 0; i < n_items; ++i) - frame_body_fn(i); + // Sadness here. + // Here's the thing: for convenience, when we deal with a Frame_List, we want to pass + // a function that handles the single element, so we don't have to repeat the for loop + // in every lambda we pass to frame. + // However, there is an oddball case where a list frame declares a size that's not simply + // the sum of all its elements, but it also includes trailing stuff (looking at you, Page Locations frame: + // https://github.com/root-project/root/blob/master/tree/ntuple/v7/doc/specifications.md#page-locations) + // So, to avoid bloating all other well-behaving list frames' code, we allow passing a lambda that + // handles the entire thing, similarly to what we do for Frame_Record. + // The way we distinguish the case is by checking if the given lambda accepts only a u32 param (regular case) + // or exactly 2 arguments (oddball case). In this second case, the first u32 gives the number of items, + // instead of the element index, and the second argument has no meaning. + constexpr u32 n_fn_args = n_functor_args; + if constexpr (n_fn_args == 1) { + for (u32 i = 0; i < n_items; ++i) + frame_body_fn(i); + } else if constexpr (n_fn_args == 2) { + frame_body_fn(n_items, nullptr); + } else { + static_assert(!sizeof(F), "frame_body_fn must accept either 1 (regular case) or 2 arguments!"); + } } else { frame_body_fn(); } @@ -516,7 +635,7 @@ struct Sec_Hover_Fn { u64 extra_size = size - allocated_size; if (extra_size > 0) - range("Unknown", extra_size); + range(push_str8f(arena, "Unknown frame extra payload of %s", title).c(), extra_size); }, sec_flags); cur_field_off = start_off + size; @@ -734,10 +853,9 @@ struct Sec_Hover_Fn { field_le("Flags: 0x%" PRIX64); field_le("Header checksum: 0x%" PRIX64); schema_description("Schema Extension"); - // - list of column group record frames (TODO) - //frame_header("Column Groups"); - // - list of cluster group record frames (TODO) - //frame_header("Cluster Groups"); + // NOTE: Column groups are currently unused, so this should always be empty + frame("Column Groups", [] (u32) {}); + frame("Cluster Groups", [this] (u32) { cluster_group(); }); range("Payload", section.range.len - cur_field_off); field_le("Checksum: 0x%" PRIX64); } @@ -877,4 +995,3 @@ Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, co return info; } - diff --git a/src/rntviewer.cpp b/src/rntviewer.cpp index 913d80f..1cc9239 100644 --- a/src/rntviewer.cpp +++ b/src/rntviewer.cpp @@ -26,8 +26,9 @@ #include #include #include -#include // PRIu64 +#include // PRIu64, ... +#include // For std::decay_t and std::is_same_v #include #ifdef DEBUG @@ -45,7 +46,7 @@ #endif // RNT_NO_GFX #define V_MAJOR "0" -#define V_MINOR "5" +#define V_MINOR "6" #include "root/root_inc.h" #include "root/RMicroFileReader.hxx"