fix(?) cluster parsing

This commit is contained in:
silverweed 2024-09-27 15:09:16 +02:00
parent 57bf7675f8
commit a57f48d5ff
2 changed files with 178 additions and 60 deletions

View file

@ -1,14 +1,43 @@
// ---- Compile-time helper: number of parameters of a functor's call operator ----
// `n_functor_args<F>` is the number of parameters accepted by `F::operator()`.
// It works by pattern-matching the type of `&F::operator()`, so F must have exactly one,
// non-template call operator (e.g. a non-generic lambda). Call operators declared `noexcept`
// are not matched by the specializations below.
template <typename F>
struct Signature;

// Counts the types in a parameter pack.
template <typename... Args>
struct Tuple {
  static constexpr u32 N_Elems = sizeof...(Args);
};

// const call operator (what a regular lambda has), any return type.
template <typename Ret, typename Obj, typename... Args>
struct Signature<Ret (Obj::*)(Args...) const> {
  static constexpr u32 N_Args = Tuple<Args...>::N_Elems;
};

// non-const call operator (what a `mutable` lambda has), any return type.
template <typename Ret, typename Obj, typename... Args>
struct Signature<Ret (Obj::*)(Args...)> {
  static constexpr u32 N_Args = Tuple<Args...>::N_Elems;
};

// decay_t strips references/cv-qualifiers so that F may be deduced as `Lambda &` or `const Lambda &`.
template <typename F>
constexpr u32 n_functor_args = Signature<decltype(&std::decay_t<F>::operator())>::N_Args;
// ---- end of functor-arity helper ----
// Dummy parameter type. A functor passed to Sec_Hover_Fn::frame<Frame_List>() declares it as its
// second parameter to signal that it wants to run the loop over the list items by itself;
// frame() always passes nullptr for it, so its value carries no meaning.
// See comment in frame().
using Frame_List_Special_Handling = const void *;
// The data returned by get_section_hover_info(), i.e. the only reason why this entire file exists.
struct Sec_Hover_Info {
  // Highlighted byte range
  Byte_Range rng;
  // A string tree where children are displayed as more indented than parents
  String8_Node *desc;
  // The one line of the tree that gets colored
  String8_Node *highlighted_desc;
};
// Returns `x` with its bytes swapped (via bswap()) when T is an integral type wider than one byte;
// returns `x` unchanged for every other type (single-byte integers, floating point, pointers, ...).
// A single `if constexpr` definition covers all widths, so no per-type specializations are needed.
template <typename T>
T bswap_if_needed(T x)
{
  if constexpr (sizeof(T) > 1 && std::is_integral_v<T>)
    return bswap(x);
  else
    return x;
}
template <typename T>
String8_Node *hover_display_val_be(Arena *arena, String8_Node *prev, const char *fmt, T val)
@ -35,9 +64,9 @@ String8_Node *hover_display_val_le_abs(Arena *arena, String8_Node *prev, const c
return push_str8_node_child(arena, prev, fmt, std::abs(val));
}
// Default display function for an unspecified range of bytes: adds a child node to `prev`
// reading "<desc> (<pretty-printed size>)". The pointer to the range's bytes is unused.
// `desc` is emitted through "%s" rather than being used as (part of) a format string, so any
// '%' character it may contain is displayed verbatim instead of being interpreted.
String8_Node *hover_display_generic_range(Arena *arena, String8_Node *prev, const char *desc, const u8 *, u64 size)
{
  return push_str8_node_child(arena, prev, "%s (%s)", desc, to_pretty_size(arena, size).c());
}
internal
@ -91,6 +120,8 @@ String8_Node *display_val_rootzip(Arena *arena, String8_Node *prev, const char *
// Function that displays one value of type T: receives the arena, the node to attach to,
// a description/format string and the value, and returns the node it created.
template <typename T>
using Display_Fn = String8_Node *(*)(Arena *, String8_Node *, const char *, T);
// Same, for a raw range of bytes: receives a pointer to the first byte of the range and its length.
using Display_Range_Fn = String8_Node *(*)(Arena *, String8_Node *, const char *, const u8 *, u64);
enum Hover_Section_Flags {
HoverSec_None = 0,
HoverSec_HideIfNotHovered = 1,
@ -225,13 +256,13 @@ struct Sec_Hover_Fn {
}
// An unspecified range of bytes
void range(const char *desc, u64 range_len, Display_Fn<const u8 *> display_val = hover_display_generic_range)
void range(const char *desc, u64 range_len, Display_Range_Fn display_val = hover_display_generic_range)
{
// if (ended)
// return;
b8 hovered = cur_field_off <= roff && roff < cur_field_off + range_len;
String8_Node *dsc = display_val(arena, info.desc, desc, data + start + cur_field_off);
String8_Node *dsc = display_val(arena, info.desc, desc, data + start + cur_field_off, range_len);
if (hovered && !display_grouped)
info.highlighted_desc = dsc;
@ -307,7 +338,7 @@ struct Sec_Hover_Fn {
const char *name = (val >= countof(envelope_names)) ? "Unknown" : envelope_names[val];
return push_str8_node_child(arena, prev, fmt, name);
});
range("Envelope size: %s", 6, [] (Arena *arena, String8_Node *prev, const char *fmt, const u8 *payload) {
range("Envelope size: %s", 6, [] (Arena *arena, String8_Node *prev, const char *fmt, const u8 *payload, u64) {
// Zero-initialize: the memcpy below only writes 6 of the 8 bytes of `size`,
// the remaining ones would otherwise hold indeterminate values.
u64 size = 0;
memcpy(&size, payload, 6);
return push_str8_node_child(arena, prev, fmt, to_pretty_size(arena, size));
@ -328,6 +359,16 @@ struct Sec_Hover_Fn {
titled_section(titlestr.c(), [this, &frame_type, &frame_size = size, n_items] {
i64 size;
memcpy(&size, data + start + cur_field_off, sizeof(size));
// Sanity check
if (size > 1000000) {
fprintf(stderr, "Frame size read at 0x%" PRIX64 " looks bogus"
" (is it really %s? Don't think so...); setting it to 0 for good measure.\n",
start + cur_field_off, to_pretty_size(arena, size).c());
frame_size = 0;
frame_type = Frame_INVALID;
return;
}
if (size >= 0) {
frame_type = Frame_Record;
field<i64>("Record frame size: %" PRIi64 " B", hover_display_val_le_abs<i64>);
@ -340,15 +381,14 @@ struct Sec_Hover_Fn {
} else {
frame_type = Frame_List;
memcpy(n_items, data + start + cur_field_off + sizeof(i64), sizeof(u32));
titled_section("List Frame", [this] {
field<i64>("Size: %" PRIi64 " B", hover_display_val_le_abs<i64>);
field_le<u32>("N Items: %u");
});
field<i64>("List frame size: %" PRIi64 " B", hover_display_val_le_abs<i64>);
field_le<u32>("List frame n.items: %u");
}
}
frame_size = std::abs(size);
});
return frame_type;
}
@ -403,34 +443,86 @@ struct Sec_Hover_Fn {
if (flags & RNTupleSerializer::kFlagDeferredColumn) {
field_le<u64>("First element: %" PRIu64);
}
// if (flags & RNTupleSerializer::kFlagHasValueRange) {
// field_le<double>("Value Min: %f");
// field_le<double>("Value Max: %f");
// }
if (flags & RNTupleSerializer::kFlagHasValueRange) {
field_le<double>("Value Min: %f");
field_le<double>("Value Max: %f");
}
}, HoverSec_HideIfNotHovered);
}
void schema_description(const char *title)
{
titled_section(title, [this] {
// TODO: Columns and alias columns are not the same
frame<Frame_List>("Fields", [this] (u32 idx) { field_desc(push_str8f(arena, "Field %u", idx).c()); });
frame<Frame_List>("Columns", [this] (u32 idx) { column_desc(push_str8f(arena, "Column %u", idx).c()); });
frame<Frame_List>("Alias Columns", [this] (u32 idx) { column_desc(push_str8f(arena, "Alias Column %u", idx).c()); });
frame<Frame_List>("Extra Type Infos", [this] (u32) {
field_le<u32>("Content identifier: %lu");
field_le<u32>("Type version from: %lu");
field_le<u32>("Type version to: %lu");
frame<Frame_List>("Alias Columns", [this] (u32 idx) {
frame<Frame_Record>(push_str8f(arena, "Alias Column %u", idx).c(), [this] {
field_le<u32>("Phys Col Id: %u");
field_le<u32>("Field Id: %u");
});
});
frame<Frame_List>("Extra Type Infos", [this] (u32 idx) {
frame<Frame_Record>(push_str8f(arena, "Extra Type Info %u", idx).c(), [this] {
field_le<u32>("Content identifier: %lu");
field_le<u32>("Type version from: %lu");
field_le<u32>("Type version to: %lu");
});
});
});
}
void locator(const char *title)
{
// TODO
// return titled_section(title, [this] {
// return true;
// });
titled_section(title, [this] {
i32 head;
b8 ok = field<i32>("", [] (Arena *arena, String8_Node *prev, const char *, i32 head) {
if (head < 0) {
head = -head;
i32 type = head >> 24;
switch (type) {
case 0x01: return push_str8_node_child(arena, prev, "Type: Large File");
case 0x02: return push_str8_node_child(arena, prev, "Type: DAOS");
default: return push_str8_node_child(arena, prev, "Type: Unknown");
}
} else {
return push_str8_node_child(arena, prev, "Type: File");
}
}, &head);
if (!ok)
return;
if (head < 0) {
head = -head;
i32 type = head >> 24;
u32 size = (u32(head) & 0xffff) - sizeof(i32);
u32 reserved = (head >> 16) & 0xff;
push_str8_node_child(arena, info.desc, "Size: %u", size);
push_str8_node_child(arena, info.desc, "Reserved: %u", reserved);
switch (type) {
case 0x01:
field_le<u64>("N Bytes: %" PRIu64);
field_le<u64>("Position: 0x%" PRIX64);
break;
case 0x02:
if (size == 12) {
field_le<u32>("N Bytes: %u");
field_le<u64>("Location: 0x%" PRIX64);
} else if (size == 16) {
field_le<u64>("N Bytes: %" PRIu64);
field_le<u64>("Location: 0x%" PRIX64);
} else {
range("Unknown payload", size);
}
break;
default:
range("Unknown locator", size);
}
} else {
push_str8_node_child(arena, info.desc, "N Bytes: %" PRIu64, head);
field_le<u64>("Position: 0x%" PRIX64);
}
});
}
void cluster_group()
@ -459,31 +551,39 @@ struct Sec_Hover_Fn {
void cluster()
{
frame<Frame_List>("Cluster", [this] (u32 col_idx) {
frame<Frame_List>("Cluster", [this] (u32 col_idx) { // outer list of columns
titled_section(push_str8f(arena, "Column %u", col_idx).c(), [this] {
frame<Frame_List>("Pages", [this] (u32 page_idx) {
titled_section(push_str8f(arena, "Page %u", page_idx).c(), [this] {
field<i32>("", [] (Arena *arena, String8_Node *prev, const char *, i32 n_elems) {
// Inner list of pages. NOTE this is a mischievous list frame who needs special handling!
// See the comment in frame() for more details.
frame<Frame_List>("Pages", [this] (u32 n_items, Frame_List_Special_Handling) {
for (u32 page_idx = 0; page_idx < n_items; ++page_idx) {
titled_section(push_str8f(arena, "Page %u", page_idx).c(), [this] {
i32 n_elems;
if (!field<i32>("N Elements: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, i32 n_elems) {
return push_str8_node_child(arena, prev, fmt, (u32)std::abs(n_elems));
}, &n_elems))
{
return;
}
b8 has_checksum = n_elems < 0;
String8_Node *sn = push_str8_node_child(arena, prev, "N Elements: %u", std::abs(n_elems));
return push_str8_node(arena, sn, "Has Checksum: %s", has_checksum ? "yes" : "no");
});
locator("Element Locator");
}, HoverSec_HideIfNotHovered);
});
i64 n_cols;
if (!field<i64>("", [] (Arena *arena, String8_Node *prev, const char *, i64 n_cols) {
if (n_cols < 0) {
return push_str8_node_child(arena, prev, "Element Offset: <suppressed>");
push_str8_node_child(arena, info.desc, "Has Checksum: %s", has_checksum ? "yes" : "no");
locator("Element Locator");
}, HoverSec_HideIfNotHovered);
}
return push_str8_node_child(arena, prev, "Element Offset: %" PRIi64, n_cols);
}, &n_cols))
{
return;
}
if (n_cols >= 0)
field_le<i32>("Compression Settings: %d");
i64 n_cols;
if (!field<i64>("", [] (Arena *arena, String8_Node *prev, const char *, i64 n_cols) {
if (n_cols < 0) {
return push_str8_node_child(arena, prev, "Element Offset: <suppressed>");
}
return push_str8_node_child(arena, prev, "Element Offset: %" PRIi64, n_cols);
}, &n_cols))
{
return;
}
if (n_cols >= 0)
field_le<i32>("Compression Settings: %d");
});
}, HoverSec_HideIfNotHovered);
});
}
@ -501,8 +601,27 @@ struct Sec_Hover_Fn {
return;
if constexpr (FType == Frame_List) {
for (u32 i = 0; i < n_items; ++i)
frame_body_fn(i);
// Sadness here.
// Here's the thing: for convenience, when we deal with a Frame_List, we want to pass
// a function that handles the single element, so we don't have to repeat the for loop
// in every lambda we pass to frame<Frame_List>.
// However, there is an oddball case where a list frame declares a size that's not simply
// the sum of all its elements, but it also includes trailing stuff (looking at you, Page Locations frame:
// https://github.com/root-project/root/blob/master/tree/ntuple/v7/doc/specifications.md#page-locations)
// So, to avoid bloating all other well-behaving list frames' code, we allow passing a lambda that
// handles the entire thing, similarly to what we do for Frame_Record.
// The way we distinguish the case is by checking if the given lambda accepts only a u32 param (regular case)
// or exactly 2 arguments (oddball case). In this second case, the first u32 gives the number of items,
// instead of the element index, and the second argument has no meaning.
constexpr u32 n_fn_args = n_functor_args<F>;
if constexpr (n_fn_args == 1) {
for (u32 i = 0; i < n_items; ++i)
frame_body_fn(i);
} else if constexpr (n_fn_args == 2) {
frame_body_fn(n_items, nullptr);
} else {
static_assert(!sizeof(F), "frame_body_fn must accept either 1 (regular case) or 2 arguments!");
}
} else {
frame_body_fn();
}
@ -516,7 +635,7 @@ struct Sec_Hover_Fn {
u64 extra_size = size - allocated_size;
if (extra_size > 0)
range("Unknown", extra_size);
range(push_str8f(arena, "Unknown frame extra payload of %s", title).c(), extra_size);
}, sec_flags);
cur_field_off = start_off + size;
@ -734,10 +853,9 @@ struct Sec_Hover_Fn {
field_le<u64>("Flags: 0x%" PRIX64);
field_le<u64>("Header checksum: 0x%" PRIX64);
schema_description("Schema Extension");
// - list of column group record frames (TODO)
//frame_header("Column Groups");
// - list of cluster group record frames (TODO)
//frame_header("Cluster Groups");
// NOTE: Column groups are currently unused, so this should always be empty
frame<Frame_List>("Column Groups", [] (u32) {});
frame<Frame_List>("Cluster Groups", [this] (u32) { cluster_group(); });
range("Payload", section.range.len - cur_field_off);
field_le<u64>("Checksum: 0x%" PRIX64);
}
@ -877,4 +995,3 @@ Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, co
return info;
}

View file

@ -26,8 +26,9 @@
#include <cmath>
#include <cstring>
#include <cassert>
#include <inttypes.h> // PRIu64
#include <inttypes.h> // PRIu64, ...
#include <type_traits> // For std::decay_t and std::is_same_v
#include <chrono>
#ifdef DEBUG
@ -45,7 +46,7 @@
#endif // RNT_NO_GFX
#define V_MAJOR "0"
#define V_MINOR "5"
#define V_MINOR "6"
#include "root/root_inc.h"
#include "root/RMicroFileReader.hxx"