checkpoint

This commit is contained in:
silverweed 2024-09-25 10:07:42 +02:00
parent d9ea00f027
commit 9f358b56bf
3 changed files with 140 additions and 210 deletions

View file

@ -1,5 +1,3 @@
using ROOT::Experimental::Internal::RNTupleSerializer;
struct Sec_Hover_Info { struct Sec_Hover_Info {
Byte_Range rng; Byte_Range rng;
// A string tree where children are more indented than parents // A string tree where children are more indented than parents
@ -96,47 +94,11 @@ T read_buf(const void *buf, u64 &off)
return val; return val;
} }
// Returns a human-readable name for the given RNTuple column type code.
// The recognized codes are not contiguous (e.g. 0x12 has no entry), so the
// mapping is spelled out case by case. Any code that is not listed yields
// "Unknown". The returned pointer refers to a string literal and must not
// be freed or modified by the caller.
internal
const char *get_column_type_name(u16 type)
{
  switch (type) {
  case 0x01: return "Index64";
  case 0x02: return "Index32";
  case 0x03: return "Switch";
  case 0x04: return "Byte";
  case 0x05: return "Char";
  case 0x06: return "Bit";
  case 0x07: return "Real64";
  case 0x08: return "Real32";
  case 0x09: return "Real16";
  case 0x16: return "Int64";
  case 0x0A: return "UInt64";
  case 0x17: return "Int32";
  case 0x0B: return "UInt32";
  case 0x18: return "Int16";
  case 0x0C: return "UInt16";
  case 0x19: return "Int8";
  case 0x0D: return "UInt8";
  case 0x0E: return "SplitIndex64";
  case 0x0F: return "SplitIndex32";
  case 0x10: return "SplitReal64";
  case 0x11: return "SplitReal32";
  case 0x1A: return "SplitInt64";
  case 0x13: return "SplitUInt64";
  case 0x1B: return "SplitInt32";
  case 0x14: return "SplitUInt32";
  case 0x1C: return "SplitInt16";
  case 0x15: return "SplitUInt16";
  case 0x1D: return "Real32Trunc";
  // Quantized 32-bit reals were previously reported as "Unknown".
  case 0x1E: return "Real32Quant";
  default: return "Unknown";
  }
}
// Functor used by get_section_hover_info to describe the structure of a section and print data about it. // Functor used by get_section_hover_info to describe the structure of a section and print data about it.
struct Sec_Hover_Fn { struct Sec_Hover_Fn {
u64 start; u64 start; // the start of the section (including the pre_size, e.g. the TKey)
u64 roff; u64 roff; // the offset relative to the section start
const u8 *data; const u8 *data; // the entire file data
Arena *arena; Arena *arena;
Sec_Hover_Info &info; Sec_Hover_Info &info;
u64 &cur_field_off; u64 &cur_field_off;
@ -148,53 +110,52 @@ struct Sec_Hover_Fn {
String8_Node *prev_desc = info.desc; String8_Node *prev_desc = info.desc;
info.desc = push_str8_node_child(arena, prev_desc, title); info.desc = push_str8_node_child(arena, prev_desc, title);
b8 ok = fn(); b8 hovered = fn();
if (!ok) { if (!hovered) {
pop_str8_node_child(prev_desc, info.desc); pop_str8_node_child(prev_desc, info.desc);
info.desc = prev_desc; info.desc = prev_desc;
} }
return ok; return hovered;
} }
// Returns true if this field is being hovered
template <typename T> template <typename T>
T add_to_desc(const char *fmt, Display_Fn<T> display_val = hover_display_val_le<T>) const b8 field(const char *desc_fmt, Display_Fn<T> display_val, T *val_read = nullptr) const
{
static_assert(!std::is_same_v<T, String8>, "use add_to_desc_str8 instead.");
T val = read_buf<T>(data + start, cur_field_off);
display_val(arena, info.desc, fmt, val);
return val;
}
template <typename TStrSize>
String8 add_to_desc_str8(const char *fmt, Display_Fn<String8> display_val = hover_display_val_str8) const
{
TStrSize str_size = read_buf<TStrSize>(data + start, cur_field_off);
u8 *buf = nullptr;
if (str_size > 1000) // DEBUG
return str8("");
if (str_size > 0) {
buf = arena_push_array_nozero<u8>(arena, str_size + 1);
memcpy(buf, data + start + cur_field_off, str_size);
buf[str_size] = 0;
cur_field_off += str_size;
}
String8 s = { buf, str_size };
display_val(arena, info.desc, fmt, s);
return s;
}
template <typename T>
b8 field(const char *desc_fmt, Display_Fn<T> display_val) const
{ {
static_assert(!std::is_same_v<T, String8>, "use field_str8 instead."); static_assert(!std::is_same_v<T, String8>, "use field_str8 instead.");
u64 field_len = sizeof(T); u64 field_len = sizeof(T);
if (roff < cur_field_off + field_len) { u64 field_off = cur_field_off;
info.rng = { start + cur_field_off, field_len };
add_to_desc<T>(desc_fmt, display_val);
return true;
}
cur_field_off += field_len; cur_field_off += field_len;
if (display_grouped || roff < field_off + field_len) {
info.rng = { start + field_off, field_len };
T val = read_buf<T>(data + start, field_off);
display_val(arena, info.desc, desc_fmt, val);
if (val_read)
*val_read = val;
return !display_grouped;
}
return false;
}
template <typename TStrSize>
b8 field_str8(const char *desc_fmt, Display_Fn<String8> display_val = hover_display_val_str8) const
{
// String size can be stored as different types, like u8 (by ROOT I/O) or u32 (by RNTuple).
TStrSize str_size;
memcpy(&str_size, data + start + cur_field_off, sizeof(TStrSize));
u64 field_off = cur_field_off;
u64 field_len = sizeof(TStrSize) + (u64)str_size;
cur_field_off += field_len;
if (display_grouped || roff < field_off + field_len) {
info.rng = { start + field_off, field_len };
u8 *buf = arena_push_array_nozero<u8>(arena, str_size + 1);
memcpy(buf, data + start + field_off + sizeof(TStrSize), str_size);
buf[str_size] = 0;
String8 s = { buf, str_size };
display_val(arena, info.desc, desc_fmt, s);
return !display_grouped;
}
return false; return false;
} }
@ -210,25 +171,6 @@ struct Sec_Hover_Fn {
return field<T>(desc_fmt, hover_display_val_le<T>); return field<T>(desc_fmt, hover_display_val_le<T>);
} }
template <typename TStrSize>
b8 field_str8(const char *desc_fmt, Display_Fn<String8> display_val = hover_display_val_str8) const
{
// String size can be stored as different types, like u8 (by ROOT I/O) or u32 (by RNTuple).
TStrSize str_size;
memcpy(&str_size, data + start + cur_field_off, sizeof(TStrSize));
if (roff < cur_field_off + sizeof(TStrSize) + str_size) {
info.rng = { start + cur_field_off, sizeof(TStrSize) + (u64)str_size };
u8 *buf = arena_push_array_nozero<u8>(arena, str_size + 1);
memcpy(buf, data + start + cur_field_off + sizeof(TStrSize), str_size);
buf[str_size] = 0;
String8 s = { buf, str_size };
display_val(arena, info.desc, desc_fmt, s);
return true;
}
cur_field_off += sizeof(TStrSize) + str_size;
return false;
}
b8 range(const char *desc, u64 range_len) const b8 range(const char *desc, u64 range_len) const
{ {
if (roff < cur_field_off + range_len) { if (roff < cur_field_off + range_len) {
@ -347,9 +289,9 @@ struct Sec_Hover_Fn {
else if (roff < cur_field_off + 12) { else if (roff < cur_field_off + 12) {
info.rng = { start + cur_field_off, 12 }; info.rng = { start + cur_field_off, 12 };
b8 ok = titled_section("List Frame", [this] { b8 ok = titled_section("List Frame", [this] {
add_to_desc<i64>("Size: %" PRIi64 " B", hover_display_val_le_abs<i64>); return field<i64>("Size: %" PRIi64 " B", hover_display_val_le_abs<i64>)
add_to_desc<u32>("N Items: %u"); || field_le<u32>("N Items: %u")
return true; ;
}); });
if (ok) if (ok)
return ok; return ok;
@ -365,45 +307,12 @@ struct Sec_Hover_Fn {
"Leaf", "Collection", "Record", "Variant", "Unsplit" "Leaf", "Collection", "Record", "Variant", "Unsplit"
}; };
if (display_grouped) {
i64 size; i64 size;
memcpy(&size, data + start + cur_field_off, sizeof(size)); memcpy(&size, data + start + cur_field_off, sizeof(size));
u64 field_desc_len = (u64)std::abs(size); u64 field_desc_len = (u64)std::abs(size);
if (roff < cur_field_off + field_desc_len) { if (roff < cur_field_off + field_desc_len) {
info.rng = { start + cur_field_off, (u64)field_desc_len }; info.rng = { start + cur_field_off, (u64)field_desc_len };
return titled_section("Field", [this] { return titled_section("Field", [this] {
add_to_desc<i64>("Size: %" PRIi64 " B");
add_to_desc<u32>("Field version: %u");
add_to_desc<u32>("Type version: %u");
add_to_desc<u32>("On-disk parent id: %u");
add_to_desc<u16>("Field structure: %s", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 type) {
const char *name = (type >= countof(field_struct_names)) ? "Unknown" : field_struct_names[type];
return push_str8_node_child(arena, prev, fmt, name);
});
u16 flags = add_to_desc<u16>("Flags: 0b%b");
if (flags & RNTupleSerializer::kFlagRepetitiveField)
add_to_desc<u64>("N Repetitions: %" PRIu64);
if (flags & RNTupleSerializer::kFlagProjectedField)
add_to_desc<u32>("On disk proj.src id: %u");
if (flags & RNTupleSerializer::kFlagHasTypeChecksum)
add_to_desc<u32>("Checksum: %u");
add_to_desc_str8<u32>("Name: %s");
add_to_desc_str8<u32>("Type Name: %s");
add_to_desc_str8<u32>("Type Alias: %s");
add_to_desc_str8<u32>("Description: %s");
return true;
});
}
cur_field_off += field_desc_len;
return false;
} else {
return titled_section("Field", [this] {
u64 flags_off = start + cur_field_off + 22;
u16 flags;
memcpy(&flags, data + flags_off, sizeof(flags));
b8 ok = field_le<i64>("Size: %" PRIi64 " B") b8 ok = field_le<i64>("Size: %" PRIi64 " B")
|| field_le<u32>("Field version: %u") || field_le<u32>("Field version: %u")
|| field_le<u32>("Type version: %u") || field_le<u32>("Type version: %u")
@ -411,83 +320,65 @@ struct Sec_Hover_Fn {
|| field<u16>("Field structure: %s", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 type) { || field<u16>("Field structure: %s", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 type) {
const char *name = (type >= countof(field_struct_names)) ? "Unknown" : field_struct_names[type]; const char *name = (type >= countof(field_struct_names)) ? "Unknown" : field_struct_names[type];
return push_str8_node_child(arena, prev, fmt, name); return push_str8_node_child(arena, prev, fmt, name);
}) });
|| field_le<u16>("Flags: 0b%b")
;
if (ok) if (ok)
return ok;
if ((flags & RNTupleSerializer::kFlagRepetitiveField) && field_le<u64>("N Repetitions: %" PRIu64))
return true;
if ((flags & RNTupleSerializer::kFlagProjectedField) && field_le<u32>("On disk proj.src id: %u"))
return true;
if ((flags & RNTupleSerializer::kFlagHasTypeChecksum) && field_le<u32>("Checksum: %u"))
return true; return true;
return field_str8<u32>("Name: %s") u16 flags;
ok = field<u16>("Flags: 0b%b", hover_display_val_le, &flags);
if (flags & RNTupleSerializer::kFlagRepetitiveField)
ok = ok || field_le<u64>("N Repetitions: %" PRIu64);
if (flags & RNTupleSerializer::kFlagProjectedField)
ok = ok || field_le<u32>("On disk proj.src id: %u");
if (flags & RNTupleSerializer::kFlagHasTypeChecksum)
ok = ok || field_le<u32>("Checksum: %u");
ok = ok || field_str8<u32>("Name: %s")
|| field_str8<u32>("Type Name: %s") || field_str8<u32>("Type Name: %s")
|| field_str8<u32>("Type Alias: %s") || field_str8<u32>("Type Alias: %s")
|| field_str8<u32>("Description: %s") || field_str8<u32>("Description: %s")
; ;
return ok;
}); });
} }
cur_field_off += field_desc_len;
return false;
} }
b8 column_desc(const char *title) const b8 column_desc(const char *title) const
{ {
if (display_grouped) {
i64 size; i64 size;
memcpy(&size, data + start + cur_field_off, sizeof(size)); memcpy(&size, data + start + cur_field_off, sizeof(size));
u64 col_desc_len = (u64)std::abs(size); u64 col_desc_len = (u64)std::abs(size);
if (roff < cur_field_off + col_desc_len) { if (roff < cur_field_off + col_desc_len) {
info.rng = { start + cur_field_off, col_desc_len }; info.rng = { start + cur_field_off, col_desc_len };
return titled_section(title, [this] { return titled_section(title, [this] {
add_to_desc<i64>("Size: %" PRIi64 " B");
add_to_desc<u16>("Column type: %s", [](Arena *arena, String8_Node *prev, const char *fmt, u16 val) {
const char *readable_col_type = get_column_type_name(val);
return push_str8_node_child(arena, prev, fmt, readable_col_type);
});
add_to_desc<u16>("Bits on storage: %u");
add_to_desc<u32>("Field ID: %u");
u16 flags = add_to_desc<u16>("Flags: 0b%b");
add_to_desc<u16>("Representation idx: %u");
if (flags & RNTupleSerializer::kFlagDeferredColumn)
add_to_desc<u64>("First element: %" PRIu64);
// if (flags & RNTupleSerializer::kFlagHasValueRange) {
// add_to_desc<double>("Value Min: %f");
// add_to_desc<double>("Value Max: %f");
// }
return true;
});
}
cur_field_off += col_desc_len;
return false;
} else {
return titled_section("Column", [this] {
u64 flags_off = start + cur_field_off + 16;
u16 flags;
memcpy(&flags, data + flags_off, sizeof(flags));
b8 ok = field_le<i64>("Size: %" PRIi64 " B") b8 ok = field_le<i64>("Size: %" PRIi64 " B")
|| field<u16>("Column Type: %s", [](Arena *arena, String8_Node *prev, const char *fmt, u16 val) { || field<u16>("Column type: %s", [](Arena *arena, String8_Node *prev, const char *fmt, u16 val) {
const char *readable_col_type = get_column_type_name(val); const char *readable_col_type = get_column_type_name(val);
return push_str8_node_child(arena, prev, fmt, readable_col_type); return push_str8_node_child(arena, prev, fmt, readable_col_type);
}) })
|| field_le<u16>("Bits on storage: %u") || field_le<u16>("Bits on storage: %u")
|| field_le<u32>("Field ID: %u") || field_le<u32>("Field ID: %u")
|| field_le<u16>("Flags: 0b%b")
|| field_le<u16>("Representation idx: %u")
; ;
if (ok) if (ok)
return ok;
if ((flags & RNTupleSerializer::kFlagDeferredColumn) && field_le<u64>("First element: %" PRIu64))
return true; return true;
// if ((flags & RNTupleSerializer::kFlagHasValueRange) && (field_le<double>("Value Min: %f") || field_le<double>("Value Max: %f")))
// return true;
return false; u16 flags;
ok = field<u16>("Flags: 0b%b", hover_display_val_le, &flags);
ok = ok || field_le<u16>("Representation idx: %u");
if (flags & RNTupleSerializer::kFlagDeferredColumn)
ok = ok || field_le<u64>("First element: %" PRIu64);
if (flags & RNTupleSerializer::kFlagHasValueRange) {
ok = ok || field_le<double>("Value Min: %f")
|| field_le<double>("Value Max: %f");
}
return ok;
}); });
} }
cur_field_off += col_desc_len;
return false;
} }
template <typename F> template <typename F>
@ -798,3 +689,4 @@ Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, co
return info; return info;
} }

View file

@ -94,6 +94,43 @@ String8 build_fully_qualified_field_name(Arena *arena, const ROOT::Experimental:
return str; return str;
} }
// Translates an RNTuple column type code into a printable name.
// The lookup is a linear scan over a small static table; codes are unique
// within the table, so at most one entry can match. Codes without an entry
// produce "Unknown". The result always points to a string literal.
internal
const char *get_column_type_name(u16 type)
{
  struct Column_Type_Name {
    u16 code;
    const char *name;
  };

  static const Column_Type_Name known_types[] = {
    { 0x01, "Index64" },
    { 0x02, "Index32" },
    { 0x03, "Switch" },
    { 0x04, "Byte" },
    { 0x05, "Char" },
    { 0x06, "Bit" },
    { 0x07, "Real64" },
    { 0x08, "Real32" },
    { 0x09, "Real16" },
    { 0x16, "Int64" },
    { 0x0A, "UInt64" },
    { 0x17, "Int32" },
    { 0x0B, "UInt32" },
    { 0x18, "Int16" },
    { 0x0C, "UInt16" },
    { 0x19, "Int8" },
    { 0x0D, "UInt8" },
    { 0x0E, "SplitIndex64" },
    { 0x0F, "SplitIndex32" },
    { 0x10, "SplitReal64" },
    { 0x11, "SplitReal32" },
    { 0x1A, "SplitInt64" },
    { 0x13, "SplitUInt64" },
    { 0x1B, "SplitInt32" },
    { 0x14, "SplitUInt32" },
    { 0x1C, "SplitInt16" },
    { 0x15, "SplitUInt16" },
    { 0x1D, "Real32Trunc" },
    { 0x1E, "Real32Quant" },
  };

  for (const Column_Type_Name &entry : known_types) {
    if (entry.code == type)
      return entry.name;
  }
  return "Unknown";
}
internal internal
void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTuple_File_Info &info, RNTuple_Data &rndata) void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTuple_File_Info &info, RNTuple_Data &rndata)
{ {
@ -132,9 +169,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
Cluster_Info *clusters = arena_push_array<Cluster_Info>(arena, descriptor.GetNActiveClusters()); Cluster_Info *clusters = arena_push_array<Cluster_Info>(arena, descriptor.GetNActiveClusters());
// @ForeignAlloc: currently using directly the RColumnElementBase API which forces foreign allocations. const char *elem_type_name;
// If we don't want to have this we could create our own function to retrieve a column type name.
std::string elem_type_name;
// gather clusters and pages metadata // gather clusters and pages metadata
for (const RClusterDescriptor &cluster_desc : descriptor.GetClusterIterable()) { for (const RClusterDescriptor &cluster_desc : descriptor.GetClusterIterable()) {
@ -143,7 +178,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
for (const RClusterDescriptor::RColumnRange &col_range : cluster_desc.GetColumnRangeIterable()) { for (const RClusterDescriptor::RColumnRange &col_range : cluster_desc.GetColumnRangeIterable()) {
const auto &col_descriptor = descriptor.GetColumnDescriptor(col_range.fPhysicalColumnId); const auto &col_descriptor = descriptor.GetColumnDescriptor(col_range.fPhysicalColumnId);
elem_type_name = RColumnElementBase::GetTypeName(col_descriptor.GetType()); // @ForeignAlloc elem_type_name = get_column_type_name((u16)col_descriptor.GetType());
const auto &field_desc = descriptor.GetFieldDescriptor(col_descriptor.GetFieldId()); const auto &field_desc = descriptor.GetFieldDescriptor(col_descriptor.GetFieldId());
const String8 owner_field_name = build_fully_qualified_field_name(arena, descriptor, &field_desc); const String8 owner_field_name = build_fully_qualified_field_name(arena, descriptor, &field_desc);
@ -159,7 +194,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
// If in the future we get RNTuples with more than 4B clusters we can just change the type to u64. // If in the future we get RNTuples with more than 4B clusters we can just change the type to u64.
assert(cluster_desc.GetId() <= UINT_MAX); assert(cluster_desc.GetId() <= UINT_MAX);
pinfo->cluster_id = cluster_desc.GetId(); pinfo->cluster_id = cluster_desc.GetId();
pinfo->elem_type_name = push_str8f(arena, "%s", elem_type_name.c_str()); pinfo->elem_type_name = push_str8f(arena, "%s", elem_type_name);
pinfo->owner_field_name = owner_field_name; pinfo->owner_field_name = owner_field_name;
pinfo->bits_per_elem = col_descriptor.GetBitsOnStorage(); pinfo->bits_per_elem = col_descriptor.GetBitsOnStorage();

View file

@ -2,3 +2,6 @@
#include <ROOT/RNTupleDescriptor.hxx> #include <ROOT/RNTupleDescriptor.hxx>
#include <ROOT/RNTupleZip.hxx> #include <ROOT/RNTupleZip.hxx>
#include <ROOT/RColumnElementBase.hxx> #include <ROOT/RColumnElementBase.hxx>
using ROOT::Experimental::Internal::RNTupleSerializer;