From 9f358b56bf6e64271b17a0e9c8e029e0f5be43e0 Mon Sep 17 00:00:00 2001 From: silverweed Date: Wed, 25 Sep 2024 10:07:42 +0200 Subject: [PATCH] checkpoint --- src/hover.cpp | 300 ++++++++++++++------------------------------ src/rntuple.cpp | 47 ++++++- src/root/root_inc.h | 3 + 3 files changed, 140 insertions(+), 210 deletions(-) diff --git a/src/hover.cpp b/src/hover.cpp index 88db4d4..a272508 100644 --- a/src/hover.cpp +++ b/src/hover.cpp @@ -1,5 +1,3 @@ -using ROOT::Experimental::Internal::RNTupleSerializer; - struct Sec_Hover_Info { Byte_Range rng; // A string tree where children are more indented than parents @@ -96,47 +94,11 @@ T read_buf(const void *buf, u64 &off) return val; } -internal -const char *get_column_type_name(u16 type) -{ - switch (type) { - case 0x01: return "Index64"; - case 0x02: return "Index32"; - case 0x03: return "Switch"; - case 0x04: return "Byte"; - case 0x05: return "Char"; - case 0x06: return "Bit"; - case 0x07: return "Real64"; - case 0x08: return "Real32"; - case 0x09: return "Real16"; - case 0x16: return "Int64"; - case 0x0A: return "UInt64"; - case 0x17: return "Int32"; - case 0x0B: return "UInt32"; - case 0x18: return "Int16"; - case 0x0C: return "UInt16"; - case 0x19: return "Int8"; - case 0x0D: return "UInt8"; - case 0x0E: return "SplitIndex64"; - case 0x0F: return "SplitIndex32"; - case 0x10: return "SplitReal64"; - case 0x11: return "SplitReal32"; - case 0x1A: return "SplitInt64"; - case 0x13: return "SplitUInt64"; - case 0x1B: return "SplitInt32"; - case 0x14: return "SplitUInt32"; - case 0x1C: return "SplitInt16"; - case 0x15: return "SplitUInt16"; - case 0x1D: return "Real32Trunc"; - default: return "Unknown"; - } -} - // Functor used by get_section_hover_info to describe the structure of a section and print data about it. struct Sec_Hover_Fn { - u64 start; - u64 roff; - const u8 *data; + u64 start; // the start of the section (including the pre_size, e.g. the TKey) + u64 roff; // the offset relative to the section start + const u8 *data; // the entire file data Arena *arena; Sec_Hover_Info &info; u64 &cur_field_off; @@ -148,53 +110,52 @@ struct Sec_Hover_Fn { String8_Node *prev_desc = info.desc; info.desc = push_str8_node_child(arena, prev_desc, title); - b8 ok = fn(); + b8 hovered = fn(); - if (!ok) { + if (!hovered) { pop_str8_node_child(prev_desc, info.desc); info.desc = prev_desc; } - return ok; + return hovered; } + // Returns true if this field is being hovered template - T add_to_desc(const char *fmt, Display_Fn display_val = hover_display_val_le) const - { - static_assert(!std::is_same_v, "use add_to_desc_str8 instead."); - T val = read_buf(data + start, cur_field_off); - display_val(arena, info.desc, fmt, val); - return val; - } - - template - String8 add_to_desc_str8(const char *fmt, Display_Fn display_val = hover_display_val_str8) const - { - TStrSize str_size = read_buf(data + start, cur_field_off); - u8 *buf = nullptr; - if (str_size > 1000) // DEBUG - return str8(""); - if (str_size > 0) { - buf = arena_push_array_nozero(arena, str_size + 1); - memcpy(buf, data + start + cur_field_off, str_size); - buf[str_size] = 0; - cur_field_off += str_size; - } - String8 s = { buf, str_size }; - display_val(arena, info.desc, fmt, s); - return s; - } - - template - b8 field(const char *desc_fmt, Display_Fn display_val) const + b8 field(const char *desc_fmt, Display_Fn display_val, T *val_read = nullptr) const { static_assert(!std::is_same_v, "use field_str8 instead."); u64 field_len = sizeof(T); - if (roff < cur_field_off + field_len) { - info.rng = { start + cur_field_off, field_len }; - add_to_desc(desc_fmt, display_val); - return true; - } + u64 field_off = cur_field_off; cur_field_off += field_len; + if (display_grouped || roff < field_off + field_len) { + info.rng = { start + field_off, field_len }; + T val = read_buf(data + start, field_off); + display_val(arena, info.desc, desc_fmt, val); + if (val_read) + *val_read = val; + return !display_grouped; + } + return false; + } + + template + b8 field_str8(const char *desc_fmt, Display_Fn display_val = hover_display_val_str8) const + { + // String size can be stored as different types, like u8 (by ROOT I/O) or u32 (by RNTuple). + TStrSize str_size; + memcpy(&str_size, data + start + cur_field_off, sizeof(TStrSize)); + u64 field_off = cur_field_off; + u64 field_len = sizeof(TStrSize) + (u64)str_size; + cur_field_off += field_len; + if (display_grouped || roff < field_off + field_len) { + info.rng = { start + field_off, field_len }; + u8 *buf = arena_push_array_nozero(arena, str_size + 1); + memcpy(buf, data + start + field_off + sizeof(TStrSize), str_size); + buf[str_size] = 0; + String8 s = { buf, str_size }; + display_val(arena, info.desc, desc_fmt, s); + return !display_grouped; + } return false; } @@ -210,25 +171,6 @@ struct Sec_Hover_Fn { return field(desc_fmt, hover_display_val_le); } - template - b8 field_str8(const char *desc_fmt, Display_Fn display_val = hover_display_val_str8) const - { - // String size can be stored as different types, like u8 (by ROOT I/O) or u32 (by RNTuple). - TStrSize str_size; - memcpy(&str_size, data + start + cur_field_off, sizeof(TStrSize)); - if (roff < cur_field_off + sizeof(TStrSize) + str_size) { - info.rng = { start + cur_field_off, sizeof(TStrSize) + (u64)str_size }; - u8 *buf = arena_push_array_nozero(arena, str_size + 1); - memcpy(buf, data + start + cur_field_off + sizeof(TStrSize), str_size); - buf[str_size] = 0; - String8 s = { buf, str_size }; - display_val(arena, info.desc, desc_fmt, s); - return true; - } - cur_field_off += sizeof(TStrSize) + str_size; - return false; - } - b8 range(const char *desc, u64 range_len) const { if (roff < cur_field_off + range_len) { @@ -347,9 +289,9 @@ struct Sec_Hover_Fn { else if (roff < cur_field_off + 12) { info.rng = { start + cur_field_off, 12 }; b8 ok = titled_section("List Frame", [this] { - add_to_desc("Size: %" PRIi64 " B", hover_display_val_le_abs); - add_to_desc("N Items: %u"); - return true; + return field("Size: %" PRIi64 " B", hover_display_val_le_abs) + || field_le("N Items: %u") + ; }); if (ok) return ok; @@ -365,129 +307,78 @@ struct Sec_Hover_Fn { "Leaf", "Collection", "Record", "Variant", "Unsplit" }; - if (display_grouped) { - i64 size; - memcpy(&size, data + start + cur_field_off, sizeof(size)); - u64 field_desc_len = (u64)std::abs(size); - if (roff < cur_field_off + field_desc_len) { - info.rng = { start + cur_field_off, (u64)field_desc_len }; - return titled_section("Field", [this] { - add_to_desc("Size: %" PRIi64 " B"); - add_to_desc("Field version: %u"); - add_to_desc("Type version: %u"); - add_to_desc("On-disk parent id: %u"); - add_to_desc("Field structure: %s", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 type) { - const char *name = (type >= countof(field_struct_names)) ? "Unknown" : field_struct_names[type]; - return push_str8_node_child(arena, prev, fmt, name); - }); - u16 flags = add_to_desc("Flags: 0b%b"); - - if (flags & RNTupleSerializer::kFlagRepetitiveField) - add_to_desc("N Repetitions: %" PRIu64); - if (flags & RNTupleSerializer::kFlagProjectedField) - add_to_desc("On disk proj.src id: %u"); - if (flags & RNTupleSerializer::kFlagHasTypeChecksum) - add_to_desc("Checksum: %u"); - - add_to_desc_str8("Name: %s"); - add_to_desc_str8("Type Name: %s"); - add_to_desc_str8("Type Alias: %s"); - add_to_desc_str8("Description: %s"); - + i64 size; + memcpy(&size, data + start + cur_field_off, sizeof(size)); + u64 field_desc_len = (u64)std::abs(size); + if (roff < cur_field_off + field_desc_len) { + info.rng = { start + cur_field_off, (u64)field_desc_len }; + return titled_section("Field", [this] { + b8 ok = field_le("Size: %" PRIi64 " B") + || field_le("Field version: %u") + || field_le("Type version: %u") + || field_le("On-disk parent id: %u") + || field("Field structure: %s", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 type) { + const char *name = (type >= countof(field_struct_names)) ? "Unknown" : field_struct_names[type]; + return push_str8_node_child(arena, prev, fmt, name); + }); + if (ok) return true; - }); - } - cur_field_off += field_desc_len; - return false; - } else { - return titled_section("Field", [this] { - u64 flags_off = start + cur_field_off + 22; - u16 flags; - memcpy(&flags, data + flags_off, sizeof(flags)); - b8 ok = field_le("Size: %" PRIi64 " B") - || field_le("Field version: %u") - || field_le("Type version: %u") - || field_le("On-disk parent id: %u") - || field("Field structure: %s", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 type) { - const char *name = (type >= countof(field_struct_names)) ? "Unknown" : field_struct_names[type]; - return push_str8_node_child(arena, prev, fmt, name); - }) - || field_le("Flags: 0b%b") - ; - if (ok) - return ok; - if ((flags & RNTupleSerializer::kFlagRepetitiveField) && field_le("N Repetitions: %" PRIu64)) - return true; - if ((flags & RNTupleSerializer::kFlagProjectedField) && field_le("On disk proj.src id: %u")) - return true; - if ((flags & RNTupleSerializer::kFlagHasTypeChecksum) && field_le("Checksum: %u")) - return true; + u16 flags; + ok = field("Flags: 0b%b", hover_display_val_le, &flags); - return field_str8("Name: %s") - || field_str8("Type Name: %s") - || field_str8("Type Alias: %s") - || field_str8("Description: %s") - ; + if (flags & RNTupleSerializer::kFlagRepetitiveField) + ok = ok || field_le("N Repetitions: %" PRIu64); + if (flags & RNTupleSerializer::kFlagProjectedField) + ok = ok || field_le("On disk proj.src id: %u"); + if (flags & RNTupleSerializer::kFlagHasTypeChecksum) + ok = ok || field_le("Checksum: %u"); + + ok = ok || field_str8("Name: %s") + || field_str8("Type Name: %s") + || field_str8("Type Alias: %s") + || field_str8("Description: %s") + ; + return ok; }); } + cur_field_off += field_desc_len; + return false; } b8 column_desc(const char *title) const { - if (display_grouped) { - i64 size; - memcpy(&size, data + start + cur_field_off, sizeof(size)); - u64 col_desc_len = (u64)std::abs(size); - if (roff < cur_field_off + col_desc_len) { - info.rng = { start + cur_field_off, col_desc_len }; - return titled_section(title, [this] { - add_to_desc("Size: %" PRIi64 " B"); - add_to_desc("Column type: %s", [](Arena *arena, String8_Node *prev, const char *fmt, u16 val) { - const char *readable_col_type = get_column_type_name(val); - return push_str8_node_child(arena, prev, fmt, readable_col_type); - }); - add_to_desc("Bits on storage: %u"); - add_to_desc("Field ID: %u"); - u16 flags = add_to_desc("Flags: 0b%b"); - add_to_desc("Representation idx: %u"); - if (flags & RNTupleSerializer::kFlagDeferredColumn) - add_to_desc("First element: %" PRIu64); - // if (flags & RNTupleSerializer::kFlagHasValueRange) { - // add_to_desc("Value Min: %f"); - // add_to_desc("Value Max: %f"); - // } - return true; - }); - } - cur_field_off += col_desc_len; - return false; - } else { - return titled_section("Column", [this] { - u64 flags_off = start + cur_field_off + 16; - u16 flags; - memcpy(&flags, data + flags_off, sizeof(flags)); + i64 size; + memcpy(&size, data + start + cur_field_off, sizeof(size)); + u64 col_desc_len = (u64)std::abs(size); + if (roff < cur_field_off + col_desc_len) { + info.rng = { start + cur_field_off, col_desc_len }; + return titled_section(title, [this] { b8 ok = field_le("Size: %" PRIi64 " B") - || field("Column Type: %s", [](Arena *arena, String8_Node *prev, const char *fmt, u16 val) { + || field("Column type: %s", [](Arena *arena, String8_Node *prev, const char *fmt, u16 val) { const char *readable_col_type = get_column_type_name(val); return push_str8_node_child(arena, prev, fmt, readable_col_type); - }) + }) || field_le("Bits on storage: %u") || field_le("Field ID: %u") - || field_le("Flags: 0b%b") - || field_le("Representation idx: %u") - ; + ; if (ok) - return ok; - - if ((flags & RNTupleSerializer::kFlagDeferredColumn) && field_le("First element: %" PRIu64)) return true; - // if ((flags & RNTupleSerializer::kFlagHasValueRange) && (field_le("Value Min: %f") || field_le("Value Max: %f"))) - // return true; - return false; - }); + u16 flags; + ok = field("Flags: 0b%b", hover_display_val_le, &flags); + ok = ok || field_le("Representation idx: %u"); + if (flags & RNTupleSerializer::kFlagDeferredColumn) + ok = ok || field_le("First element: %" PRIu64); + if (flags & RNTupleSerializer::kFlagHasValueRange) { + ok = ok || field_le("Value Min: %f") + || field_le("Value Max: %f"); + } + return ok; + }); } + cur_field_off += col_desc_len; + return false; } template @@ -798,3 +689,4 @@ Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, co return info; } + diff --git a/src/rntuple.cpp b/src/rntuple.cpp index 09e5c92..32b0953 100644 --- a/src/rntuple.cpp +++ b/src/rntuple.cpp @@ -93,7 +93,44 @@ String8 build_fully_qualified_field_name(Arena *arena, const ROOT::Experimental: } return str; } - + +internal +const char *get_column_type_name(u16 type) +{ + switch (type) { + case 0x01: return "Index64"; + case 0x02: return "Index32"; + case 0x03: return "Switch"; + case 0x04: return "Byte"; + case 0x05: return "Char"; + case 0x06: return "Bit"; + case 0x07: return "Real64"; + case 0x08: return "Real32"; + case 0x09: return "Real16"; + case 0x16: return "Int64"; + case 0x0A: return "UInt64"; + case 0x17: return "Int32"; + case 0x0B: return "UInt32"; + case 0x18: return "Int16"; + case 0x0C: return "UInt16"; + case 0x19: return "Int8"; + case 0x0D: return "UInt8"; + case 0x0E: return "SplitIndex64"; + case 0x0F: return "SplitIndex32"; + case 0x10: return "SplitReal64"; + case 0x11: return "SplitReal32"; + case 0x1A: return "SplitInt64"; + case 0x13: return "SplitUInt64"; + case 0x1B: return "SplitInt32"; + case 0x14: return "SplitUInt32"; + case 0x1C: return "SplitInt16"; + case 0x15: return "SplitUInt16"; + case 0x1D: return "Real32Trunc"; + case 0x1E: return "Real32Quant"; + default: return "Unknown"; + } +} + internal void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTuple_File_Info &info, RNTuple_Data &rndata) { @@ -132,9 +169,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl Cluster_Info *clusters = arena_push_array(arena, descriptor.GetNActiveClusters()); - // @ForeignAlloc: currently using directly the RColumnElementBase API which forces foreign allocations. - // If we don't want to have this we could create our own function to retrieve a column type name. - std::string elem_type_name; + const char *elem_type_name; // gather clusters and pages metadata for (const RClusterDescriptor &cluster_desc : descriptor.GetClusterIterable()) { @@ -143,7 +178,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl for (const RClusterDescriptor::RColumnRange &col_range : cluster_desc.GetColumnRangeIterable()) { const auto &col_descriptor = descriptor.GetColumnDescriptor(col_range.fPhysicalColumnId); - elem_type_name = RColumnElementBase::GetTypeName(col_descriptor.GetType()); // @ForeignAlloc + elem_type_name = get_column_type_name((u16)col_descriptor.GetType()); const auto &field_desc = descriptor.GetFieldDescriptor(col_descriptor.GetFieldId()); const String8 owner_field_name = build_fully_qualified_field_name(arena, descriptor, &field_desc); @@ -159,7 +194,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl // If in the future we get RNTuples with more than 4B clusters we can just change the type to u64. assert(cluster_desc.GetId() <= UINT_MAX); pinfo->cluster_id = cluster_desc.GetId(); - pinfo->elem_type_name = push_str8f(arena, "%s", elem_type_name.c_str()); + pinfo->elem_type_name = push_str8f(arena, "%s", elem_type_name); pinfo->owner_field_name = owner_field_name; pinfo->bits_per_elem = col_descriptor.GetBitsOnStorage(); diff --git a/src/root/root_inc.h b/src/root/root_inc.h index 55a2098..237c1f9 100644 --- a/src/root/root_inc.h +++ b/src/root/root_inc.h @@ -2,3 +2,6 @@ #include #include #include + +using ROOT::Experimental::Internal::RNTupleSerializer; +