rntviewer/src/rntuple.cpp

829 lines
32 KiB
C++
Raw Normal View History

2024-07-11 12:00:43 +00:00
// Formats the RNTuple format version stored in `ntuple` as
// "version <epoch>.<major>.<minor>.<patch>". The string is allocated from `arena`.
internal
String8 rntuple_description(Arena *arena, const RNTuple_Data &ntuple)
{
  const auto &ver = ntuple.version;
  return push_str8f(arena, "version %u.%u.%u.%u", ver.epoch, ver.major, ver.minor, ver.patch);
}
// Builds the RNTupleDescriptor of the RNTuple whose anchor is stored in `info`.
// The header, the footer and every cluster group's page list are read through `reader`,
// decompressed and deserialized. All intermediate buffers are taken from a scratch arena
// and are released before returning.
internal
ROOT::Experimental::RNTupleDescriptor create_descriptor(Arena *arena, RMicroFileReader &reader, const RNTuple_File_Info &info)
{
  using namespace ROOT::Experimental;
  using namespace ROOT::Experimental::Internal;

  Temp scratch = scratch_begin(&arena, 1);
  defer { scratch_end(scratch); };

  const RNTuple_Anchor &anchor = info.anchor;

  // Fetch the compressed header and footer from the file.
  u8 *zipped_header = arena_push_array_nozero<u8>(scratch.arena, anchor.fNBytesHeader);
  u8 *zipped_footer = arena_push_array_nozero<u8>(scratch.arena, anchor.fNBytesFooter);
  reader.ReadBuffer(zipped_header, anchor.fNBytesHeader, anchor.fSeekHeader);
  reader.ReadBuffer(zipped_footer, anchor.fNBytesFooter, anchor.fSeekFooter);

  // Inflate both of them into scratch buffers of their uncompressed length.
  u8 *raw_header = arena_push_array_nozero<u8>(scratch.arena, anchor.fLenHeader);
  u8 *raw_footer = arena_push_array_nozero<u8>(scratch.arena, anchor.fLenFooter);
  RNTupleDecompressor::Unzip(zipped_header, anchor.fNBytesHeader, anchor.fLenHeader, raw_header);
  RNTupleDecompressor::Unzip(zipped_footer, anchor.fNBytesFooter, anchor.fLenFooter, raw_footer);

  // Feed header and footer to the builder and extract the resulting descriptor.
  RNTupleDescriptorBuilder builder;
  RNTupleSerializer::DeserializeHeader(raw_header, anchor.fLenHeader, builder);
  RNTupleSerializer::DeserializeFooter(raw_footer, anchor.fLenFooter, builder);
  RNTupleDescriptor descriptor = builder.MoveDescriptor();

  // Complete the descriptor with the page list of each cluster group.
  for (const RClusterGroupDescriptor &group_desc : descriptor.GetClusterGroupIterable()) {
    // Remember the arena position so this iteration's buffers can be dropped at its end.
    u64 scratch_mark = arena_pos(scratch.arena);

    // Read the compressed page list.
    const auto &locator = group_desc.GetPageListLocator();
    u64 zipped_size = locator.fBytesOnStorage;
    u64 seek = locator.GetPosition<u64>();
    u8 *zipped_page_list = arena_push_array_nozero<u8>(scratch.arena, zipped_size);
    reader.ReadBuffer(zipped_page_list, zipped_size, seek);

    // Inflate it.
    u64 raw_size = group_desc.GetPageListLength();
    u8 *raw_page_list = arena_push_array_nozero<u8>(scratch.arena, raw_size);
    RNTupleDecompressor::Unzip(zipped_page_list, zipped_size, raw_size, raw_page_list);

    // Deserialize it into the descriptor.
    DescriptorId_t group_id = group_desc.GetId();
    RNTupleSerializer::DeserializePageList(raw_page_list, raw_size, group_id, descriptor);

    arena_pop_to(scratch.arena, scratch_mark);
  }

  return descriptor;
}
// Fills `rndata` with the on-disk layout metadata of the RNTuple described by `info`.
// The descriptor is obtained through create_descriptor(); from it this function derives:
//  - the byte range of every cluster group's page list (`cluster_groups`),
//  - a doubly linked list of page infos ordered by byte range (`pages`),
//  - per-cluster info pointing to the page with the lowest start offset in that cluster (`clusters`),
//  - page groups and page chunks, used to look up pages by offset (see the comment further down),
//  - the total counts and sizes of the above.
// Everything stored in `rndata` is allocated from `arena`.
internal
2024-07-18 13:32:32 +00:00
void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTuple_File_Info &info, RNTuple_Data &rndata)
2024-07-12 13:56:04 +00:00
{
using namespace ROOT::Experimental;
using namespace ROOT::Experimental::Internal;
RNTupleDescriptor descriptor = create_descriptor(arena, reader, info);
2024-07-16 12:34:51 +00:00
// gather cluster groups metadata
// NOTE: the array is not zeroed; only `rng_page_list` is assigned in the loop below.
Cluster_Group_Info *cluster_groups = arena_push_array_nozero<Cluster_Group_Info>(arena, descriptor.GetNClusterGroups());
u64 tot_page_list_size = 0;
u64 cg_idx = 0;
for (const RClusterGroupDescriptor &cg_desc : descriptor.GetClusterGroupIterable()) {
Cluster_Group_Info &cg_info = cluster_groups[cg_idx++];
// Page list locator
RNTupleLocator plist_locator = cg_desc.GetPageListLocator();
cg_info.rng_page_list.start = plist_locator.GetPosition<u64>();
cg_info.rng_page_list.len = plist_locator.fBytesOnStorage;
tot_page_list_size += plist_locator.fBytesOnStorage;
}
fprintf(stderr, "Loading pages...\n");
2024-07-12 16:29:35 +00:00
u64 n_pages = 0;
u64 n_elems = 0;
2024-07-16 12:34:51 +00:00
u64 tot_page_size = 0;
2024-07-12 16:29:35 +00:00
// Head and tail of the sorted, doubly linked list of page infos.
Page_Info_Node *pinfo_head = nullptr, *pinfo_tail = nullptr;
2024-07-15 13:54:22 +00:00
// Most recently inserted node: starting point of the search for the next insertion slot.
Page_Info_Node *last_inserted_pinfo = nullptr;
2024-07-16 12:34:51 +00:00
u64 n_clusters = 0;
2024-07-15 13:54:22 +00:00
chr::time_point start_t = chr::high_resolution_clock::now();
2024-07-16 12:34:51 +00:00
// This array is indexed by cluster id below.
// NOTE(review): assumes every cluster id is < GetNActiveClusters() - TODO confirm.
Cluster_Info_Node *clusters = arena_push_array<Cluster_Info_Node>(arena, descriptor.GetNActiveClusters());
2024-07-16 12:34:51 +00:00
// gather clusters and pages metadata
2024-07-12 16:29:35 +00:00
for (const RClusterDescriptor &cluster_desc : descriptor.GetClusterIterable()) {
2024-07-16 12:34:51 +00:00
++n_clusters;
2024-07-16 12:37:26 +00:00
for (const RClusterDescriptor::RColumnRange &col_range : cluster_desc.GetColumnRangeIterable()) {
2024-07-15 09:29:32 +00:00
// insert page infos sorted by byte range
2024-07-15 09:54:45 +00:00
const auto &page_range = cluster_desc.GetPageRange(col_range.fPhysicalColumnId);
2024-07-12 13:56:04 +00:00
for (const auto &page_info : page_range.fPageInfos) {
const u64 checksum_size = sizeof(u64);
2024-07-12 16:29:35 +00:00
Page_Info_Node *pinfo = arena_push<Page_Info_Node>(arena);
pinfo->range.start = page_info.fLocator.GetPosition<u64>();
2024-07-16 10:04:38 +00:00
// The stored range includes the trailing checksum, when the page has one.
pinfo->range.len = page_info.fLocator.fBytesOnStorage + (page_info.fHasChecksum) * checksum_size;
// The element count is stored negated when the page has a checksum.
// NOTE(review): presumably the sign is how Page_Info_Node::checksum_size() tells the two cases apart - confirm.
pinfo->n_elems = page_info.fHasChecksum ? -page_info.fNElements : page_info.fNElements;
2024-07-19 14:31:48 +00:00
pinfo->cluster_id = cluster_desc.GetId();
2024-07-15 09:29:32 +00:00
// Keep track of the page with the lowest start offset in this cluster.
Cluster_Info_Node &cluster = clusters[pinfo->cluster_id];
if (!cluster.first_page || pinfo->range.start < cluster.first_page->range.start) {
cluster.first_page = pinfo;
}
if (UNLIKELY(!pinfo_head)) {
2024-07-15 09:29:32 +00:00
// first node inserted
2024-07-12 16:29:35 +00:00
assert(!pinfo_tail);
pinfo_head = pinfo_tail = pinfo;
2024-07-15 09:29:32 +00:00
} else if (pinfo->range.start >= pinfo_tail->range.end()) {
// after tail
pinfo_tail->next = pinfo;
2024-07-26 13:50:19 +00:00
pinfo->prev = pinfo_tail;
2024-07-15 09:29:32 +00:00
pinfo_tail = pinfo;
} else if (pinfo->range.end() <= pinfo_head->range.start) {
// before head
pinfo->next = pinfo_head;
2024-07-26 13:50:19 +00:00
pinfo_head->prev = pinfo;
2024-07-15 09:29:32 +00:00
pinfo_head = pinfo;
2024-07-26 13:50:19 +00:00
} else {
// Very commonly pages are already sorted either in increasing or decreasing order.
// By starting to look from the last inserted page we are very likely to find the
// proper slot immediately.
// Head and tail are never updated in this branch: insertions at either end of the list
// are handled by the two branches above.
2024-07-29 21:36:27 +00:00
[[maybe_unused]] b8 inserted = false;
2024-07-26 13:50:19 +00:00
b8 pinfo_is_after_last = pinfo->range.start >= last_inserted_pinfo->range.end();
if (pinfo_is_after_last) {
// Walk forward from the node following the last inserted one.
for (Page_Info_Node *node = last_inserted_pinfo->next; node; node = node->next) {
// check if `pinfo` fits right before the node we're looking at
if (pinfo->range.end() <= node->range.start) {
Page_Info_Node *prev = node->prev;
if (UNLIKELY(!prev) || prev->range.end() <= pinfo->range.start) {
if (LIKELY(prev)) {
prev->next = pinfo;
pinfo->prev = prev;
}
node->prev = pinfo;
2024-07-26 13:50:19 +00:00
pinfo->next = node;
inserted = true;
break;
}
}
}
} else {
// Walk backward starting from the last inserted node itself.
for (Page_Info_Node *node = last_inserted_pinfo; node; node = node->prev) {
// check if `pinfo` fits right before the node we're looking at
if (pinfo->range.end() <= node->range.start) {
Page_Info_Node *prev = node->prev;
if (UNLIKELY(!prev) || prev->range.end() <= pinfo->range.start) {
if (LIKELY(prev)) {
prev->next = pinfo;
pinfo->prev = prev;
}
node->prev = pinfo;
2024-07-26 13:50:19 +00:00
pinfo->next = node;
inserted = true;
break;
}
}
}
2024-07-15 09:29:32 +00:00
}
2024-07-26 13:50:19 +00:00
// Not finding a slot means that the page overlaps an already inserted one.
// NOTE(review): this is only checked when asserts are enabled; otherwise the page is
// left out of the list but still counted below.
assert(inserted);
2024-07-12 16:29:35 +00:00
}
2024-07-15 09:29:32 +00:00
2024-07-15 13:54:22 +00:00
last_inserted_pinfo = pinfo;
2024-07-12 16:29:35 +00:00
++n_pages;
2024-07-16 12:34:51 +00:00
tot_page_size += pinfo->range.len;
2024-07-12 16:29:35 +00:00
n_elems += page_info.fNElements;
2024-07-12 13:56:04 +00:00
}
}
}
2024-07-12 16:29:35 +00:00
2024-07-15 13:54:22 +00:00
chr::time_point end_t = chr::high_resolution_clock::now();
u64 time_spent_ms = chr::duration_cast<chr::milliseconds>(end_t - start_t).count();
2024-07-26 13:51:04 +00:00
fprintf(stderr, "Loaded %lu pages in %lu ms.\nGenerating groups...\n", n_pages, time_spent_ms);
2024-07-12 16:29:35 +00:00
2024-07-15 09:54:45 +00:00
// Create page groups and chunks.
2024-07-15 09:29:32 +00:00
// Each page group is a grouping of GROUP_SIZE page infos whose range is equal to the combined ranges
// of its components. It is an acceleration structure used to more quickly find the correct page info
// that an offset belongs to.
2024-07-16 12:34:51 +00:00
// A page chunk is a grouping of adjacent pages, used to quickly determine if an offset is part
2024-07-15 09:54:45 +00:00
// of a page or not.
// NOTE(review): an RNTuple without pages trips this assert (and the dereferences of `pinfo_head` below).
assert(pinfo_head);
const u64 GROUP_SIZE = 500;
2024-07-12 16:29:35 +00:00
// A new group is opened at every page whose index is a multiple of GROUP_SIZE, on top of the
// initial one, hence the array size.
Page_Info_Group *groups = arena_push_array_nozero<Page_Info_Group>(arena, n_pages / GROUP_SIZE + 1);
u64 n_groups = 1;
groups->first = pinfo_head;
groups->range.start = pinfo_head->range.start;
2024-07-15 13:54:22 +00:00
Page_Info_Chunk *chunks_head = arena_push<Page_Info_Chunk>(arena);
Page_Info_Chunk *chunks_tail = chunks_head;
chunks_head->range = pinfo_head->range;
u64 n_chunks = 1;
2024-07-15 09:29:32 +00:00
// Index of the page being visited; the head (index 0) is already in the first group and chunk.
u64 idx = 1;
2024-07-26 13:50:19 +00:00
[[maybe_unused]] Page_Info_Node *prev = pinfo_head;
for (Page_Info_Node *pinfo = pinfo_head->next; pinfo; pinfo = pinfo->next) {
2024-07-26 13:50:19 +00:00
// The list must be sorted and free of overlaps.
assert(prev->range.end() <= pinfo->range.start);
prev = pinfo;
if (pinfo->range.start != chunks_tail->range.end()) {
// close current chunk and open new one
2024-07-15 09:54:45 +00:00
Page_Info_Chunk *chunk = arena_push<Page_Info_Chunk>(arena);
chunk->range.start = pinfo->range.start;
2024-07-16 09:21:09 +00:00
// Index of the group that is current when the chunk is opened.
chunk->first_group = n_groups - 1;
2024-07-15 09:54:45 +00:00
chunks_tail->next = chunk;
chunks_tail = chunk;
++n_chunks;
2024-07-12 16:29:35 +00:00
}
chunks_tail->range.len += pinfo->range.len;
if (idx++ % GROUP_SIZE != 0)
continue;
// Create a new group every GROUP_SIZE page infos
Page_Info_Group &cur_group = groups[n_groups];
cur_group.first = pinfo;
cur_group.range.start = pinfo->range.start;
// The previous group extends up to the start of the new one.
Page_Info_Group &prev_group = groups[n_groups - 1];
prev_group.range.len = cur_group.range.start - prev_group.range.start;
2024-07-15 09:54:45 +00:00
++n_groups;
2024-07-12 16:29:35 +00:00
}
2024-07-15 13:54:22 +00:00
// verify that we added all pages to chunks
assert(idx == n_pages);
// The last group extends up to the end of the last page.
Page_Info_Group &last_group = groups[n_groups - 1];
last_group.range.len = pinfo_tail->range.end() - last_group.range.start;
2024-07-15 09:29:32 +00:00
fprintf(stderr, "Generated %lu groups and %lu chunks.\n", n_groups, n_chunks);
2024-07-12 16:29:35 +00:00
2024-07-15 13:54:22 +00:00
assert(!chunks_tail->next);
assert(!pinfo_tail->next);
2024-07-12 16:29:35 +00:00
// Publish the results.
rndata.pages = pinfo_head;
rndata.page_groups = groups;
rndata.n_page_groups = n_groups;
2024-07-15 09:29:32 +00:00
rndata.page_chunks = chunks_head;
rndata.n_page_chunks = n_chunks;
2024-07-12 16:29:35 +00:00
rndata.n_pages = n_pages;
rndata.n_elems = n_elems;
2024-07-16 12:34:51 +00:00
rndata.tot_page_size = tot_page_size;
rndata.cluster_groups = cluster_groups;
rndata.n_cluster_groups = cg_idx;
rndata.tot_page_list_size = tot_page_list_size;
rndata.clusters = clusters;
2024-07-16 12:34:51 +00:00
rndata.n_clusters = n_clusters;
2024-07-12 09:58:55 +00:00
}
// Extracts TFile-level metadata from the memory-mapped `file`: the ROOT version, the compression
// setting and the byte ranges of the TFile object, of the streamer info and of the free list.
// `ntpl_name` is only forwarded to get_root_file_info(), which provides the offsets of the
// fields that are read here. All multi-byte values in the file are big-endian.
internal
TFile_Data get_tfile_data(const Inspected_File &file, String8 ntpl_name)
{
  // Big-endian readers over the file memory.
  const auto read_u32_be = [&file] (auto seek) -> u32 {
    u32 raw;
    memcpy(&raw, file.mem + seek, sizeof(raw));
    return bswap(raw);
  };
  const auto read_u64_be = [&file] (auto seek) -> u64 {
    u64 raw;
    memcpy(&raw, file.mem + seek, sizeof(raw));
    return bswap(raw);
  };

  TFile_Data tfile_data{};

  // parse root version
  const u64 version_seek = 4;
  u32 version = read_u32_be(version_seek);
  // "Big" files are flagged by adding 1000000 to the version number.
  b32 is_big_file = version > 1000000;
  version -= is_big_file * 1000000;
  // The remaining number is major * 10000 + minor * 100 + patch.
  u32 version_major = version / 10000;
  u32 version_minor = (version - version_major * 10000) / 100;
  u32 version_patch = (version - version_major * 10000 - version_minor * 100);
  tfile_data.root_version_major = (u16)version_major;
  tfile_data.root_version_minor = (u16)version_minor;
  tfile_data.root_version_patch = (u16)version_patch;

  // Seek fields are 64 bit wide in big files and 32 bit wide otherwise.
  const auto read_seek_be = [&] (auto seek) -> u64 {
    if (is_big_file)
      return read_u64_be(seek);
    return read_u32_be(seek);
  };

  Root_File_Info root_file_info = get_root_file_info(file.name.c(), ntpl_name.c(), is_big_file);
  tfile_data.root_file_header_size = root_file_info.tfile_header_nbytes;
  tfile_data.rng_root_file_obj.start = root_file_info.tfile_obj_seek;
  tfile_data.rng_root_file_obj.len = root_file_info.tfile_obj_nbytes;

  // parse compression
  tfile_data.compression = read_u32_be(root_file_info.compression_seek);

  // parse info
  // The stored byte count covers the header as well, so the header length is subtracted from it.
  tfile_data.rng_root_file_info_header.start = read_seek_be(root_file_info.info_seek_seek);
  u32 info_nbytes = read_u32_be(root_file_info.info_nbytes_seek);
  tfile_data.rng_root_file_info_header.len = root_file_info.info_header_nbytes;
  tfile_data.rng_root_file_info.start = tfile_data.rng_root_file_info_header.end();
  tfile_data.rng_root_file_info.len = info_nbytes - tfile_data.rng_root_file_info_header.len;

  // parse free list
  tfile_data.rng_root_file_free_header.start = read_seek_be(root_file_info.free_seek_seek);
  u32 free_nbytes = read_u32_be(root_file_info.free_nbytes_seek);
  // FIXME
  // tfile_data.rng_root_file_free_header.len = root_file_info.free_header_nbytes;
  tfile_data.rng_root_file_free.start = tfile_data.rng_root_file_free_header.end();
  tfile_data.rng_root_file_free.len = free_nbytes - tfile_data.rng_root_file_free_header.len;

  return tfile_data;
}
// Looks up the RNTuple named `ntpl_name` inside `file` and returns its metadata.
// If the lookup fails, a zero-initialized RNTuple_Data is returned.
internal
RNTuple_Data get_rntuple_data(Arena *arena, const Inspected_File &file, String8 ntpl_name)
{
  RNTuple_Data rndata {};

  // TODO: proper error handling
  RMicroFileReader file_reader { file.name.c() };
  RNTuple_File_Info file_info = file_reader.GetNTupleProper(ntpl_name.c());
  if (file_info.failed)
    return rndata;

  // Version and header/footer locations come straight from the anchor.
  const auto &anchor = file_info.anchor;
  rndata.version.epoch = anchor.fVersionEpoch;
  rndata.version.major = anchor.fVersionMajor;
  rndata.version.minor = anchor.fVersionMinor;
  rndata.version.patch = anchor.fVersionPatch;
  rndata.rng_header.start = anchor.fSeekHeader;
  rndata.rng_header.len = anchor.fNBytesHeader;
  rndata.rng_footer.start = anchor.fSeekFooter;
  rndata.rng_footer.len = anchor.fNBytesFooter;

  // Locations of the anchor itself, of its key and of the TKeys list.
  rndata.rng_anchor.start = file_info.anchor_seek;
  rndata.rng_anchor.len = file_info.anchor_nbytes;
  rndata.rng_anchor_key.start = file_info.anchor_key_seek;
  rndata.rng_anchor_key.len = file_info.anchor_key_nbytes;
  rndata.rblob_header_size = file_info.rblob_key_header_nbytes;
  rndata.rng_tkeys_list.start = file_info.tkeys_list_seek;
  rndata.rng_tkeys_list.len = file_info.tkeys_list_nbytes;

  // Cluster groups, clusters and pages.
  gather_ntuple_metadata(arena, file_reader, file_info, rndata);

  return rndata;
}
2024-07-12 09:58:55 +00:00
2024-07-25 16:23:58 +00:00
// Returns the byte range of the section identified by `sec`.
// Sections not listed here yield an empty range starting at 0.
internal
Byte_Range get_section_range(const App_State &app, Section_Id sec)
{
  const TFile_Data &tfile = app.tfile_data;
  const RNTuple_Data &rntuple = app.rndata;

  switch (sec) {
  // TFile sections
  case Sec_TFile_Header:   return { 0, tfile.root_file_header_size };
  case Sec_TFile_Object:   return tfile.rng_root_file_obj;
  case Sec_TFile_Info:     return tfile.rng_root_file_info;
  case Sec_TFile_FreeList: return tfile.rng_root_file_free;
  // RNTuple sections
  case Sec_TKey_List:      return rntuple.rng_tkeys_list;
  case Sec_RNTuple_Anchor: return rntuple.rng_anchor;
  case Sec_RNTuple_Header: return rntuple.rng_header;
  case Sec_RNTuple_Footer: return rntuple.rng_footer;
  default:                 break;
  }
  return { 0, 0 };
}
// Returns the section of the file that the absolute offset `off` belongs to, or a
// default-constructed Section if `off` is in none of the known sections.
// When a page section is returned, its highlight flag is set if `hilite_cluster` is non-negative
// and equal to the id of the cluster that the page belongs to.
// Page lookups use and update the `app.last_pinfo` cache, which is why `app` is not const.
// NOTE(review): the Section initializers below appear to be
// { id, range, size of the bytes preceding the range, size of the trailing bytes, highlight } -
// confirm against the declaration of Section.
internal
Section find_section(App_State &app, u64 off, i64 hilite_cluster = -1)
{
const RNTuple_Data &rdata = app.rndata;
const TFile_Data &tdata = app.tfile_data;
// Size of the key header preceding each RBlob: it is subtracted from the start of the ranges
// below so that those header bytes are attributed to the section they precede.
u64 rblob_sz = rdata.rblob_header_size; // @Incomplete
b8 hilite = false;
2024-07-29 22:03:35 +00:00
// TFile starting sections
2024-07-29 21:36:27 +00:00
// Everything between the end of the TFile header and the start of the TFile object counts as
// padding of the header section.
u64 root_header_padding = tdata.rng_root_file_obj.start - tdata.root_file_header_size;
2024-07-29 22:03:35 +00:00
if (off < tdata.rng_root_file_obj.start)
2024-07-30 08:47:13 +00:00
return { Sec_TFile_Header, { 0, tdata.root_file_header_size + root_header_padding }, 0, root_header_padding, hilite };
2024-07-29 22:03:35 +00:00
if (tdata.rng_root_file_obj.start <= off && off < tdata.rng_root_file_obj.end())
return { Sec_TFile_Object, tdata.rng_root_file_obj, 0, 0, hilite };
// TFile ending sections
if (tdata.rng_root_file_info_header.start <= off && off < tdata.rng_root_file_info.end())
return { Sec_TFile_Info, tdata.rng_root_file_info, tdata.rng_root_file_info_header.len, 0, hilite };
if (tdata.rng_root_file_free_header.start <= off && off < tdata.rng_root_file_free.end()) {
2024-07-30 08:47:13 +00:00
// printf("%lu\n", tdata.rng_root_file_free_header.len);
2024-07-29 22:03:35 +00:00
return { Sec_TFile_FreeList, tdata.rng_root_file_free, tdata.rng_root_file_free_header.len, 0, hilite };
}
2024-07-25 16:23:58 +00:00
/// Handle pages
{
// NOTE(review): `app.last_pinfo` is dereferenced without a null check here, so it is assumed
// to always point to a valid node - TODO confirm. Also note that this first lower bound is
// strict (`<`) while every other one in this function is inclusive (`<=`) - confirm it is intended.
// fast case: `off` is in the same page info as previous `off`.
2024-07-29 21:36:27 +00:00
if (app.last_pinfo->range.start - rblob_sz < off && off < app.last_pinfo->range.end()) {
2024-07-25 16:23:58 +00:00
hilite = hilite_cluster >= 0 && app.last_pinfo->cluster_id == (u64)hilite_cluster;
2024-07-29 21:36:27 +00:00
return { Sec_Page, app.last_pinfo->range, rblob_sz, app.last_pinfo->checksum_size(), hilite };
2024-07-25 16:23:58 +00:00
}
// The cache is advanced to the next page (if any) even when `off` turns out not to be in it.
// still fast case: `off is in the next page info as the previous.
if (app.last_pinfo->next) // don't check if it's checksum, since it's the first byte of the page
app.last_pinfo = app.last_pinfo->next;
2024-07-29 21:36:27 +00:00
if (app.last_pinfo && app.last_pinfo->range.start - rblob_sz <= off && off < app.last_pinfo->range.end()) {
2024-07-25 16:23:58 +00:00
hilite = hilite_cluster >= 0 && app.last_pinfo->cluster_id == (u64)hilite_cluster;
2024-07-29 21:36:27 +00:00
return { Sec_Page, app.last_pinfo->range, rblob_sz, app.last_pinfo->checksum_size(), hilite };
2024-07-25 16:23:58 +00:00
}
}
// RNTuple anchor, header, footer and TKeys list.
if (rdata.rng_anchor_key.start <= off && off < rdata.rng_anchor.end())
2024-07-29 21:36:27 +00:00
return { Sec_RNTuple_Anchor, rdata.rng_anchor, rdata.rng_anchor_key.len, 8, hilite };
2024-07-25 16:23:58 +00:00
if (rdata.rng_header.start - rblob_sz <= off && off < rdata.rng_header.end())
2024-07-29 21:36:27 +00:00
return { Sec_RNTuple_Header, rdata.rng_header, rblob_sz, 8, hilite };
2024-07-25 16:23:58 +00:00
if (rdata.rng_footer.start - rblob_sz <= off && off < rdata.rng_footer.end())
2024-07-29 21:36:27 +00:00
return { Sec_RNTuple_Footer, rdata.rng_footer, rblob_sz, 8, hilite };
2024-07-25 16:23:58 +00:00
if (rdata.rng_tkeys_list.start <= off && off < rdata.rng_tkeys_list.end())
2024-07-29 21:36:27 +00:00
return { Sec_TKey_List, rdata.rng_tkeys_list, rblob_sz, 0, hilite };
2024-07-25 16:23:58 +00:00
// Page lists: linear scan over all cluster groups.
// @Speed
for (u64 cg_idx = 0; cg_idx < rdata.n_cluster_groups; ++cg_idx) {
Cluster_Group_Info &cg_info = rdata.cluster_groups[cg_idx];
if (cg_info.rng_page_list.start - rblob_sz <= off && off < cg_info.rng_page_list.end())
2024-07-29 21:36:27 +00:00
return { Sec_Page_List, cg_info.rng_page_list, rblob_sz, 8, hilite };
2024-07-25 16:23:58 +00:00
}
// Slow page group lookup, ideally only done once per render when last_pinfo is invalid.
for (Page_Info_Chunk *chunk = rdata.page_chunks; chunk; chunk = chunk->next) {
// If we're at the start of a chunk, return a fake Sec_Page used to highlight the RBlob header bytes.
if (chunk->range.start - rblob_sz <= off && off < chunk->range.start)
2024-07-29 21:36:27 +00:00
return { Sec_Page, { chunk->range.start, 0 }, rblob_sz, 0, hilite };
2024-07-25 16:23:58 +00:00
if (chunk->range.start <= off && off < chunk->range.end()) {
// `off` is inside this chunk: find the group containing it, starting from the chunk's
// first group, then walk the pages from that group's first page.
for (u64 group_idx = chunk->first_group; group_idx < rdata.n_page_groups; ++group_idx) {
const Page_Info_Group &group = rdata.page_groups[group_idx];
if (off < group.range.start || off >= group.range.end())
continue;
for (Page_Info_Node *pinfo = group.first; pinfo; pinfo = pinfo->next) {
if (pinfo->range.start <= off && off < pinfo->range.end()) {
// Cache the page found for the fast path of the next lookups.
app.last_pinfo = pinfo;
hilite = hilite_cluster >= 0 && pinfo->cluster_id == (u64)hilite_cluster;
2024-07-29 21:36:27 +00:00
return { Sec_Page, pinfo->range, rblob_sz, pinfo->checksum_size(), hilite };
2024-07-25 16:23:58 +00:00
}
}
}
// An offset inside a chunk must belong to some page: getting here means that the
// chunks/groups structures are inconsistent.
fprintf(stderr, "Offset 0x%lX is in chunk 0x%lX - 0x%lX, but found in no page_info range!\n",
off, chunk->range.start, chunk->range.end());
assert(false);
}
}
// `off` is in no known section.
return {};
}
2024-07-29 09:59:56 +00:00
// Describes what is found at a hovered offset; this is what get_section_hover_info() returns.
struct Sec_Hover_Info {
// Byte range of the element (field or range) found at the hovered offset.
Byte_Range rng;
2024-08-02 07:45:37 +00:00
// A string tree where children are more indented than parents
2024-07-29 09:59:56 +00:00
String8_Node *desc;
};
2024-07-29 21:36:27 +00:00
// Byte-swaps `x` if it is a 16, 32 or 64 bit unsigned integer; values of any other type
// are returned unchanged.
template <typename T>
T bswap_if_needed(T x)
{
  return x;
}

template <>
u16 bswap_if_needed<u16>(u16 x)
{
  return bswap(x);
}

template <>
u32 bswap_if_needed<u32>(u32 x)
{
  return bswap(x);
}

template <>
u64 bswap_if_needed<u64>(u64 x)
{
  return bswap(x);
}
2024-07-29 15:45:07 +00:00
template <typename T>
2024-07-29 21:36:27 +00:00
String8_Node *hover_display_val_be(Arena *arena, String8_Node *prev, const char *fmt, T val)
2024-07-29 15:45:07 +00:00
{
2024-07-29 21:36:27 +00:00
val = bswap_if_needed(val);
2024-08-02 07:45:37 +00:00
return push_str8_node_child(arena, prev, fmt, val);
2024-07-29 15:45:07 +00:00
}
2024-07-29 21:01:11 +00:00
template <>
2024-07-29 21:36:27 +00:00
String8_Node *hover_display_val_be(Arena *arena, String8_Node *prev, const char *fmt, String8 val)
2024-07-29 21:01:11 +00:00
{
2024-08-02 07:45:37 +00:00
return push_str8_node_child(arena, prev, fmt, val.c());
2024-07-29 21:01:11 +00:00
}
2024-07-29 21:36:27 +00:00
// Display function that pushes `val`, formatted with `fmt`, to the string tree under `prev`
// without any byte swapping (unlike hover_display_val_be).
// NOTE(review): "le" presumably stands for little-endian, i.e. it assumes that the host byte
// order is little-endian - confirm.
template <typename T>
String8_Node *hover_display_val_le(Arena *arena, String8_Node *prev, const char *fmt, T val)
{
2024-08-02 07:45:37 +00:00
return push_str8_node_child(arena, prev, fmt, val);
2024-07-29 21:36:27 +00:00
}
2024-07-29 15:45:07 +00:00
// Display function for a packed, big-endian ROOT datetime: decodes `datetime` and pushes it to the
// string tree under `prev` as "<fmt_pre>YYYY/MM/DD hh:mm:ss".
// Note that `fmt_pre` is printed verbatim in front of the date; it is not used as a format string.
internal
String8_Node *hover_display_datetime_str(Arena *arena, String8_Node *prev, const char *fmt_pre, u32 datetime)
{
  datetime = bswap(datetime);
  // datetime:
  // year (6b) | month (4b) | day (5b) | hour (5b) | min (6b) | sec (6b)
  // The year is stored relative to 1995. The month is stored 1-based (January == 1), so it must
  // be displayed as it is: subtracting 1 from it shows every date one month early and makes an
  // all-zero month field wrap around to a huge unsigned value.
  u32 year  = (datetime >> 26) + 1995;
  u32 month = (datetime >> 22) & 0xf;
  u32 day   = (datetime >> 17) & 0x1f;
  u32 hour  = (datetime >> 12) & 0x1f;
  u32 min   = (datetime >> 6) & 0x3f;
  u32 sec   = datetime & 0x3f;
  return push_str8_node_child(arena, prev, "%s%u/%02u/%02u %02u:%02u:%02u", fmt_pre, year, month, day, hour, min, sec);
}
// Display function for the 9-byte header of a ROOT zipped block starting at `src`.
// Pushes a node containing `fmt` (printed verbatim, used as a title) under `prev`, followed by the
// compression method, the compressed size and the uncompressed size.
// Returns the last node pushed.
internal
String8_Node *display_val_rootzip(Arena *arena, String8_Node *prev, const char *fmt, const u8 *src)
{
  const u8 Z_DEFLATED = 8;

  // The first bytes identify the compression method.
  // Start from a valid string, so that an unrecognized signature is reported as such rather than
  // printing an uninitialized String8.
  String8 zip_method = str8("Unknown");
  if (src[0] == 'Z' && src[1] == 'L' && src[2] == Z_DEFLATED) {
    zip_method = str8("ZLIB");
  } else if (src[0] == 'C' && src[1] == 'S' && src[2] == Z_DEFLATED) {
    zip_method = str8("Old");
  } else if (src[0] == 'X' && src[1] == 'Z' && src[2] == 0) {
    zip_method = str8("LZMA");
  } else if (src[0] == 'L' && src[1] == '4') {
    zip_method = str8("LZ4");
  } else if (src[0] == 'Z' && src[1] == 'S' && src[2] == 1) {
    zip_method = str8("ZSTD");
  }

  // Both sizes are 3 bytes long and stored least significant byte first.
  u32 comp_size = src[3] | (src[4] << 8) | (src[5] << 16);
  u32 uncomp_size = src[6] | (src[7] << 8) | (src[8] << 16);

  String8_Node *sn = push_str8_node_child(arena, prev, "%s", fmt);
  sn = push_str8_node_child(arena, sn, "Zip method: %s", zip_method.c());
  sn = push_str8_node(arena, sn, "Compressed size: %s", to_pretty_size(arena, comp_size).c());
  sn = push_str8_node(arena, sn, "Uncompressed size: %s", to_pretty_size(arena, uncomp_size).c());
  return sn;
}
2024-07-29 21:36:27 +00:00
// Functor used by get_section_hover_info to describe the structure of a section and print data about it.
2024-07-29 15:45:07 +00:00
struct Try_Sec_Hover_Fn {
u64 start;
u64 roff;
const u8 *data;
Arena *arena;
Sec_Hover_Info &info;
u64 &cur_field_off;
template <typename TField_Type>
2024-08-02 07:45:37 +00:00
b8 field(const char *desc_fmt,
String8_Node *(*display_val)(Arena *, String8_Node *, const char *, TField_Type) = hover_display_val_be<TField_Type>
) const
2024-07-29 15:45:07 +00:00
{
u64 field_len = sizeof(TField_Type);
if (roff < cur_field_off + field_len) {
info.rng = { start + cur_field_off, field_len };
TField_Type val;
memcpy(&val, data + info.rng.start, info.rng.len);
display_val(arena, info.desc, desc_fmt, val);
return true;
}
cur_field_off += field_len;
return false;
}
2024-07-29 21:01:11 +00:00
template <>
2024-08-02 07:45:37 +00:00
b8 field<String8>(const char *desc_fmt,
String8_Node *(*display_val)(Arena *, String8_Node *, const char *, String8)
) const
2024-07-29 21:01:11 +00:00
{
u8 str_size = data[start + cur_field_off];
if (roff < cur_field_off + 1 + str_size) {
info.rng = { start + cur_field_off, 1 + (u64)str_size };
u8 *buf = arena_push_array_nozero<u8>(arena, str_size + 1);
memcpy(buf, data + start + cur_field_off + 1, str_size);
buf[str_size] = 0;
String8 s = { buf, str_size };
display_val(arena, info.desc, desc_fmt, s);
return true;
}
cur_field_off += 1 + str_size;
return false;
}
2024-08-02 07:45:37 +00:00
b8 range(const char *desc, u64 range_len) const
{
if (roff < cur_field_off + range_len) {
info.rng = { start + cur_field_off, range_len };
push_str8_node_child(arena, info.desc, "%s", desc);
return true;
}
cur_field_off += range_len;
return false;
}
b8 range_data(const char *desc, u64 range_len,
String8_Node *(*display_val)(Arena *, String8_Node *, const char *, const u8 *)
) const
2024-07-29 21:01:11 +00:00
{
if (roff < cur_field_off + range_len) {
info.rng = { start + cur_field_off, range_len };
2024-08-02 07:45:37 +00:00
display_val(arena, info.desc, desc, data + start + cur_field_off);
2024-07-29 21:01:11 +00:00
return true;
}
cur_field_off += range_len;
return false;
}
2024-07-29 15:45:07 +00:00
};
2024-07-29 21:01:11 +00:00
// Tries the fields of a TKey whose first byte is at absolute offset `start` of `data`.
// Returns true if the hovered offset belongs to one of the fields; in that case the field has been
// described through `try_sec_hover`.
// A key version greater than 1000 marks a "big" key, whose seek fields are 64 bit wide rather
// than 32 bit wide.
internal
b8 hover_try_key(const Try_Sec_Hover_Fn &try_sec_hover, const u8 *data, u64 start)
{
  // The version is the 16 bit big-endian field that follows the 4 bytes of NBytes.
  u16 version_be;
  memcpy(&version_be, data + start + 4, sizeof(u16));
  u32 version = bswap(version_be);
  b8 is_big = version > 1000;

  // Displays the key version without the offset of 1000 that marks big keys.
  const auto display_key_version = [] (Arena *arena, String8_Node *prev, const char *fmt, u16 x) {
    x = bswap(x);
    x -= (x > 1000) * 1000;
    return push_str8_node_child(arena, prev, fmt, x);
  };

  // Fields preceding the seeks: same layout for both key sizes.
  if (try_sec_hover.field<u32>("NBytes: %u")
      || try_sec_hover.field<u16>("Version: %u", display_key_version)
      || try_sec_hover.field<u32>("Obj Len: %u")
      || try_sec_hover.field<u32>("Datetime: ", hover_display_datetime_str)
      || try_sec_hover.field<u16>("Key Len: %u")
      || try_sec_hover.field<u16>("Cycle: %u"))
  {
    return true;
  }

  // Seeks: the format specifier must match the width of the value handed to the formatter,
  // i.e. `%lX` for the 64 bit seeks and `%X` for the 32 bit ones.
  b8 hovering_seek = is_big
    ? (try_sec_hover.field<u64>("Seek Key: 0x%lX") || try_sec_hover.field<u64>("Seek Pdir: 0x%lX"))
    : (try_sec_hover.field<u32>("Seek Key: 0x%X") || try_sec_hover.field<u32>("Seek Pdir: 0x%X"));
  if (hovering_seek)
    return true;

  // Trailing strings.
  return try_sec_hover.field<String8>("Class Name: %s")
    || try_sec_hover.field<String8>("Obj Name: %s")
    || try_sec_hover.field<String8>("Obj Title: %s")
  ;
}
2024-08-02 07:45:37 +00:00
// Tries the header of a ROOT zipped block, described by display_val_rootzip.
// `data` and `start` are currently unused.
internal
b8 hover_try_rootzip(const Try_Sec_Hover_Fn &try_sec_hover, const u8 *data, u64 start)
{
  // TODO boundary checks
  const u64 zip_header_size = 9;
  b8 is_hovered = try_sec_hover.range_data("Zipped Block", zip_header_size, display_val_rootzip);
  return is_hovered;
}
2024-07-29 09:59:56 +00:00
// Builds the hover description for the byte at `off`, which lies in `section`.
//
// `off` is the absolute offset into `data`. It must not precede the start of the
// section's pre-header bytes (`section.range.start - section.pre_size`); this is
// asserted.
// `arena` is used to allocate the description string nodes.
//
// The returned info always has `desc` set to the section's name. Depending on
// `section.id`, a chain of `try_sec_hover` attempts is then evaluated in on-disk
// field order; `||` short-circuits at the first attempt that returns true
// (presumably the one containing the hovered byte -- see Try_Sec_Hover_Fn), which
// is expected to fill in the rest of `info`.
//
// NOTE: format strings follow the file's LP64 convention: `%lu` / `%lX` for u64
// fields, `%u` / `%X` for fields of 32 bits or fewer.
internal
Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, const u8 *data)
{
  Sec_Hover_Info info {};

  // printf("off: 0x%lX, sec start - pre_size: (0x%lX - %lu) = 0x%lX\n", off, section.range.start, section.pre_size, section.range.start - section.pre_size);
  assert(off >= section.range.start - section.pre_size);

  info.desc = push_str8_node(arena, nullptr, section_names[section.id].c());

  u64 start = section.range.start - section.pre_size;
  u64 roff = off - start; // offset relative to `section`
  // Offset (relative to `start`) of the field currently being tried; shared with `try_sec_hover`.
  u64 cur_field_off = 0;
  Try_Sec_Hover_Fn try_sec_hover { start, roff, data, arena, info, cur_field_off };

  switch (section.id) {
  case Sec_RNTuple_Anchor: {
    hover_try_key(try_sec_hover, data, start)
      || try_sec_hover.field<u32>("Object len: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u32 x) {
           x = bswap(x);
           // the stored value has bit 30 set; strip it to display the plain length
           x -= 0x4000'0000;
           return push_str8_node_child(arena, prev, fmt, x);
         })
      || try_sec_hover.field<u16>("Class version: %u")
      || try_sec_hover.field<u16>("Version Epoch: %u")
      || try_sec_hover.field<u16>("Version Major: %u")
      || try_sec_hover.field<u16>("Version Minor: %u")
      || try_sec_hover.field<u16>("Version Patch: %u")
      || try_sec_hover.field<u64>("Seek Header: 0x%lX")
      || try_sec_hover.field<u64>("NBytes Header: %lu")
      || try_sec_hover.field<u64>("Len Header: %lu")
      || try_sec_hover.field<u64>("Seek Footer: 0x%lX")
      || try_sec_hover.field<u64>("NBytes Footer: %lu")
      || try_sec_hover.field<u64>("Len Footer: %lu")
      || try_sec_hover.field<u64>("Max Key Size: %lu")
      || try_sec_hover.field<u64>("Checksum: 0x%lX", hover_display_val_le)
      ;
  } break;

  case Sec_TFile_Header: {
    // The version word sits right after the 4-byte magic number; a value above
    // 1000000 selects the layout with 64-bit seek fields.
    u32 root_version_be;
    memcpy(&root_version_be, data + start + 4, sizeof(u32));
    u32 root_version = bswap(root_version_be);
    b8 is_big = root_version > 1000000;
    if (is_big) {
      try_sec_hover.field<u32>("ROOT magic number")
        || try_sec_hover.field<u32>("ROOT version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u32 x) {
             x = bswap(x);
             x -= 1000000;
             return push_str8_node_child(arena, prev, fmt, x);
           })
        || try_sec_hover.field<u32>("fBEGIN: 0x%X")
        || try_sec_hover.field<u64>("fEND: 0x%lX")
        || try_sec_hover.field<u64>("Seek Free: 0x%lX")
        || try_sec_hover.field<u32>("NBytes Free: %u")
        || try_sec_hover.field<u32>("N Free: %u")
        || try_sec_hover.field<u32>("NBytes Name: %u")
        || try_sec_hover.field<u8>("Units: %u")
        || try_sec_hover.field<u32>("Compression: %u")
        || try_sec_hover.field<u64>("Seek Info: 0x%lX")
        || try_sec_hover.field<u32>("NBytes Info: %u")
        || try_sec_hover.range("Padding", section.post_size)
        ;
    } else {
      try_sec_hover.field<u32>("ROOT magic number")
        || try_sec_hover.field<u32>("ROOT version: %u")
        || try_sec_hover.field<u32>("fBEGIN: 0x%X")
        || try_sec_hover.field<u32>("fEND: 0x%X")
        || try_sec_hover.field<u32>("Seek Free: 0x%X")
        || try_sec_hover.field<u32>("NBytes Free: %u")
        || try_sec_hover.field<u32>("N Free: %u")
        || try_sec_hover.field<u32>("NBytes Name: %u")
        || try_sec_hover.field<u8>("Units: %u")
        || try_sec_hover.field<u32>("Compression: %u")
        || try_sec_hover.field<u32>("Seek Info: 0x%X")
        || try_sec_hover.field<u32>("NBytes Info: %u")
        || try_sec_hover.range("Padding", section.post_size)
        ;
    }
  } break;

  case Sec_TFile_Object: {
    if (!hover_try_key(try_sec_hover, data, start)) {
      b8 ok = try_sec_hover.field<String8>("File Name: %s")
        || try_sec_hover.field<String8>("File Title: %s")
        ;
      if (!ok) {
        // Peek at the version word to pick between the 32-bit and 64-bit layouts.
        // `cur_field_off` is relative to `start`, while `data` is indexed with
        // absolute offsets, hence the `+ start`.
        u16 version_be;
        memcpy(&version_be, data + start + cur_field_off, sizeof(u16));
        u16 version = bswap(version_be);
        b8 is_big = version > 1000;
        if (is_big) {
          ok = ok || try_sec_hover.field<u16>("Version: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u16 x) {
                 x = bswap(x);
                 x -= 1000;
                 return push_str8_node_child(arena, prev, fmt, x);
               })
            || try_sec_hover.field<u32>("Created: ", hover_display_datetime_str)
            || try_sec_hover.field<u32>("Modified: ", hover_display_datetime_str)
            || try_sec_hover.field<u32>("NBytes Key: %u")
            || try_sec_hover.field<u32>("NBytes Name: %u")
            || try_sec_hover.field<u64>("Seek Dir: 0x%lX")
            || try_sec_hover.field<u64>("Seek Parent: 0x%lX")
            || try_sec_hover.field<u64>("Seek Keys: 0x%lX")
            ;
        } else {
          ok = ok || try_sec_hover.field<u16>("Version: %u")
            || try_sec_hover.field<u32>("Created: ", hover_display_datetime_str)
            || try_sec_hover.field<u32>("Modified: ", hover_display_datetime_str)
            || try_sec_hover.field<u32>("NBytes Key: %u")
            || try_sec_hover.field<u32>("NBytes Name: %u")
            || try_sec_hover.field<u32>("Seek Dir: 0x%X")
            || try_sec_hover.field<u32>("Seek Parent: 0x%X")
            || try_sec_hover.field<u32>("Seek Keys: 0x%X")
            ;
        }
        ok = ok || try_sec_hover.field<u16>("UUID Vers.Class: %u")
          || try_sec_hover.field<u16>("UUID: %u", hover_display_val_le)
          || try_sec_hover.range("Padding", 3 * sizeof(u32))
          ;
      }
    }
  } break;

  case Sec_RNTuple_Header:
  case Sec_RNTuple_Footer:
  case Sec_Page_List:
  case Sec_Page: {
    hover_try_key(try_sec_hover, data, start)
      || hover_try_rootzip(try_sec_hover, data, start)
      || try_sec_hover.range("Payload", section.range.len - section.post_size) // TODO: improve
      || try_sec_hover.field<u64>("Checksum: 0x%lX", hover_display_val_le)
      ;
  } break;

  case Sec_TFile_Info: {
    hover_try_key(try_sec_hover, data, start)
      || try_sec_hover.range("Payload", section.range.len) // TODO: improve
      ;
  } break;

  case Sec_TFile_FreeList: {
    hover_try_key(try_sec_hover, data, start)
      || try_sec_hover.range("Payload", section.range.len) // TODO: improve
      ;
  } break;

  default:;
  }

  return info;
}