From 1c54cb0d03afde0f4f2fbae50f1d5b0ee8916fbf Mon Sep 17 00:00:00 2001 From: silverweed Date: Thu, 8 Aug 2024 16:06:17 +0200 Subject: [PATCH] fix a bunch of stuff --- src/render.cpp | 1 - src/rntuple.cpp | 178 +++++++++++++++++++++++++--------- src/rntuple.h | 3 +- src/root/RMicroFileReader.cxx | 7 +- src/root/RMicroFileReader.hxx | 1 - 5 files changed, 140 insertions(+), 50 deletions(-) diff --git a/src/render.cpp b/src/render.cpp index 15e2938..0b32b56 100644 --- a/src/render.cpp +++ b/src/render.cpp @@ -372,7 +372,6 @@ void update_and_render(Arena *arena, App_State &app, f32 delta_time_ms) i64 cluster_to_highlight = app.viewer.highlighted_cluster; ImGui::PushItemWidth(100.f); if (ImGui::InputScalar("##highlighted_cluster", ImGuiDataType_S64, &cluster_to_highlight, &step_i64, &step_fast_i64, "%u")) { - app.viewer.highlight_cluster = true; viewer_jump_to_cluster(app, cluster_to_highlight); } ImGui::PopItemWidth(); diff --git a/src/rntuple.cpp b/src/rntuple.cpp index 3f32266..3fa127f 100644 --- a/src/rntuple.cpp +++ b/src/rntuple.cpp @@ -111,11 +111,15 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl pinfo->range.start = page_info.fLocator.GetPosition(); pinfo->range.len = page_info.fLocator.fBytesOnStorage + (page_info.fHasChecksum) * checksum_size; pinfo->n_elems = page_info.fHasChecksum ? -page_info.fNElements : page_info.fNElements; + // This assert is just here because we're using u32 for cluster_id to save memory. + // If in the future we get RNTuples with more than 4B clusters we can just change the type to u64. + assert(cluster_desc.GetId() <= UINT_MAX); pinfo->cluster_id = cluster_desc.GetId(); Cluster_Info_Node &cluster = clusters[pinfo->cluster_id]; if (!cluster.first_page || pinfo->range.start < cluster.first_page->range.start) { cluster.first_page = pinfo; + pinfo->is_first_in_cluster = true; } if (UNLIKELY(!pinfo_head)) { @@ -288,7 +292,9 @@ TFile_Data get_tfile_data(const Inspected_File &file, String8 ntpl_name) Root_File_Info root_file_info = get_root_file_info(file.name.c(), ntpl_name.c(), is_big_file); tfile_data.root_file_header_size = root_file_info.tfile_header_nbytes; tfile_data.rng_root_file_obj.start = root_file_info.tfile_obj_seek; - tfile_data.rng_root_file_obj.len = root_file_info.tfile_obj_nbytes; + u32 tfile_obj_nbytes; + memcpy(&tfile_obj_nbytes, file.mem + root_file_info.tfile_obj_seek, sizeof(tfile_obj_nbytes)); + tfile_data.rng_root_file_obj.len = bswap(tfile_obj_nbytes); // parse compression u32 compression_be; @@ -384,66 +390,119 @@ Section find_section(App_State &app, u64 off, i64 hilite_cluster = -1) const RNTuple_Data &rdata = app.rndata; const TFile_Data &tdata = app.tfile_data; u64 rblob_sz = rdata.rblob_header_size; // @Incomplete - b8 hilite = false; + Section sec {}; // TFile starting sections u64 root_header_padding = tdata.rng_root_file_obj.start - tdata.root_file_header_size; - if (off < tdata.rng_root_file_obj.start) - return { Sec_TFile_Header, { 0, tdata.root_file_header_size + root_header_padding }, 0, root_header_padding, hilite }; - - if (tdata.rng_root_file_obj.start <= off && off < tdata.rng_root_file_obj.end()) - return { Sec_TFile_Object, tdata.rng_root_file_obj, 0, 0, hilite }; - - // TFile ending sections - if (tdata.rng_root_file_info_header.start <= off && off < tdata.rng_root_file_info.end()) - return { Sec_TFile_Info, tdata.rng_root_file_info, tdata.rng_root_file_info_header.len, 0, hilite }; - - if (tdata.rng_root_file_free_header.start <= off && off < tdata.rng_root_file_free.end()) { - // printf("%lu\n", tdata.rng_root_file_free_header.len); - return { Sec_TFile_FreeList, tdata.rng_root_file_free, tdata.rng_root_file_free_header.len, 0, hilite }; + if (off < tdata.rng_root_file_obj.start) { + sec.id = Sec_TFile_Header; + sec.range = { 0, tdata.root_file_header_size + root_header_padding }; + sec.post_size = root_header_padding; + return sec; } - /// Handle pages + if (tdata.rng_root_file_obj.start <= off && off < tdata.rng_root_file_obj.end()) { + sec.id = Sec_TFile_Object; + sec.range = tdata.rng_root_file_obj; + return sec; + } + + // TFile ending sections + if (tdata.rng_root_file_info_header.start <= off && off < tdata.rng_root_file_info.end()) { + sec.id = Sec_TFile_Info; + sec.range = tdata.rng_root_file_info; + sec.pre_size = tdata.rng_root_file_info_header.len; + return sec; + } + + if (tdata.rng_root_file_free_header.start <= off && off < tdata.rng_root_file_free.end()) { + sec.id = Sec_TFile_FreeList; + sec.range = tdata.rng_root_file_free; + sec.pre_size = tdata.rng_root_file_free_header.len; + return sec; + } + + /// Page fast lookup (relative to app.last_pinfo) { // fast case: `off` is in the same page info as previous `off`. - if (app.last_pinfo->range.start - rblob_sz < off && off < app.last_pinfo->range.end()) { - hilite = hilite_cluster >= 0 && app.last_pinfo->cluster_id == (u64)hilite_cluster; - return { Sec_Page, app.last_pinfo->range, rblob_sz, app.last_pinfo->checksum_size(), hilite }; + u64 pre_size = app.last_pinfo->is_first_in_cluster ? rblob_sz : 0; + if (app.last_pinfo->range.start - pre_size < off && off < app.last_pinfo->range.end()) { + sec.id = Sec_Page; + sec.range = app.last_pinfo->range; + sec.pre_size = pre_size; + sec.post_size = app.last_pinfo->checksum_size(); + sec.highlighted = hilite_cluster >= 0 && app.last_pinfo->cluster_id == (u64)hilite_cluster; + return sec; } // still fast case: `off is in the next page info as the previous. if (app.last_pinfo->next) // don't check if it's checksum, since it's the first byte of the page app.last_pinfo = app.last_pinfo->next; - if (app.last_pinfo && app.last_pinfo->range.start - rblob_sz <= off && off < app.last_pinfo->range.end()) { - hilite = hilite_cluster >= 0 && app.last_pinfo->cluster_id == (u64)hilite_cluster; - return { Sec_Page, app.last_pinfo->range, rblob_sz, app.last_pinfo->checksum_size(), hilite }; + if (app.last_pinfo) { + u64 pre_size = app.last_pinfo->is_first_in_cluster ? rblob_sz : 0; + if (app.last_pinfo->range.start - pre_size <= off && off < app.last_pinfo->range.end()) { + sec.id = Sec_Page; + sec.range = app.last_pinfo->range; + sec.pre_size = pre_size; + sec.post_size = app.last_pinfo->checksum_size(); + sec.highlighted = hilite_cluster >= 0 && app.last_pinfo->cluster_id == (u64)hilite_cluster; + return sec; + } } } - if (rdata.rng_anchor_key.start <= off && off < rdata.rng_anchor.end()) - return { Sec_RNTuple_Anchor, rdata.rng_anchor, rdata.rng_anchor_key.len, 8, hilite }; + if (rdata.rng_anchor_key.start <= off && off < rdata.rng_anchor.end()) { + sec.id = Sec_RNTuple_Anchor; + sec.range = rdata.rng_anchor; + sec.pre_size = rdata.rng_anchor_key.len; + sec.post_size = 8; + return sec; + } - if (rdata.rng_header.start - rblob_sz <= off && off < rdata.rng_header.end()) - return { Sec_RNTuple_Header, rdata.rng_header, rblob_sz, 8, hilite }; + if (rdata.rng_header.start - rblob_sz <= off && off < rdata.rng_header.end()) { + sec.id = Sec_RNTuple_Header; + sec.range = rdata.rng_header; + sec.pre_size = rblob_sz; + sec.post_size = 8; + return sec; + } - if (rdata.rng_footer.start - rblob_sz <= off && off < rdata.rng_footer.end()) - return { Sec_RNTuple_Footer, rdata.rng_footer, rblob_sz, 8, hilite }; + if (rdata.rng_footer.start - rblob_sz <= off && off < rdata.rng_footer.end()) { + sec.id = Sec_RNTuple_Footer; + sec.range = rdata.rng_footer; + sec.pre_size = rblob_sz; + sec.post_size = 8; + return sec; + } - if (rdata.rng_tkeys_list.start <= off && off < rdata.rng_tkeys_list.end()) - return { Sec_TKey_List, rdata.rng_tkeys_list, rblob_sz, 0, hilite }; + if (rdata.rng_tkeys_list.start <= off && off < rdata.rng_tkeys_list.end()) { + sec.id = Sec_TKey_List; + sec.range = rdata.rng_tkeys_list; + sec.pre_size = rblob_sz; + return sec; + } // @Speed for (u64 cg_idx = 0; cg_idx < rdata.n_cluster_groups; ++cg_idx) { Cluster_Group_Info &cg_info = rdata.cluster_groups[cg_idx]; - if (cg_info.rng_page_list.start - rblob_sz <= off && off < cg_info.rng_page_list.end()) - return { Sec_Page_List, cg_info.rng_page_list, rblob_sz, 8, hilite }; + if (cg_info.rng_page_list.start - rblob_sz <= off && off < cg_info.rng_page_list.end()) { + sec.id = Sec_Page_List; + sec.range = cg_info.rng_page_list; + sec.pre_size = rblob_sz; + sec.post_size = 8; + return sec; + } } // Slow page group lookup, ideally only done once per render when last_pinfo is invalid. for (Page_Info_Chunk *chunk = rdata.page_chunks; chunk; chunk = chunk->next) { // If we're at the start of a chunk, return a fake Sec_Page used to highlight the RBlob header bytes. - if (chunk->range.start - rblob_sz <= off && off < chunk->range.start) - return { Sec_Page, { chunk->range.start, 0 }, rblob_sz, 0, hilite }; + if (chunk->range.start - rblob_sz <= off && off < chunk->range.start) { + sec.id = Sec_Page; + sec.range = { chunk->range.start, 0 }; + sec.pre_size = rblob_sz; + return sec; + } if (chunk->range.start <= off && off < chunk->range.end()) { for (u64 group_idx = chunk->first_group; group_idx < rdata.n_page_groups; ++group_idx) { @@ -452,10 +511,15 @@ Section find_section(App_State &app, u64 off, i64 hilite_cluster = -1) continue; for (Page_Info_Node *pinfo = group.first; pinfo; pinfo = pinfo->next) { - if (pinfo->range.start <= off && off < pinfo->range.end()) { + u64 pre_size = pinfo->is_first_in_cluster ? rblob_sz : 0; + if (pinfo->range.start - pre_size <= off && off < pinfo->range.end()) { app.last_pinfo = pinfo; - hilite = hilite_cluster >= 0 && pinfo->cluster_id == (u64)hilite_cluster; - return { Sec_Page, pinfo->range, rblob_sz, pinfo->checksum_size(), hilite }; + sec.id = Sec_Page; + sec.range = pinfo->range; + sec.pre_size = pre_size; + sec.post_size = pinfo->checksum_size(); + sec.highlighted = hilite_cluster >= 0 && pinfo->cluster_id == (u64)hilite_cluster; + return sec; } } } @@ -467,7 +531,7 @@ Section find_section(App_State &app, u64 off, i64 hilite_cluster = -1) } return {}; -} +} struct Sec_Hover_Info { Byte_Range rng; @@ -674,6 +738,14 @@ b8 hover_try_key(const Try_Sec_Hover_Fn &try_sec_hover, const u8 *data, u64 star } } +internal +b8 hover_try_object(const Try_Sec_Hover_Fn &try_sec_hover) +{ + return try_sec_hover.field("Version: %u") + || try_sec_hover.field("Unique ID: %u") + || try_sec_hover.field("Bits: %u"); +} + // `off` is the absolute offset into `data`. internal Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, const u8 *data) @@ -781,6 +853,8 @@ Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, co || try_sec_hover.field("Seek Dir: 0x%lX") || try_sec_hover.field("Seek Parent: 0x%lX") || try_sec_hover.field("Seek Keys: 0x%lX") + || try_sec_hover.field("UUID Vers.Class: %u") + || try_sec_hover.field("UUID: %u", hover_display_val_le) ; } else { ok = ok || try_sec_hover.field("Version: %u") @@ -791,20 +865,18 @@ Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, co || try_sec_hover.field("Seek Dir: 0x%lX") || try_sec_hover.field("Seek Parent: 0x%lX") || try_sec_hover.field("Seek Keys: 0x%lX") + || try_sec_hover.field("UUID Vers.Class: %u") + || try_sec_hover.field("UUID: %u", hover_display_val_le) + || try_sec_hover.range("Padding", 3 * sizeof(u32)) ; } - ok = ok || try_sec_hover.field("UUID Vers.Class: %u") - || try_sec_hover.field("UUID: %u", hover_display_val_le) - || try_sec_hover.range("Padding", 3 * sizeof(u32)) - ; } } } break; case Sec_RNTuple_Header: case Sec_RNTuple_Footer: - case Sec_Page_List: - case Sec_Page: { + case Sec_Page_List: { hover_try_key(try_sec_hover, data, start) || try_sec_hover.maybe_rootzip(data, start) || try_sec_hover.range("Payload", section.range.len - section.post_size) // TODO: improve @@ -812,8 +884,26 @@ Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, co ; } break; + case Sec_Page: { + b8 ok = section.pre_size && hover_try_key(try_sec_hover, data, start); + ok = ok || try_sec_hover.maybe_rootzip(data, start) + || try_sec_hover.range("Payload", section.range.len - section.post_size) // TODO: improve + || try_sec_hover.field("Checksum: 0x%lX", hover_display_val_le) + ; + } break; + case Sec_TFile_Info: { hover_try_key(try_sec_hover, data, start) + || try_sec_hover.maybe_rootzip(data, start) + // || try_sec_hover.field("Byte Count: %u", [] (Arena *arena, String8_Node *prev, const char *fmt, u32 x) { + // x = bswap(x); + // x -= 0x400000000; + // return push_str8_node_child(arena, prev, fmt, x); + // }) + // || try_sec_hover.field("Version: %u") + // || hover_try_object(try_sec_hover) + // || try_sec_hover.field("Name: %u") + // || try_sec_hover.field("N Objects: %u") || try_sec_hover.range("Payload", section.range.len) // TODO: improve ; } break; diff --git a/src/rntuple.h b/src/rntuple.h index 854d826..167898e 100644 --- a/src/rntuple.h +++ b/src/rntuple.h @@ -15,7 +15,8 @@ struct Page_Info_Node { Byte_Range range; // len includes checksum i32 n_elems; // negative = page has checksum - u64 cluster_id; + u32 cluster_id; + b8 is_first_in_cluster; u64 checksum_size() const { return (n_elems < 0) * 8; diff --git a/src/root/RMicroFileReader.cxx b/src/root/RMicroFileReader.cxx index 8cad463..966c390 100644 --- a/src/root/RMicroFileReader.cxx +++ b/src/root/RMicroFileReader.cxx @@ -1081,7 +1081,6 @@ Root_File_Info get_root_file_info(const char *fname, const char *ntplName, bool 100, 0, RTFString{"TFile"}, fileNameStr, RTFString{}, sizeof(RTFFile) + fileNameStr.GetSize() + RTFString{}.GetSize() + RTFUUID{}.GetSize() }; - fileInfo.tfile_obj_nbytes = fileHeader.GetSize(); fileInfo.version_seek = offsetof(RTFHeader, fVersion); fileInfo.compression_seek = is_big_file ? offsetof(RTFHeader, fInfoLong.fCompress) : offsetof(RTFHeader, fInfoShort.fCompress); fileInfo.info_seek_seek = is_big_file ? offsetof(RTFHeader, fInfoLong.fSeekInfo) : offsetof(RTFHeader, fInfoShort.fSeekInfo); @@ -1217,9 +1216,11 @@ RMicroFileReader::GetNTupleProper(const char *ntupleName) fileInfo.anchor_nbytes = objNbytes; { // @Incomplete: each section has a differently-sized RBlob, we need to account for that! + RTFString blobName { kBlobClassName }; RTFKey dummy; - dummy.MakeBigKey(); - fileInfo.rblob_key_header_nbytes = dummy.GetHeaderSize(); + if (fileHeader.IsBigFile()) + dummy.MakeBigKey(); + fileInfo.rblob_key_header_nbytes = dummy.GetHeaderSize() + blobName.GetSize() + 2 * RTFString{}.GetSize(); } // @---- ReadBuffer(ntuple, objNbytes, offset); diff --git a/src/root/RMicroFileReader.hxx b/src/root/RMicroFileReader.hxx index d68f48c..20e93a1 100644 --- a/src/root/RMicroFileReader.hxx +++ b/src/root/RMicroFileReader.hxx @@ -47,7 +47,6 @@ struct RNTuple_File_Info { struct Root_File_Info { std::uint64_t tfile_header_nbytes; std::uint64_t tfile_obj_seek; - std::uint64_t tfile_obj_nbytes; std::uint64_t version_seek; std::uint64_t compression_seek; std::uint64_t info_seek_seek;