make rntviewer more resilient to corrupted files

This commit is contained in:
silverweed 2024-09-20 11:53:53 +02:00
parent 9e47b823f6
commit cae82ef66f
2 changed files with 68 additions and 55 deletions

View file

@ -719,13 +719,12 @@ Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, co
// so they're always occupying only 8 bytes. // so they're always occupying only 8 bytes.
|| hover.field_le<u64>("Flags: 0x%" PRIX64) || hover.field_le<u64>("Flags: 0x%" PRIX64)
|| hover.field_le<u64>("Header checksum: 0x%" PRIX64) || hover.field_le<u64>("Header checksum: 0x%" PRIX64)
|| hover.frame_header()
|| hover.schema_description("Schema Extension") || hover.schema_description("Schema Extension")
// TODO: // - list of column group record frames (TODO)
// - list of column group record frames || hover.frame_header("Column Groups")
|| hover.frame_header() // - list of cluster group record frames (TODO)
// - list of cluster group record frames || hover.frame_header("Cluster Groups")
|| hover.frame_header()
|| hover.cluster_group()
|| hover.range("Payload", section.range.len - hover.cur_field_off) || hover.range("Payload", section.range.len - hover.cur_field_off)
|| hover.field_le<u64>("Checksum: 0x%" PRIX64) || hover.field_le<u64>("Checksum: 0x%" PRIX64)
; ;

View file

@ -34,8 +34,12 @@ ROOT::Experimental::RNTupleDescriptor create_descriptor(Arena *arena, RMicroFile
// Deserialize header+footer // Deserialize header+footer
RNTupleDescriptorBuilder desc_builder; RNTupleDescriptorBuilder desc_builder;
try {
RNTupleSerializer::DeserializeHeader(header, anchor.fLenHeader, desc_builder); RNTupleSerializer::DeserializeHeader(header, anchor.fLenHeader, desc_builder);
RNTupleSerializer::DeserializeFooter(footer, anchor.fLenFooter, desc_builder); RNTupleSerializer::DeserializeFooter(footer, anchor.fLenFooter, desc_builder);
} catch (...) {
fprintf(stderr, "Failed to deserialize header/footer!\n");
}
RNTupleDescriptor descriptor = desc_builder.MoveDescriptor(); RNTupleDescriptor descriptor = desc_builder.MoveDescriptor();
for (const RClusterGroupDescriptor &cgdesc : descriptor.GetClusterGroupIterable()) { for (const RClusterGroupDescriptor &cgdesc : descriptor.GetClusterGroupIterable()) {
@ -262,19 +266,26 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
// that an offset belongs to. // that an offset belongs to.
// A page chunk is a grouping of adjacent pages, used to quickly determine if an offset is part // A page chunk is a grouping of adjacent pages, used to quickly determine if an offset is part
// of a page or not. // of a page or not.
assert(pinfo_head); Page_Info_Group *groups = nullptr;
Page_Info_Chunk *chunks_head = nullptr, *chunks_tail = nullptr;
u64 n_groups = 0;
u64 n_chunks = 0;
u64 idx = 0;
// NOTE: pinfo_head may be null if we failed to load any page (which may happen e.g. if the rntuple
// is corrupted)
if (pinfo_head) {
const u64 GROUP_SIZE = 500; const u64 GROUP_SIZE = 500;
Page_Info_Group *groups = arena_push_array_nozero<Page_Info_Group>(arena, n_pages / GROUP_SIZE + 1); groups = arena_push_array_nozero<Page_Info_Group>(arena, n_pages / GROUP_SIZE + 1);
u64 n_groups = 1; n_groups = 1;
groups->first = pinfo_head; groups->first = pinfo_head;
groups->range.start = pinfo_head->range.start; groups->range.start = pinfo_head->range.start;
Page_Info_Chunk *chunks_head = arena_push<Page_Info_Chunk>(arena); chunks_head = arena_push<Page_Info_Chunk>(arena);
Page_Info_Chunk *chunks_tail = chunks_head; chunks_tail = chunks_head;
chunks_head->range = pinfo_head->range; chunks_head->range = pinfo_head->range;
u64 n_chunks = 1; n_chunks = 1;
u64 idx = 1; idx = 1;
[[maybe_unused]] Page_Info_Node *prev = pinfo_head; [[maybe_unused]] Page_Info_Node *prev = pinfo_head;
for (Page_Info_Node *pinfo = pinfo_head->next; pinfo; pinfo = pinfo->next) { for (Page_Info_Node *pinfo = pinfo_head->next; pinfo; pinfo = pinfo->next) {
assert(prev->range.end() <= pinfo->range.start); assert(prev->range.end() <= pinfo->range.start);
@ -310,17 +321,20 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
++n_groups; ++n_groups;
} }
}
// verify that we added all pages to chunks // verify that we added all pages to chunks
assert(idx == n_pages); assert(idx == n_pages);
if (n_groups) {
Page_Info_Group &last_group = groups[n_groups - 1]; Page_Info_Group &last_group = groups[n_groups - 1];
last_group.range.len = pinfo_tail->range.end() - last_group.range.start; last_group.range.len = pinfo_tail->range.end() - last_group.range.start;
}
fprintf(stderr, "Generated %" PRIu64 " groups and %" PRIu64 " chunks.\n", n_groups, n_chunks); fprintf(stderr, "Generated %" PRIu64 " groups and %" PRIu64 " chunks.\n", n_groups, n_chunks);
assert(!chunks_tail->next); assert(!chunks_tail || !chunks_tail->next);
assert(!pinfo_tail->next); assert(!pinfo_tail || !pinfo_tail->next);
rndata.pages = pinfo_head; rndata.pages = pinfo_head;
rndata.page_groups = groups; rndata.page_groups = groups;