make rntviewer more resilient to corrupted files
This commit is contained in:
parent
9e47b823f6
commit
cae82ef66f
2 changed files with 68 additions and 55 deletions
|
@ -719,13 +719,12 @@ Sec_Hover_Info get_section_hover_info(Arena *arena, Section section, u64 off, co
|
||||||
// so they're always occupying only 8 bytes.
|
// so they're always occupying only 8 bytes.
|
||||||
|| hover.field_le<u64>("Flags: 0x%" PRIX64)
|
|| hover.field_le<u64>("Flags: 0x%" PRIX64)
|
||||||
|| hover.field_le<u64>("Header checksum: 0x%" PRIX64)
|
|| hover.field_le<u64>("Header checksum: 0x%" PRIX64)
|
||||||
|
|| hover.frame_header()
|
||||||
|| hover.schema_description("Schema Extension")
|
|| hover.schema_description("Schema Extension")
|
||||||
// TODO:
|
// - list of column group record frames (TODO)
|
||||||
// - list of column group record frames
|
|| hover.frame_header("Column Groups")
|
||||||
|| hover.frame_header()
|
// - list of cluster group record frames (TODO)
|
||||||
// - list of cluster group record frames
|
|| hover.frame_header("Cluster Groups")
|
||||||
|| hover.frame_header()
|
|
||||||
|| hover.cluster_group()
|
|
||||||
|| hover.range("Payload", section.range.len - hover.cur_field_off)
|
|| hover.range("Payload", section.range.len - hover.cur_field_off)
|
||||||
|| hover.field_le<u64>("Checksum: 0x%" PRIX64)
|
|| hover.field_le<u64>("Checksum: 0x%" PRIX64)
|
||||||
;
|
;
|
||||||
|
|
112
src/rntuple.cpp
112
src/rntuple.cpp
|
@ -34,8 +34,12 @@ ROOT::Experimental::RNTupleDescriptor create_descriptor(Arena *arena, RMicroFile
|
||||||
|
|
||||||
// Deserialize header+footer
|
// Deserialize header+footer
|
||||||
RNTupleDescriptorBuilder desc_builder;
|
RNTupleDescriptorBuilder desc_builder;
|
||||||
RNTupleSerializer::DeserializeHeader(header, anchor.fLenHeader, desc_builder);
|
try {
|
||||||
RNTupleSerializer::DeserializeFooter(footer, anchor.fLenFooter, desc_builder);
|
RNTupleSerializer::DeserializeHeader(header, anchor.fLenHeader, desc_builder);
|
||||||
|
RNTupleSerializer::DeserializeFooter(footer, anchor.fLenFooter, desc_builder);
|
||||||
|
} catch (...) {
|
||||||
|
fprintf(stderr, "Failed to deserialize header/footer!\n");
|
||||||
|
}
|
||||||
|
|
||||||
RNTupleDescriptor descriptor = desc_builder.MoveDescriptor();
|
RNTupleDescriptor descriptor = desc_builder.MoveDescriptor();
|
||||||
for (const RClusterGroupDescriptor &cgdesc : descriptor.GetClusterGroupIterable()) {
|
for (const RClusterGroupDescriptor &cgdesc : descriptor.GetClusterGroupIterable()) {
|
||||||
|
@ -262,65 +266,75 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
|
||||||
// that an offset belongs to.
|
// that an offset belongs to.
|
||||||
// A page chunk is a grouping of adjacent pages, used to quickly determine if an offset is part
|
// A page chunk is a grouping of adjacent pages, used to quickly determine if an offset is part
|
||||||
// of a page or not.
|
// of a page or not.
|
||||||
assert(pinfo_head);
|
Page_Info_Group *groups = nullptr;
|
||||||
const u64 GROUP_SIZE = 500;
|
Page_Info_Chunk *chunks_head = nullptr, *chunks_tail = nullptr;
|
||||||
Page_Info_Group *groups = arena_push_array_nozero<Page_Info_Group>(arena, n_pages / GROUP_SIZE + 1);
|
u64 n_groups = 0;
|
||||||
u64 n_groups = 1;
|
u64 n_chunks = 0;
|
||||||
groups->first = pinfo_head;
|
u64 idx = 0;
|
||||||
groups->range.start = pinfo_head->range.start;
|
// NOTE: pinfo_head may be null if we failed to load any page (which may happen e.g. if the rntuple
|
||||||
|
// is corrupted)
|
||||||
|
if (pinfo_head) {
|
||||||
|
const u64 GROUP_SIZE = 500;
|
||||||
|
groups = arena_push_array_nozero<Page_Info_Group>(arena, n_pages / GROUP_SIZE + 1);
|
||||||
|
n_groups = 1;
|
||||||
|
groups->first = pinfo_head;
|
||||||
|
groups->range.start = pinfo_head->range.start;
|
||||||
|
|
||||||
Page_Info_Chunk *chunks_head = arena_push<Page_Info_Chunk>(arena);
|
chunks_head = arena_push<Page_Info_Chunk>(arena);
|
||||||
Page_Info_Chunk *chunks_tail = chunks_head;
|
chunks_tail = chunks_head;
|
||||||
chunks_head->range = pinfo_head->range;
|
chunks_head->range = pinfo_head->range;
|
||||||
u64 n_chunks = 1;
|
n_chunks = 1;
|
||||||
|
|
||||||
u64 idx = 1;
|
idx = 1;
|
||||||
[[maybe_unused]] Page_Info_Node *prev = pinfo_head;
|
[[maybe_unused]] Page_Info_Node *prev = pinfo_head;
|
||||||
for (Page_Info_Node *pinfo = pinfo_head->next; pinfo; pinfo = pinfo->next) {
|
for (Page_Info_Node *pinfo = pinfo_head->next; pinfo; pinfo = pinfo->next) {
|
||||||
assert(prev->range.end() <= pinfo->range.start);
|
assert(prev->range.end() <= pinfo->range.start);
|
||||||
prev = pinfo;
|
prev = pinfo;
|
||||||
|
|
||||||
if (pinfo->range.start != chunks_tail->range.end()) {
|
if (pinfo->range.start != chunks_tail->range.end()) {
|
||||||
// close current chunk and open new one
|
// close current chunk and open new one
|
||||||
Page_Info_Chunk *chunk = arena_push<Page_Info_Chunk>(arena);
|
Page_Info_Chunk *chunk = arena_push<Page_Info_Chunk>(arena);
|
||||||
chunk->range.start = pinfo->range.start;
|
chunk->range.start = pinfo->range.start;
|
||||||
chunk->first_group = n_groups - 1;
|
chunk->first_group = n_groups - 1;
|
||||||
chunks_tail->next = chunk;
|
chunks_tail->next = chunk;
|
||||||
chunks_tail = chunk;
|
chunks_tail = chunk;
|
||||||
++n_chunks;
|
++n_chunks;
|
||||||
|
}
|
||||||
|
chunks_tail->range.len += pinfo->range.len;
|
||||||
|
|
||||||
|
// while we're at it, set the first_page_idx information on the page's parent cluster
|
||||||
|
// Note that the first page won't update its cluster's `first_page_idx` (since we loop
|
||||||
|
// from idx = 1) but that's fine because that idx is by definition 0.
|
||||||
|
if (pinfo->is_first_in_cluster)
|
||||||
|
clusters[pinfo->cluster_id].first_page_idx = idx;
|
||||||
|
|
||||||
|
if (idx++ % GROUP_SIZE != 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Create a new group every GROUP_SIZE page infos
|
||||||
|
|
||||||
|
Page_Info_Group &cur_group = groups[n_groups];
|
||||||
|
cur_group.first = pinfo;
|
||||||
|
cur_group.range.start = pinfo->range.start;
|
||||||
|
Page_Info_Group &prev_group = groups[n_groups - 1];
|
||||||
|
prev_group.range.len = cur_group.range.start - prev_group.range.start;
|
||||||
|
|
||||||
|
++n_groups;
|
||||||
}
|
}
|
||||||
chunks_tail->range.len += pinfo->range.len;
|
|
||||||
|
|
||||||
// while we're at it, set the first_page_idx information on the page's parent cluster
|
|
||||||
// Note that the first page won't update its cluster's `first_page_idx` (since we loop
|
|
||||||
// from idx = 1) but that's fine because that idx is by definition 0.
|
|
||||||
if (pinfo->is_first_in_cluster)
|
|
||||||
clusters[pinfo->cluster_id].first_page_idx = idx;
|
|
||||||
|
|
||||||
if (idx++ % GROUP_SIZE != 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Create a new group every GROUP_SIZE page infos
|
|
||||||
|
|
||||||
Page_Info_Group &cur_group = groups[n_groups];
|
|
||||||
cur_group.first = pinfo;
|
|
||||||
cur_group.range.start = pinfo->range.start;
|
|
||||||
Page_Info_Group &prev_group = groups[n_groups - 1];
|
|
||||||
prev_group.range.len = cur_group.range.start - prev_group.range.start;
|
|
||||||
|
|
||||||
++n_groups;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// verify that we added all pages to chunks
|
// verify that we added all pages to chunks
|
||||||
assert(idx == n_pages);
|
assert(idx == n_pages);
|
||||||
|
|
||||||
Page_Info_Group &last_group = groups[n_groups - 1];
|
if (n_groups) {
|
||||||
last_group.range.len = pinfo_tail->range.end() - last_group.range.start;
|
Page_Info_Group &last_group = groups[n_groups - 1];
|
||||||
|
last_group.range.len = pinfo_tail->range.end() - last_group.range.start;
|
||||||
|
}
|
||||||
|
|
||||||
fprintf(stderr, "Generated %" PRIu64 " groups and %" PRIu64 " chunks.\n", n_groups, n_chunks);
|
fprintf(stderr, "Generated %" PRIu64 " groups and %" PRIu64 " chunks.\n", n_groups, n_chunks);
|
||||||
|
|
||||||
assert(!chunks_tail->next);
|
assert(!chunks_tail || !chunks_tail->next);
|
||||||
assert(!pinfo_tail->next);
|
assert(!pinfo_tail || !pinfo_tail->next);
|
||||||
|
|
||||||
rndata.pages = pinfo_head;
|
rndata.pages = pinfo_head;
|
||||||
rndata.page_groups = groups;
|
rndata.page_groups = groups;
|
||||||
|
|
Loading…
Reference in a new issue