Fix bug in chunk building

We were discarding roughly 90% of the pages due to buggy insertion of pages
into the linked list.
This commit is contained in:
silverweed 2024-08-01 14:02:00 +02:00
parent 65776eee8a
commit d76dd022fd
3 changed files with 21 additions and 10 deletions

View file

@ -179,15 +179,8 @@ void viewer_jump_to_cluster(App_State &app, u64 cluster_idx)
assert(app.rndata.n_clusters > 0);
cluster_idx = (cluster_idx + app.rndata.n_clusters) % app.rndata.n_clusters;
// @Speed: this is slow! Consider an acceleration structure, or maybe we can reuse
// Page_Info_Groups + binary search? (depends on whether cluster_idx are sorted)
Page_Info_Node *page = app.rndata.pages;
for (u64 i = 0; i < app.rndata.n_pages; ++i) {
if (page->cluster_id == cluster_idx)
break;
page = page->next;
Page_Info_Node *page = app.rndata.clusters[cluster_idx].first_page;
assert(page);
}
app.viewer.highlighted_cluster = cluster_idx;
viewer_jump_to(app, page->range.start);

View file

@ -96,6 +96,8 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
chr::time_point start_t = chr::high_resolution_clock::now();
Cluster_Info_Node *clusters = arena_push_array<Cluster_Info_Node>(arena, descriptor.GetNActiveClusters());
// gather clusters and pages metadata
for (const RClusterDescriptor &cluster_desc : descriptor.GetClusterIterable()) {
++n_clusters;
@ -111,7 +113,12 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
pinfo->n_elems = page_info.fHasChecksum ? -page_info.fNElements : page_info.fNElements;
pinfo->cluster_id = cluster_desc.GetId();
if (!pinfo_head) {
Cluster_Info_Node &cluster = clusters[pinfo->cluster_id];
if (!cluster.first_page || pinfo->range.start < cluster.first_page->range.start) {
cluster.first_page = pinfo;
}
if (UNLIKELY(!pinfo_head)) {
// first node inserted
assert(!pinfo_tail);
pinfo_head = pinfo_tail = pinfo;
@ -141,6 +148,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
prev->next = pinfo;
pinfo->prev = prev;
}
node->prev = pinfo;
pinfo->next = node;
inserted = true;
break;
@ -157,6 +165,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
prev->next = pinfo;
pinfo->prev = prev;
}
node->prev = pinfo;
pinfo->next = node;
inserted = true;
break;
@ -231,6 +240,9 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
++n_groups;
}
// verify that we added all pages to chunks
assert(idx == n_pages);
Page_Info_Group &last_group = groups[n_groups - 1];
last_group.range.len = pinfo_tail->range.end() - last_group.range.start;
@ -250,6 +262,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
rndata.cluster_groups = cluster_groups;
rndata.n_cluster_groups = cg_idx;
rndata.tot_page_list_size = tot_page_list_size;
rndata.clusters = clusters;
rndata.n_clusters = n_clusters;
}

View file

@ -35,6 +35,10 @@ struct Page_Info_Chunk {
u32 first_group;
};
// Per-cluster metadata. gather_ntuple_metadata stores these in an array that it
// indexes by a page's cluster_id, and viewer_jump_to_cluster indexes the same
// array by cluster index.
struct Cluster_Info_Node {
// The page of this cluster with the lowest range.start seen so far while
// gathering metadata. It is tested for null before the first assignment, so
// the storage is assumed to start out zeroed — TODO confirm that the arena
// allocation guarantees this.
Page_Info_Node *first_page;
};
// Metadata kept for one cluster group.
struct Cluster_Group_Info {
// NOTE(review): presumably the byte range occupied by this cluster group's
// page list — confirm against the code that fills it in.
Byte_Range rng_page_list;
};
@ -78,6 +82,7 @@ struct RNTuple_Data {
u64 n_cluster_groups;
u64 tot_page_list_size;
Cluster_Info_Node *clusters;
u64 n_clusters;
Page_Info_Group *page_groups;