Fix bug in chunk building

We were discarding roughly 90% of the pages due to buggy insertion of pages
into the linked list.
This commit is contained in:
silverweed 2024-08-01 14:02:00 +02:00
parent 65776eee8a
commit d76dd022fd
3 changed files with 21 additions and 10 deletions

View file

@ -179,15 +179,8 @@ void viewer_jump_to_cluster(App_State &app, u64 cluster_idx)
assert(app.rndata.n_clusters > 0); assert(app.rndata.n_clusters > 0);
cluster_idx = (cluster_idx + app.rndata.n_clusters) % app.rndata.n_clusters; cluster_idx = (cluster_idx + app.rndata.n_clusters) % app.rndata.n_clusters;
// @Speed: this is slow! Consider an acceleration structure, or maybe we can reuse Page_Info_Node *page = app.rndata.clusters[cluster_idx].first_page;
// Page_Info_Groups + binary search? (depends on whether cluster_idx are sorted) assert(page);
Page_Info_Node *page = app.rndata.pages;
for (u64 i = 0; i < app.rndata.n_pages; ++i) {
if (page->cluster_id == cluster_idx)
break;
page = page->next;
assert(page);
}
app.viewer.highlighted_cluster = cluster_idx; app.viewer.highlighted_cluster = cluster_idx;
viewer_jump_to(app, page->range.start); viewer_jump_to(app, page->range.start);

View file

@ -96,6 +96,8 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
chr::time_point start_t = chr::high_resolution_clock::now(); chr::time_point start_t = chr::high_resolution_clock::now();
Cluster_Info_Node *clusters = arena_push_array<Cluster_Info_Node>(arena, descriptor.GetNActiveClusters());
// gather clusters and pages metadata // gather clusters and pages metadata
for (const RClusterDescriptor &cluster_desc : descriptor.GetClusterIterable()) { for (const RClusterDescriptor &cluster_desc : descriptor.GetClusterIterable()) {
++n_clusters; ++n_clusters;
@ -111,7 +113,12 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
pinfo->n_elems = page_info.fHasChecksum ? -page_info.fNElements : page_info.fNElements; pinfo->n_elems = page_info.fHasChecksum ? -page_info.fNElements : page_info.fNElements;
pinfo->cluster_id = cluster_desc.GetId(); pinfo->cluster_id = cluster_desc.GetId();
if (!pinfo_head) { Cluster_Info_Node &cluster = clusters[pinfo->cluster_id];
if (!cluster.first_page || pinfo->range.start < cluster.first_page->range.start) {
cluster.first_page = pinfo;
}
if (UNLIKELY(!pinfo_head)) {
// first node inserted // first node inserted
assert(!pinfo_tail); assert(!pinfo_tail);
pinfo_head = pinfo_tail = pinfo; pinfo_head = pinfo_tail = pinfo;
@ -141,6 +148,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
prev->next = pinfo; prev->next = pinfo;
pinfo->prev = prev; pinfo->prev = prev;
} }
node->prev = pinfo;
pinfo->next = node; pinfo->next = node;
inserted = true; inserted = true;
break; break;
@ -157,6 +165,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
prev->next = pinfo; prev->next = pinfo;
pinfo->prev = prev; pinfo->prev = prev;
} }
node->prev = pinfo;
pinfo->next = node; pinfo->next = node;
inserted = true; inserted = true;
break; break;
@ -231,6 +240,9 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
++n_groups; ++n_groups;
} }
// verify that we added all pages to chunks
assert(idx == n_pages);
Page_Info_Group &last_group = groups[n_groups - 1]; Page_Info_Group &last_group = groups[n_groups - 1];
last_group.range.len = pinfo_tail->range.end() - last_group.range.start; last_group.range.len = pinfo_tail->range.end() - last_group.range.start;
@ -250,6 +262,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
rndata.cluster_groups = cluster_groups; rndata.cluster_groups = cluster_groups;
rndata.n_cluster_groups = cg_idx; rndata.n_cluster_groups = cg_idx;
rndata.tot_page_list_size = tot_page_list_size; rndata.tot_page_list_size = tot_page_list_size;
rndata.clusters = clusters;
rndata.n_clusters = n_clusters; rndata.n_clusters = n_clusters;
} }

View file

@ -35,6 +35,10 @@ struct Page_Info_Chunk {
u32 first_group; u32 first_group;
}; };
struct Cluster_Info_Node {
Page_Info_Node *first_page;
};
struct Cluster_Group_Info { struct Cluster_Group_Info {
Byte_Range rng_page_list; Byte_Range rng_page_list;
}; };
@ -78,6 +82,7 @@ struct RNTuple_Data {
u64 n_cluster_groups; u64 n_cluster_groups;
u64 tot_page_list_size; u64 tot_page_list_size;
Cluster_Info_Node *clusters;
u64 n_clusters; u64 n_clusters;
Page_Info_Group *page_groups; Page_Info_Group *page_groups;