Fix bug in chunk building
We were discarding roughly 90% of the pages because pages were being inserted incorrectly into the linked list.
This commit is contained in:
parent
65776eee8a
commit
d76dd022fd
3 changed files with 21 additions and 10 deletions
|
@ -179,15 +179,8 @@ void viewer_jump_to_cluster(App_State &app, u64 cluster_idx)
|
|||
assert(app.rndata.n_clusters > 0);
|
||||
cluster_idx = (cluster_idx + app.rndata.n_clusters) % app.rndata.n_clusters;
|
||||
|
||||
// @Speed: this is slow! Consider an acceleration structure, or maybe we can reuse
|
||||
// Page_Info_Groups + binary search? (depends on whether cluster_idx are sorted)
|
||||
Page_Info_Node *page = app.rndata.pages;
|
||||
for (u64 i = 0; i < app.rndata.n_pages; ++i) {
|
||||
if (page->cluster_id == cluster_idx)
|
||||
break;
|
||||
page = page->next;
|
||||
Page_Info_Node *page = app.rndata.clusters[cluster_idx].first_page;
|
||||
assert(page);
|
||||
}
|
||||
|
||||
app.viewer.highlighted_cluster = cluster_idx;
|
||||
viewer_jump_to(app, page->range.start);
|
||||
|
|
|
@ -96,6 +96,8 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
|
|||
|
||||
chr::time_point start_t = chr::high_resolution_clock::now();
|
||||
|
||||
Cluster_Info_Node *clusters = arena_push_array<Cluster_Info_Node>(arena, descriptor.GetNActiveClusters());
|
||||
|
||||
// gather clusters and pages metadata
|
||||
for (const RClusterDescriptor &cluster_desc : descriptor.GetClusterIterable()) {
|
||||
++n_clusters;
|
||||
|
@ -111,7 +113,12 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
|
|||
pinfo->n_elems = page_info.fHasChecksum ? -page_info.fNElements : page_info.fNElements;
|
||||
pinfo->cluster_id = cluster_desc.GetId();
|
||||
|
||||
if (!pinfo_head) {
|
||||
Cluster_Info_Node &cluster = clusters[pinfo->cluster_id];
|
||||
if (!cluster.first_page || pinfo->range.start < cluster.first_page->range.start) {
|
||||
cluster.first_page = pinfo;
|
||||
}
|
||||
|
||||
if (UNLIKELY(!pinfo_head)) {
|
||||
// first node inserted
|
||||
assert(!pinfo_tail);
|
||||
pinfo_head = pinfo_tail = pinfo;
|
||||
|
@ -141,6 +148,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
|
|||
prev->next = pinfo;
|
||||
pinfo->prev = prev;
|
||||
}
|
||||
node->prev = pinfo;
|
||||
pinfo->next = node;
|
||||
inserted = true;
|
||||
break;
|
||||
|
@ -157,6 +165,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
|
|||
prev->next = pinfo;
|
||||
pinfo->prev = prev;
|
||||
}
|
||||
node->prev = pinfo;
|
||||
pinfo->next = node;
|
||||
inserted = true;
|
||||
break;
|
||||
|
@ -231,6 +240,9 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
|
|||
++n_groups;
|
||||
}
|
||||
|
||||
// verify that we added all pages to chunks
|
||||
assert(idx == n_pages);
|
||||
|
||||
Page_Info_Group &last_group = groups[n_groups - 1];
|
||||
last_group.range.len = pinfo_tail->range.end() - last_group.range.start;
|
||||
|
||||
|
@ -250,6 +262,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
|
|||
rndata.cluster_groups = cluster_groups;
|
||||
rndata.n_cluster_groups = cg_idx;
|
||||
rndata.tot_page_list_size = tot_page_list_size;
|
||||
rndata.clusters = clusters;
|
||||
rndata.n_clusters = n_clusters;
|
||||
}
|
||||
|
||||
|
|
|
@ -35,6 +35,10 @@ struct Page_Info_Chunk {
|
|||
u32 first_group;
|
||||
};
|
||||
|
||||
// Per-cluster lookup record: lets code reach a cluster's pages directly instead of
// walking the whole page linked list.
// gather_ntuple_metadata fills an array of these, indexed by cluster id, and
// viewer_jump_to_cluster reads it to find where to jump.
struct Cluster_Info_Node {
|
||||
// The page of this cluster with the lowest range.start seen while gathering metadata.
// A null value is treated as "no page recorded yet".
// NOTE(review): this relies on the arena allocation handing back zeroed memory — confirm.
Page_Info_Node *first_page;
|
||||
};
|
||||
|
||||
// Per-cluster-group metadata record.
struct Cluster_Group_Info {
|
||||
// NOTE(review): presumably the byte range occupied by this cluster group's page list
// within the file — confirm against the code that fills it in.
Byte_Range rng_page_list;
|
||||
};
|
||||
|
@ -78,6 +82,7 @@ struct RNTuple_Data {
|
|||
u64 n_cluster_groups;
|
||||
u64 tot_page_list_size;
|
||||
|
||||
Cluster_Info_Node *clusters;
|
||||
u64 n_clusters;
|
||||
|
||||
Page_Info_Group *page_groups;
|
||||
|
|
Loading…
Reference in a new issue